| 1 | //===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "clang/Analysis/MacroExpansionContext.h" |
| 10 | #include "llvm/Support/Debug.h" |
| 11 | #include <optional> |
| 12 | |
| 13 | #define DEBUG_TYPE "macro-expansion-context" |
| 14 | |
| 15 | static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS, |
| 16 | clang::Token Tok); |
| 17 | |
| 18 | namespace clang { |
| 19 | namespace detail { |
| 20 | class MacroExpansionRangeRecorder : public PPCallbacks { |
| 21 | const Preprocessor &PP; |
| 22 | SourceManager &SM; |
| 23 | MacroExpansionContext::ExpansionRangeMap &ExpansionRanges; |
| 24 | |
| 25 | public: |
| 26 | explicit MacroExpansionRangeRecorder( |
| 27 | const Preprocessor &PP, SourceManager &SM, |
| 28 | MacroExpansionContext::ExpansionRangeMap &ExpansionRanges) |
| 29 | : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {} |
| 30 | |
| 31 | void MacroExpands(const Token &MacroName, const MacroDefinition &MD, |
| 32 | SourceRange Range, const MacroArgs *Args) override { |
| 33 | // Ignore annotation tokens like: _Pragma("pack(push, 1)") |
| 34 | if (MacroName.getIdentifierInfo()->getName() == "_Pragma" ) |
| 35 | return; |
| 36 | |
| 37 | SourceLocation MacroNameBegin = SM.getExpansionLoc(Loc: MacroName.getLocation()); |
| 38 | assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin())); |
| 39 | |
| 40 | const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] { |
| 41 | // If the range is empty, use the length of the macro. |
| 42 | if (Range.getBegin() == Range.getEnd()) |
| 43 | return SM.getExpansionLoc( |
| 44 | Loc: MacroName.getLocation().getLocWithOffset(Offset: MacroName.getLength())); |
| 45 | |
| 46 | // Include the last character. |
| 47 | return SM.getExpansionLoc(Loc: Range.getEnd()).getLocWithOffset(Offset: 1); |
| 48 | }(); |
| 49 | |
| 50 | (void)PP; |
| 51 | LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '" ; |
| 52 | dumpTokenInto(PP, llvm::dbgs(), MacroName); |
| 53 | llvm::dbgs() |
| 54 | << "' with length " << MacroName.getLength() << " at " ; |
| 55 | MacroNameBegin.print(llvm::dbgs(), SM); |
| 56 | llvm::dbgs() << ", expansion end at " ; |
| 57 | ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';); |
| 58 | |
| 59 | // If the expansion range is empty, use the identifier of the macro as a |
| 60 | // range. |
| 61 | MacroExpansionContext::ExpansionRangeMap::iterator It; |
| 62 | bool Inserted; |
| 63 | std::tie(args&: It, args&: Inserted) = |
| 64 | ExpansionRanges.try_emplace(Key: MacroNameBegin, Args: ExpansionEnd); |
| 65 | if (Inserted) { |
| 66 | LLVM_DEBUG(llvm::dbgs() << "maps " ; |
| 67 | It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to " ; |
| 68 | It->getSecond().print(llvm::dbgs(), SM); |
| 69 | llvm::dbgs() << '\n';); |
| 70 | } else { |
| 71 | if (SM.isBeforeInTranslationUnit(LHS: It->getSecond(), RHS: ExpansionEnd)) { |
| 72 | It->getSecond() = ExpansionEnd; |
| 73 | LLVM_DEBUG( |
| 74 | llvm::dbgs() << "remaps " ; It->getFirst().print(llvm::dbgs(), SM); |
| 75 | llvm::dbgs() << " to " ; It->getSecond().print(llvm::dbgs(), SM); |
| 76 | llvm::dbgs() << '\n';); |
| 77 | } |
| 78 | } |
| 79 | } |
| 80 | }; |
| 81 | } // namespace detail |
| 82 | } // namespace clang |
| 83 | |
| 84 | using namespace clang; |
| 85 | |
| 86 | MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts) |
| 87 | : LangOpts(LangOpts) {} |
| 88 | |
| 89 | void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) { |
| 90 | PP = &NewPP; |
| 91 | SM = &NewPP.getSourceManager(); |
| 92 | |
| 93 | // Make sure that the Preprocessor does not outlive the MacroExpansionContext. |
| 94 | PP->addPPCallbacks(C: std::make_unique<detail::MacroExpansionRangeRecorder>( |
| 95 | args&: *PP, args&: *SM, args&: ExpansionRanges)); |
| 96 | // Same applies here. |
| 97 | PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); }); |
| 98 | } |
| 99 | |
| 100 | std::optional<StringRef> |
| 101 | MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const { |
| 102 | if (MacroExpansionLoc.isMacroID()) |
| 103 | return std::nullopt; |
| 104 | |
| 105 | // If there was no macro expansion at that location, return std::nullopt. |
| 106 | if (ExpansionRanges.find_as(Val: MacroExpansionLoc) == ExpansionRanges.end()) |
| 107 | return std::nullopt; |
| 108 | |
| 109 | // There was macro expansion, but resulted in no tokens, return empty string. |
| 110 | const auto It = ExpandedTokens.find_as(Val: MacroExpansionLoc); |
| 111 | if (It == ExpandedTokens.end()) |
| 112 | return StringRef{"" }; |
| 113 | |
| 114 | // Otherwise we have the actual token sequence as string. |
| 115 | return It->getSecond().str(); |
| 116 | } |
| 117 | |
| 118 | std::optional<StringRef> |
| 119 | MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const { |
| 120 | if (MacroExpansionLoc.isMacroID()) |
| 121 | return std::nullopt; |
| 122 | |
| 123 | const auto It = ExpansionRanges.find_as(Val: MacroExpansionLoc); |
| 124 | if (It == ExpansionRanges.end()) |
| 125 | return std::nullopt; |
| 126 | |
| 127 | assert(It->getFirst() != It->getSecond() && |
| 128 | "Every macro expansion must cover a non-empty range." ); |
| 129 | |
| 130 | return Lexer::getSourceText( |
| 131 | Range: CharSourceRange::getCharRange(B: It->getFirst(), E: It->getSecond()), SM: *SM, |
| 132 | LangOpts); |
| 133 | } |
| 134 | |
| 135 | void MacroExpansionContext::dumpExpansionRanges() const { |
| 136 | dumpExpansionRangesToStream(OS&: llvm::dbgs()); |
| 137 | } |
| 138 | void MacroExpansionContext::dumpExpandedTexts() const { |
| 139 | dumpExpandedTextsToStream(OS&: llvm::dbgs()); |
| 140 | } |
| 141 | |
| 142 | void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const { |
| 143 | std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges; |
| 144 | LocalExpansionRanges.reserve(n: ExpansionRanges.size()); |
| 145 | for (const auto &Record : ExpansionRanges) |
| 146 | LocalExpansionRanges.emplace_back( |
| 147 | args: std::make_pair(x: Record.getFirst(), y: Record.getSecond())); |
| 148 | llvm::sort(C&: LocalExpansionRanges); |
| 149 | |
| 150 | OS << "\n=============== ExpansionRanges ===============\n" ; |
| 151 | for (const auto &Record : LocalExpansionRanges) { |
| 152 | OS << "> " ; |
| 153 | Record.first.print(OS, SM: *SM); |
| 154 | OS << ", " ; |
| 155 | Record.second.print(OS, SM: *SM); |
| 156 | OS << '\n'; |
| 157 | } |
| 158 | } |
| 159 | |
| 160 | void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const { |
| 161 | std::vector<std::pair<SourceLocation, MacroExpansionText>> |
| 162 | LocalExpandedTokens; |
| 163 | LocalExpandedTokens.reserve(n: ExpandedTokens.size()); |
| 164 | for (const auto &Record : ExpandedTokens) |
| 165 | LocalExpandedTokens.emplace_back( |
| 166 | args: std::make_pair(x: Record.getFirst(), y: Record.getSecond())); |
| 167 | llvm::sort(C&: LocalExpandedTokens); |
| 168 | |
| 169 | OS << "\n=============== ExpandedTokens ===============\n" ; |
| 170 | for (const auto &Record : LocalExpandedTokens) { |
| 171 | OS << "> " ; |
| 172 | Record.first.print(OS, SM: *SM); |
| 173 | OS << " -> '" << Record.second << "'\n" ; |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) { |
| 178 | assert(Tok.isNot(tok::raw_identifier)); |
| 179 | |
| 180 | // Ignore annotation tokens like: _Pragma("pack(push, 1)") |
| 181 | if (Tok.isAnnotation()) |
| 182 | return; |
| 183 | |
| 184 | if (IdentifierInfo *II = Tok.getIdentifierInfo()) { |
| 185 | // FIXME: For now, we don't respect whitespaces between macro expanded |
| 186 | // tokens. We just emit a space after every identifier to produce a valid |
| 187 | // code for `int a ;` like expansions. |
| 188 | // ^-^-- Space after the 'int' and 'a' identifiers. |
| 189 | OS << II->getName() << ' '; |
| 190 | } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) { |
| 191 | OS << StringRef(Tok.getLiteralData(), Tok.getLength()); |
| 192 | } else { |
| 193 | char Tmp[256]; |
| 194 | if (Tok.getLength() < sizeof(Tmp)) { |
| 195 | const char *TokPtr = Tmp; |
| 196 | // FIXME: Might use a different overload for cleaner callsite. |
| 197 | unsigned Len = PP.getSpelling(Tok, Buffer&: TokPtr); |
| 198 | OS.write(Ptr: TokPtr, Size: Len); |
| 199 | } else { |
| 200 | OS << "<too long token>" ; |
| 201 | } |
| 202 | } |
| 203 | } |
| 204 | |
| 205 | void MacroExpansionContext::onTokenLexed(const Token &Tok) { |
| 206 | SourceLocation SLoc = Tok.getLocation(); |
| 207 | if (SLoc.isFileID()) |
| 208 | return; |
| 209 | |
| 210 | LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '" ; |
| 211 | dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at " ; |
| 212 | SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';); |
| 213 | |
| 214 | // Remove spelling location. |
| 215 | SourceLocation CurrExpansionLoc = SM->getExpansionLoc(Loc: SLoc); |
| 216 | |
| 217 | MacroExpansionText TokenAsString; |
| 218 | llvm::raw_svector_ostream OS(TokenAsString); |
| 219 | |
| 220 | // FIXME: Prepend newlines and space to produce the exact same output as the |
| 221 | // preprocessor would for this token. |
| 222 | |
| 223 | dumpTokenInto(PP: *PP, OS, Tok); |
| 224 | |
| 225 | ExpansionMap::iterator It; |
| 226 | bool Inserted; |
| 227 | std::tie(args&: It, args&: Inserted) = |
| 228 | ExpandedTokens.try_emplace(Key: CurrExpansionLoc, Args: std::move(TokenAsString)); |
| 229 | if (!Inserted) |
| 230 | It->getSecond().append(RHS: TokenAsString); |
| 231 | } |
| 232 | |
| 233 | |