| 1 | //===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "clang/Analysis/MacroExpansionContext.h" |
| 10 | #include "clang/Format/Format.h" |
| 11 | #include "llvm/Support/Debug.h" |
| 12 | #include <optional> |
| 13 | |
| 14 | #define DEBUG_TYPE "macro-expansion-context" |
| 15 | |
/// Writes a textual form of \p Tok to \p OS. Forward-declared here because the
/// preprocessor callback class below uses it in its debug output, while the
/// definition appears later in this file.
static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS,
                          clang::Token Tok);
| 18 | |
| 19 | namespace clang { |
| 20 | namespace detail { |
| 21 | class MacroExpansionRangeRecorder : public PPCallbacks { |
| 22 | const Preprocessor &PP; |
| 23 | SourceManager &SM; |
| 24 | MacroExpansionContext::ExpansionRangeMap &ExpansionRanges; |
| 25 | |
| 26 | public: |
| 27 | explicit MacroExpansionRangeRecorder( |
| 28 | const Preprocessor &PP, SourceManager &SM, |
| 29 | MacroExpansionContext::ExpansionRangeMap &ExpansionRanges) |
| 30 | : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {} |
| 31 | |
| 32 | void MacroExpands(const Token &MacroName, const MacroDefinition &MD, |
| 33 | SourceRange Range, const MacroArgs *Args) override { |
| 34 | // Ignore annotation tokens like: _Pragma("pack(push, 1)") |
| 35 | if (MacroName.getIdentifierInfo()->getName() == "_Pragma" ) |
| 36 | return; |
| 37 | |
| 38 | SourceLocation MacroNameBegin = SM.getExpansionLoc(Loc: MacroName.getLocation()); |
| 39 | assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin())); |
| 40 | |
| 41 | const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] { |
| 42 | // If the range is empty, use the length of the macro. |
| 43 | if (Range.getBegin() == Range.getEnd()) |
| 44 | return SM.getExpansionLoc( |
| 45 | Loc: MacroName.getLocation().getLocWithOffset(Offset: MacroName.getLength())); |
| 46 | |
| 47 | // Include the last character. |
| 48 | return SM.getExpansionLoc(Loc: Range.getEnd()).getLocWithOffset(Offset: 1); |
| 49 | }(); |
| 50 | |
| 51 | (void)PP; |
| 52 | LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '" ; |
| 53 | dumpTokenInto(PP, llvm::dbgs(), MacroName); |
| 54 | llvm::dbgs() |
| 55 | << "' with length " << MacroName.getLength() << " at " ; |
| 56 | MacroNameBegin.print(llvm::dbgs(), SM); |
| 57 | llvm::dbgs() << ", expansion end at " ; |
| 58 | ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';); |
| 59 | |
| 60 | // If the expansion range is empty, use the identifier of the macro as a |
| 61 | // range. |
| 62 | MacroExpansionContext::ExpansionRangeMap::iterator It; |
| 63 | bool Inserted; |
| 64 | std::tie(args&: It, args&: Inserted) = |
| 65 | ExpansionRanges.try_emplace(Key: MacroNameBegin, Args: ExpansionEnd); |
| 66 | if (Inserted) { |
| 67 | LLVM_DEBUG(llvm::dbgs() << "maps " ; |
| 68 | It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to " ; |
| 69 | It->getSecond().print(llvm::dbgs(), SM); |
| 70 | llvm::dbgs() << '\n';); |
| 71 | } else { |
| 72 | if (SM.isBeforeInTranslationUnit(LHS: It->getSecond(), RHS: ExpansionEnd)) { |
| 73 | It->getSecond() = ExpansionEnd; |
| 74 | LLVM_DEBUG( |
| 75 | llvm::dbgs() << "remaps " ; It->getFirst().print(llvm::dbgs(), SM); |
| 76 | llvm::dbgs() << " to " ; It->getSecond().print(llvm::dbgs(), SM); |
| 77 | llvm::dbgs() << '\n';); |
| 78 | } |
| 79 | } |
| 80 | } |
| 81 | }; |
| 82 | } // namespace detail |
| 83 | } // namespace clang |
| 84 | |
| 85 | using namespace clang; |
| 86 | |
/// Initializes only the LangOpts member. The PP and SM members are assigned
/// later, by registerForPreprocessor().
MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts)
    : LangOpts(LangOpts) {}
| 89 | |
| 90 | void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) { |
| 91 | PP = &NewPP; |
| 92 | SM = &NewPP.getSourceManager(); |
| 93 | |
| 94 | // Make sure that the Preprocessor does not outlive the MacroExpansionContext. |
| 95 | PP->addPPCallbacks(C: std::make_unique<detail::MacroExpansionRangeRecorder>( |
| 96 | args&: *PP, args&: *SM, args&: ExpansionRanges)); |
| 97 | // Same applies here. |
| 98 | PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); }); |
| 99 | } |
| 100 | |
| 101 | std::optional<StringRef> |
| 102 | MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const { |
| 103 | if (MacroExpansionLoc.isMacroID()) |
| 104 | return std::nullopt; |
| 105 | |
| 106 | // If there was no macro expansion at that location, return std::nullopt. |
| 107 | if (ExpansionRanges.find_as(Val: MacroExpansionLoc) == ExpansionRanges.end()) |
| 108 | return std::nullopt; |
| 109 | |
| 110 | // There was macro expansion, but resulted in no tokens, return empty string. |
| 111 | const auto It = ExpandedTokens.find_as(Val: MacroExpansionLoc); |
| 112 | if (It == ExpandedTokens.end()) |
| 113 | return StringRef{"" }; |
| 114 | |
| 115 | // Otherwise we have the actual token sequence as string. |
| 116 | return It->getSecond().str(); |
| 117 | } |
| 118 | |
| 119 | std::optional<StringRef> |
| 120 | MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const { |
| 121 | if (MacroExpansionLoc.isMacroID()) |
| 122 | return std::nullopt; |
| 123 | |
| 124 | const auto It = ExpansionRanges.find_as(Val: MacroExpansionLoc); |
| 125 | if (It == ExpansionRanges.end()) |
| 126 | return std::nullopt; |
| 127 | |
| 128 | assert(It->getFirst() != It->getSecond() && |
| 129 | "Every macro expansion must cover a non-empty range." ); |
| 130 | |
| 131 | return Lexer::getSourceText( |
| 132 | Range: CharSourceRange::getCharRange(B: It->getFirst(), E: It->getSecond()), SM: *SM, |
| 133 | LangOpts); |
| 134 | } |
| 135 | |
| 136 | std::optional<StringRef> MacroExpansionContext::getFormattedExpandedText( |
| 137 | SourceLocation MacroExpansionLoc) const { |
| 138 | std::optional<StringRef> ExpandedText = getExpandedText(MacroExpansionLoc); |
| 139 | if (!ExpandedText) |
| 140 | return std::nullopt; |
| 141 | |
| 142 | auto [It, Inserted] = |
| 143 | FormattedExpandedTokens.try_emplace(Key: MacroExpansionLoc, Args: "" ); |
| 144 | if (!Inserted) |
| 145 | return StringRef(It->getSecond()); |
| 146 | |
| 147 | clang::format::FormatStyle Style = clang::format::getLLVMStyle(); |
| 148 | |
| 149 | std::string MacroCodeBlock = ExpandedText->str(); |
| 150 | |
| 151 | std::vector<clang::tooling::Range> Ranges; |
| 152 | Ranges.emplace_back(args: 0, args: MacroCodeBlock.length()); |
| 153 | |
| 154 | clang::tooling::Replacements Replacements = clang::format::reformat( |
| 155 | Style, Code: MacroCodeBlock, Ranges, FileName: "<macro-expansion>" ); |
| 156 | |
| 157 | llvm::Expected<std::string> Result = |
| 158 | clang::tooling::applyAllReplacements(Code: MacroCodeBlock, Replaces: Replacements); |
| 159 | |
| 160 | It->getSecond() = Result ? std::move(*Result) : std::move(MacroCodeBlock); |
| 161 | |
| 162 | return StringRef(It->getSecond()); |
| 163 | } |
| 164 | |
| 165 | void MacroExpansionContext::dumpExpansionRanges() const { |
| 166 | dumpExpansionRangesToStream(OS&: llvm::dbgs()); |
| 167 | } |
| 168 | void MacroExpansionContext::dumpExpandedTexts() const { |
| 169 | dumpExpandedTextsToStream(OS&: llvm::dbgs()); |
| 170 | } |
| 171 | |
| 172 | void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const { |
| 173 | std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges; |
| 174 | LocalExpansionRanges.reserve(n: ExpansionRanges.size()); |
| 175 | for (const auto &Record : ExpansionRanges) |
| 176 | LocalExpansionRanges.emplace_back( |
| 177 | args: std::make_pair(x: Record.getFirst(), y: Record.getSecond())); |
| 178 | llvm::sort(C&: LocalExpansionRanges); |
| 179 | |
| 180 | OS << "\n=============== ExpansionRanges ===============\n" ; |
| 181 | for (const auto &Record : LocalExpansionRanges) { |
| 182 | OS << "> " ; |
| 183 | Record.first.print(OS, SM: *SM); |
| 184 | OS << ", " ; |
| 185 | Record.second.print(OS, SM: *SM); |
| 186 | OS << '\n'; |
| 187 | } |
| 188 | } |
| 189 | |
| 190 | void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const { |
| 191 | std::vector<std::pair<SourceLocation, MacroExpansionText>> |
| 192 | LocalExpandedTokens; |
| 193 | LocalExpandedTokens.reserve(n: ExpandedTokens.size()); |
| 194 | for (const auto &Record : ExpandedTokens) |
| 195 | LocalExpandedTokens.emplace_back( |
| 196 | args: std::make_pair(x: Record.getFirst(), y: Record.getSecond())); |
| 197 | llvm::sort(C&: LocalExpandedTokens); |
| 198 | |
| 199 | OS << "\n=============== ExpandedTokens ===============\n" ; |
| 200 | for (const auto &Record : LocalExpandedTokens) { |
| 201 | OS << "> " ; |
| 202 | Record.first.print(OS, SM: *SM); |
| 203 | OS << " -> '" << Record.second << "'\n" ; |
| 204 | } |
| 205 | } |
| 206 | |
| 207 | static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) { |
| 208 | assert(Tok.isNot(tok::raw_identifier)); |
| 209 | |
| 210 | // Ignore annotation tokens like: _Pragma("pack(push, 1)") |
| 211 | if (Tok.isAnnotation()) |
| 212 | return; |
| 213 | |
| 214 | if (IdentifierInfo *II = Tok.getIdentifierInfo()) { |
| 215 | // FIXME: For now, we don't respect whitespaces between macro expanded |
| 216 | // tokens. We just emit a space after every identifier to produce a valid |
| 217 | // code for `int a ;` like expansions. |
| 218 | // ^-^-- Space after the 'int' and 'a' identifiers. |
| 219 | OS << II->getName() << ' '; |
| 220 | } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) { |
| 221 | OS << StringRef(Tok.getLiteralData(), Tok.getLength()); |
| 222 | } else { |
| 223 | char Tmp[256]; |
| 224 | if (Tok.getLength() < sizeof(Tmp)) { |
| 225 | const char *TokPtr = Tmp; |
| 226 | // FIXME: Might use a different overload for cleaner callsite. |
| 227 | unsigned Len = PP.getSpelling(Tok, Buffer&: TokPtr); |
| 228 | OS.write(Ptr: TokPtr, Size: Len); |
| 229 | } else { |
| 230 | OS << "<too long token>" ; |
| 231 | } |
| 232 | } |
| 233 | } |
| 234 | |
| 235 | void MacroExpansionContext::onTokenLexed(const Token &Tok) { |
| 236 | SourceLocation SLoc = Tok.getLocation(); |
| 237 | if (SLoc.isFileID()) |
| 238 | return; |
| 239 | |
| 240 | LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '" ; |
| 241 | dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at " ; |
| 242 | SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';); |
| 243 | |
| 244 | // Remove spelling location. |
| 245 | SourceLocation CurrExpansionLoc = SM->getExpansionLoc(Loc: SLoc); |
| 246 | |
| 247 | MacroExpansionText TokenAsString; |
| 248 | llvm::raw_svector_ostream OS(TokenAsString); |
| 249 | |
| 250 | // FIXME: Prepend newlines and space to produce the exact same output as the |
| 251 | // preprocessor would for this token. |
| 252 | |
| 253 | dumpTokenInto(PP: *PP, OS, Tok); |
| 254 | |
| 255 | ExpansionMap::iterator It; |
| 256 | bool Inserted; |
| 257 | std::tie(args&: It, args&: Inserted) = |
| 258 | ExpandedTokens.try_emplace(Key: CurrExpansionLoc, Args: std::move(TokenAsString)); |
| 259 | if (!Inserted) |
| 260 | It->getSecond().append(RHS: TokenAsString); |
| 261 | } |
| 262 | |
| 263 | |