1//===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Analysis/MacroExpansionContext.h"
10#include "clang/Format/Format.h"
11#include "llvm/Support/Debug.h"
12#include <optional>
13
14#define DEBUG_TYPE "macro-expansion-context"
15
/// Writes a textual form of \p Tok to \p OS, using \p PP to obtain the
/// spelling when needed. Forward-declared here because the callback class
/// below uses it in its debug output; the definition is further down in this
/// file.
static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS,
                          clang::Token Tok);
18
19namespace clang {
20namespace detail {
21class MacroExpansionRangeRecorder : public PPCallbacks {
22 const Preprocessor &PP;
23 SourceManager &SM;
24 MacroExpansionContext::ExpansionRangeMap &ExpansionRanges;
25
26public:
27 explicit MacroExpansionRangeRecorder(
28 const Preprocessor &PP, SourceManager &SM,
29 MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
30 : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {}
31
32 void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
33 SourceRange Range, const MacroArgs *Args) override {
34 // Ignore annotation tokens like: _Pragma("pack(push, 1)")
35 if (MacroName.getIdentifierInfo()->getName() == "_Pragma")
36 return;
37
38 SourceLocation MacroNameBegin = SM.getExpansionLoc(Loc: MacroName.getLocation());
39 assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin()));
40
41 const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] {
42 // If the range is empty, use the length of the macro.
43 if (Range.getBegin() == Range.getEnd())
44 return SM.getExpansionLoc(
45 Loc: MacroName.getLocation().getLocWithOffset(Offset: MacroName.getLength()));
46
47 // Include the last character.
48 return SM.getExpansionLoc(Loc: Range.getEnd()).getLocWithOffset(Offset: 1);
49 }();
50
51 (void)PP;
52 LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '";
53 dumpTokenInto(PP, llvm::dbgs(), MacroName);
54 llvm::dbgs()
55 << "' with length " << MacroName.getLength() << " at ";
56 MacroNameBegin.print(llvm::dbgs(), SM);
57 llvm::dbgs() << ", expansion end at ";
58 ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';);
59
60 // If the expansion range is empty, use the identifier of the macro as a
61 // range.
62 MacroExpansionContext::ExpansionRangeMap::iterator It;
63 bool Inserted;
64 std::tie(args&: It, args&: Inserted) =
65 ExpansionRanges.try_emplace(Key: MacroNameBegin, Args: ExpansionEnd);
66 if (Inserted) {
67 LLVM_DEBUG(llvm::dbgs() << "maps ";
68 It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to ";
69 It->getSecond().print(llvm::dbgs(), SM);
70 llvm::dbgs() << '\n';);
71 } else {
72 if (SM.isBeforeInTranslationUnit(LHS: It->getSecond(), RHS: ExpansionEnd)) {
73 It->getSecond() = ExpansionEnd;
74 LLVM_DEBUG(
75 llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM);
76 llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM);
77 llvm::dbgs() << '\n';);
78 }
79 }
80 }
81};
82} // namespace detail
83} // namespace clang
84
85using namespace clang;
86
/// Constructs the context, initializing the LangOpts member from \p LangOpts.
MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts)
    : LangOpts(LangOpts) {}
89
90void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) {
91 PP = &NewPP;
92 SM = &NewPP.getSourceManager();
93
94 // Make sure that the Preprocessor does not outlive the MacroExpansionContext.
95 PP->addPPCallbacks(C: std::make_unique<detail::MacroExpansionRangeRecorder>(
96 args&: *PP, args&: *SM, args&: ExpansionRanges));
97 // Same applies here.
98 PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); });
99}
100
101std::optional<StringRef>
102MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const {
103 if (MacroExpansionLoc.isMacroID())
104 return std::nullopt;
105
106 // If there was no macro expansion at that location, return std::nullopt.
107 if (ExpansionRanges.find_as(Val: MacroExpansionLoc) == ExpansionRanges.end())
108 return std::nullopt;
109
110 // There was macro expansion, but resulted in no tokens, return empty string.
111 const auto It = ExpandedTokens.find_as(Val: MacroExpansionLoc);
112 if (It == ExpandedTokens.end())
113 return StringRef{""};
114
115 // Otherwise we have the actual token sequence as string.
116 return It->getSecond().str();
117}
118
119std::optional<StringRef>
120MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const {
121 if (MacroExpansionLoc.isMacroID())
122 return std::nullopt;
123
124 const auto It = ExpansionRanges.find_as(Val: MacroExpansionLoc);
125 if (It == ExpansionRanges.end())
126 return std::nullopt;
127
128 assert(It->getFirst() != It->getSecond() &&
129 "Every macro expansion must cover a non-empty range.");
130
131 return Lexer::getSourceText(
132 Range: CharSourceRange::getCharRange(B: It->getFirst(), E: It->getSecond()), SM: *SM,
133 LangOpts);
134}
135
136std::optional<StringRef> MacroExpansionContext::getFormattedExpandedText(
137 SourceLocation MacroExpansionLoc) const {
138 std::optional<StringRef> ExpandedText = getExpandedText(MacroExpansionLoc);
139 if (!ExpandedText)
140 return std::nullopt;
141
142 auto [It, Inserted] =
143 FormattedExpandedTokens.try_emplace(Key: MacroExpansionLoc, Args: "");
144 if (!Inserted)
145 return StringRef(It->getSecond());
146
147 clang::format::FormatStyle Style = clang::format::getLLVMStyle();
148
149 std::string MacroCodeBlock = ExpandedText->str();
150
151 std::vector<clang::tooling::Range> Ranges;
152 Ranges.emplace_back(args: 0, args: MacroCodeBlock.length());
153
154 clang::tooling::Replacements Replacements = clang::format::reformat(
155 Style, Code: MacroCodeBlock, Ranges, FileName: "<macro-expansion>");
156
157 llvm::Expected<std::string> Result =
158 clang::tooling::applyAllReplacements(Code: MacroCodeBlock, Replaces: Replacements);
159
160 It->getSecond() = Result ? std::move(*Result) : std::move(MacroCodeBlock);
161
162 return StringRef(It->getSecond());
163}
164
165void MacroExpansionContext::dumpExpansionRanges() const {
166 dumpExpansionRangesToStream(OS&: llvm::dbgs());
167}
168void MacroExpansionContext::dumpExpandedTexts() const {
169 dumpExpandedTextsToStream(OS&: llvm::dbgs());
170}
171
172void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const {
173 std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges;
174 LocalExpansionRanges.reserve(n: ExpansionRanges.size());
175 for (const auto &Record : ExpansionRanges)
176 LocalExpansionRanges.emplace_back(
177 args: std::make_pair(x: Record.getFirst(), y: Record.getSecond()));
178 llvm::sort(C&: LocalExpansionRanges);
179
180 OS << "\n=============== ExpansionRanges ===============\n";
181 for (const auto &Record : LocalExpansionRanges) {
182 OS << "> ";
183 Record.first.print(OS, SM: *SM);
184 OS << ", ";
185 Record.second.print(OS, SM: *SM);
186 OS << '\n';
187 }
188}
189
190void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const {
191 std::vector<std::pair<SourceLocation, MacroExpansionText>>
192 LocalExpandedTokens;
193 LocalExpandedTokens.reserve(n: ExpandedTokens.size());
194 for (const auto &Record : ExpandedTokens)
195 LocalExpandedTokens.emplace_back(
196 args: std::make_pair(x: Record.getFirst(), y: Record.getSecond()));
197 llvm::sort(C&: LocalExpandedTokens);
198
199 OS << "\n=============== ExpandedTokens ===============\n";
200 for (const auto &Record : LocalExpandedTokens) {
201 OS << "> ";
202 Record.first.print(OS, SM: *SM);
203 OS << " -> '" << Record.second << "'\n";
204 }
205}
206
207static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) {
208 assert(Tok.isNot(tok::raw_identifier));
209
210 // Ignore annotation tokens like: _Pragma("pack(push, 1)")
211 if (Tok.isAnnotation())
212 return;
213
214 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
215 // FIXME: For now, we don't respect whitespaces between macro expanded
216 // tokens. We just emit a space after every identifier to produce a valid
217 // code for `int a ;` like expansions.
218 // ^-^-- Space after the 'int' and 'a' identifiers.
219 OS << II->getName() << ' ';
220 } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) {
221 OS << StringRef(Tok.getLiteralData(), Tok.getLength());
222 } else {
223 char Tmp[256];
224 if (Tok.getLength() < sizeof(Tmp)) {
225 const char *TokPtr = Tmp;
226 // FIXME: Might use a different overload for cleaner callsite.
227 unsigned Len = PP.getSpelling(Tok, Buffer&: TokPtr);
228 OS.write(Ptr: TokPtr, Size: Len);
229 } else {
230 OS << "<too long token>";
231 }
232 }
233}
234
235void MacroExpansionContext::onTokenLexed(const Token &Tok) {
236 SourceLocation SLoc = Tok.getLocation();
237 if (SLoc.isFileID())
238 return;
239
240 LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '";
241 dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at ";
242 SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';);
243
244 // Remove spelling location.
245 SourceLocation CurrExpansionLoc = SM->getExpansionLoc(Loc: SLoc);
246
247 MacroExpansionText TokenAsString;
248 llvm::raw_svector_ostream OS(TokenAsString);
249
250 // FIXME: Prepend newlines and space to produce the exact same output as the
251 // preprocessor would for this token.
252
253 dumpTokenInto(PP: *PP, OS, Tok);
254
255 ExpansionMap::iterator It;
256 bool Inserted;
257 std::tie(args&: It, args&: Inserted) =
258 ExpandedTokens.try_emplace(Key: CurrExpansionLoc, Args: std::move(TokenAsString));
259 if (!Inserted)
260 It->getSecond().append(RHS: TokenAsString);
261}
262
263