1//===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Analysis/MacroExpansionContext.h"
10#include "llvm/Support/Debug.h"
11#include <optional>
12
13#define DEBUG_TYPE "macro-expansion-context"
14
15static void dumpTokenInto(const clang::Preprocessor &PP, llvm::raw_ostream &OS,
16 clang::Token Tok);
17
18namespace clang {
19namespace detail {
20class MacroExpansionRangeRecorder : public PPCallbacks {
21 const Preprocessor &PP;
22 SourceManager &SM;
23 MacroExpansionContext::ExpansionRangeMap &ExpansionRanges;
24
25public:
26 explicit MacroExpansionRangeRecorder(
27 const Preprocessor &PP, SourceManager &SM,
28 MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
29 : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {}
30
31 void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
32 SourceRange Range, const MacroArgs *Args) override {
33 // Ignore annotation tokens like: _Pragma("pack(push, 1)")
34 if (MacroName.getIdentifierInfo()->getName() == "_Pragma")
35 return;
36
37 SourceLocation MacroNameBegin = SM.getExpansionLoc(Loc: MacroName.getLocation());
38 assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin()));
39
40 const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] {
41 // If the range is empty, use the length of the macro.
42 if (Range.getBegin() == Range.getEnd())
43 return SM.getExpansionLoc(
44 Loc: MacroName.getLocation().getLocWithOffset(Offset: MacroName.getLength()));
45
46 // Include the last character.
47 return SM.getExpansionLoc(Loc: Range.getEnd()).getLocWithOffset(Offset: 1);
48 }();
49
50 (void)PP;
51 LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '";
52 dumpTokenInto(PP, llvm::dbgs(), MacroName);
53 llvm::dbgs()
54 << "' with length " << MacroName.getLength() << " at ";
55 MacroNameBegin.print(llvm::dbgs(), SM);
56 llvm::dbgs() << ", expansion end at ";
57 ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';);
58
59 // If the expansion range is empty, use the identifier of the macro as a
60 // range.
61 MacroExpansionContext::ExpansionRangeMap::iterator It;
62 bool Inserted;
63 std::tie(args&: It, args&: Inserted) =
64 ExpansionRanges.try_emplace(Key: MacroNameBegin, Args: ExpansionEnd);
65 if (Inserted) {
66 LLVM_DEBUG(llvm::dbgs() << "maps ";
67 It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to ";
68 It->getSecond().print(llvm::dbgs(), SM);
69 llvm::dbgs() << '\n';);
70 } else {
71 if (SM.isBeforeInTranslationUnit(LHS: It->getSecond(), RHS: ExpansionEnd)) {
72 It->getSecond() = ExpansionEnd;
73 LLVM_DEBUG(
74 llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM);
75 llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM);
76 llvm::dbgs() << '\n';);
77 }
78 }
79 }
80};
81} // namespace detail
82} // namespace clang
83
84using namespace clang;
85
86MacroExpansionContext::MacroExpansionContext(const LangOptions &LangOpts)
87 : LangOpts(LangOpts) {}
88
89void MacroExpansionContext::registerForPreprocessor(Preprocessor &NewPP) {
90 PP = &NewPP;
91 SM = &NewPP.getSourceManager();
92
93 // Make sure that the Preprocessor does not outlive the MacroExpansionContext.
94 PP->addPPCallbacks(C: std::make_unique<detail::MacroExpansionRangeRecorder>(
95 args&: *PP, args&: *SM, args&: ExpansionRanges));
96 // Same applies here.
97 PP->setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); });
98}
99
100std::optional<StringRef>
101MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const {
102 if (MacroExpansionLoc.isMacroID())
103 return std::nullopt;
104
105 // If there was no macro expansion at that location, return std::nullopt.
106 if (ExpansionRanges.find_as(Val: MacroExpansionLoc) == ExpansionRanges.end())
107 return std::nullopt;
108
109 // There was macro expansion, but resulted in no tokens, return empty string.
110 const auto It = ExpandedTokens.find_as(Val: MacroExpansionLoc);
111 if (It == ExpandedTokens.end())
112 return StringRef{""};
113
114 // Otherwise we have the actual token sequence as string.
115 return It->getSecond().str();
116}
117
118std::optional<StringRef>
119MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const {
120 if (MacroExpansionLoc.isMacroID())
121 return std::nullopt;
122
123 const auto It = ExpansionRanges.find_as(Val: MacroExpansionLoc);
124 if (It == ExpansionRanges.end())
125 return std::nullopt;
126
127 assert(It->getFirst() != It->getSecond() &&
128 "Every macro expansion must cover a non-empty range.");
129
130 return Lexer::getSourceText(
131 Range: CharSourceRange::getCharRange(B: It->getFirst(), E: It->getSecond()), SM: *SM,
132 LangOpts);
133}
134
135void MacroExpansionContext::dumpExpansionRanges() const {
136 dumpExpansionRangesToStream(OS&: llvm::dbgs());
137}
138void MacroExpansionContext::dumpExpandedTexts() const {
139 dumpExpandedTextsToStream(OS&: llvm::dbgs());
140}
141
142void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const {
143 std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges;
144 LocalExpansionRanges.reserve(n: ExpansionRanges.size());
145 for (const auto &Record : ExpansionRanges)
146 LocalExpansionRanges.emplace_back(
147 args: std::make_pair(x: Record.getFirst(), y: Record.getSecond()));
148 llvm::sort(C&: LocalExpansionRanges);
149
150 OS << "\n=============== ExpansionRanges ===============\n";
151 for (const auto &Record : LocalExpansionRanges) {
152 OS << "> ";
153 Record.first.print(OS, SM: *SM);
154 OS << ", ";
155 Record.second.print(OS, SM: *SM);
156 OS << '\n';
157 }
158}
159
160void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const {
161 std::vector<std::pair<SourceLocation, MacroExpansionText>>
162 LocalExpandedTokens;
163 LocalExpandedTokens.reserve(n: ExpandedTokens.size());
164 for (const auto &Record : ExpandedTokens)
165 LocalExpandedTokens.emplace_back(
166 args: std::make_pair(x: Record.getFirst(), y: Record.getSecond()));
167 llvm::sort(C&: LocalExpandedTokens);
168
169 OS << "\n=============== ExpandedTokens ===============\n";
170 for (const auto &Record : LocalExpandedTokens) {
171 OS << "> ";
172 Record.first.print(OS, SM: *SM);
173 OS << " -> '" << Record.second << "'\n";
174 }
175}
176
177static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) {
178 assert(Tok.isNot(tok::raw_identifier));
179
180 // Ignore annotation tokens like: _Pragma("pack(push, 1)")
181 if (Tok.isAnnotation())
182 return;
183
184 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
185 // FIXME: For now, we don't respect whitespaces between macro expanded
186 // tokens. We just emit a space after every identifier to produce a valid
187 // code for `int a ;` like expansions.
188 // ^-^-- Space after the 'int' and 'a' identifiers.
189 OS << II->getName() << ' ';
190 } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) {
191 OS << StringRef(Tok.getLiteralData(), Tok.getLength());
192 } else {
193 char Tmp[256];
194 if (Tok.getLength() < sizeof(Tmp)) {
195 const char *TokPtr = Tmp;
196 // FIXME: Might use a different overload for cleaner callsite.
197 unsigned Len = PP.getSpelling(Tok, Buffer&: TokPtr);
198 OS.write(Ptr: TokPtr, Size: Len);
199 } else {
200 OS << "<too long token>";
201 }
202 }
203}
204
205void MacroExpansionContext::onTokenLexed(const Token &Tok) {
206 SourceLocation SLoc = Tok.getLocation();
207 if (SLoc.isFileID())
208 return;
209
210 LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '";
211 dumpTokenInto(*PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at ";
212 SLoc.print(llvm::dbgs(), *SM); llvm::dbgs() << '\n';);
213
214 // Remove spelling location.
215 SourceLocation CurrExpansionLoc = SM->getExpansionLoc(Loc: SLoc);
216
217 MacroExpansionText TokenAsString;
218 llvm::raw_svector_ostream OS(TokenAsString);
219
220 // FIXME: Prepend newlines and space to produce the exact same output as the
221 // preprocessor would for this token.
222
223 dumpTokenInto(PP: *PP, OS, Tok);
224
225 ExpansionMap::iterator It;
226 bool Inserted;
227 std::tie(args&: It, args&: Inserted) =
228 ExpandedTokens.try_emplace(Key: CurrExpansionLoc, Args: std::move(TokenAsString));
229 if (!Inserted)
230 It->getSecond().append(RHS: TokenAsString);
231}
232
233