| 1 | //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// \file |
| 10 | /// This file contains the implementation of MacroExpander, which handles macro |
| 11 | /// configuration and expansion while formatting. |
| 12 | /// |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "Macros.h" |
| 16 | |
| 17 | #include "Encoding.h" |
| 18 | #include "FormatToken.h" |
| 19 | #include "FormatTokenLexer.h" |
| 20 | #include "clang/Basic/TokenKinds.h" |
| 21 | #include "clang/Format/Format.h" |
| 22 | #include "clang/Lex/HeaderSearch.h" |
| 23 | #include "clang/Lex/Lexer.h" |
| 24 | #include "clang/Lex/PreprocessorOptions.h" |
| 25 | #include "llvm/ADT/StringSet.h" |
| 26 | #include "llvm/Support/ErrorHandling.h" |
| 27 | |
| 28 | namespace clang { |
| 29 | namespace format { |
| 30 | |
| 31 | struct MacroExpander::Definition { |
| 32 | StringRef Name; |
| 33 | SmallVector<FormatToken *, 8> Params; |
| 34 | SmallVector<FormatToken *, 8> Body; |
| 35 | |
| 36 | // Map from each argument's name to its position in the argument list. |
| 37 | // With "M(x, y) x + y": |
| 38 | // x -> 0 |
| 39 | // y -> 1 |
| 40 | llvm::StringMap<size_t> ArgMap; |
| 41 | |
| 42 | bool ObjectLike = true; |
| 43 | }; |
| 44 | |
| 45 | class MacroExpander::DefinitionParser { |
| 46 | public: |
| 47 | DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) { |
| 48 | assert(!Tokens.empty()); |
| 49 | Current = Tokens[0]; |
| 50 | } |
| 51 | |
| 52 | // Parse the token stream and return the corresponding Definition object. |
| 53 | // Returns an empty definition object with a null-Name on error. |
| 54 | MacroExpander::Definition parse() { |
| 55 | if (Current->isNot(Kind: tok::identifier)) |
| 56 | return {}; |
| 57 | Def.Name = Current->TokenText; |
| 58 | nextToken(); |
| 59 | if (Current->is(Kind: tok::l_paren)) { |
| 60 | Def.ObjectLike = false; |
| 61 | if (!parseParams()) |
| 62 | return {}; |
| 63 | } |
| 64 | if (!parseExpansion()) |
| 65 | return {}; |
| 66 | |
| 67 | return Def; |
| 68 | } |
| 69 | |
| 70 | private: |
| 71 | bool parseParams() { |
| 72 | assert(Current->is(tok::l_paren)); |
| 73 | nextToken(); |
| 74 | while (Current->is(Kind: tok::identifier)) { |
| 75 | Def.Params.push_back(Elt: Current); |
| 76 | Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1; |
| 77 | nextToken(); |
| 78 | if (Current->isNot(Kind: tok::comma)) |
| 79 | break; |
| 80 | nextToken(); |
| 81 | } |
| 82 | if (Current->isNot(Kind: tok::r_paren)) |
| 83 | return false; |
| 84 | nextToken(); |
| 85 | return true; |
| 86 | } |
| 87 | |
| 88 | bool parseExpansion() { |
| 89 | if (!Current->isOneOf(K1: tok::equal, K2: tok::eof)) |
| 90 | return false; |
| 91 | if (Current->is(Kind: tok::equal)) |
| 92 | nextToken(); |
| 93 | parseTail(); |
| 94 | return true; |
| 95 | } |
| 96 | |
| 97 | void parseTail() { |
| 98 | while (Current->isNot(Kind: tok::eof)) { |
| 99 | Def.Body.push_back(Elt: Current); |
| 100 | nextToken(); |
| 101 | } |
| 102 | Def.Body.push_back(Elt: Current); |
| 103 | } |
| 104 | |
| 105 | void nextToken() { |
| 106 | if (Pos + 1 < Tokens.size()) |
| 107 | ++Pos; |
| 108 | Current = Tokens[Pos]; |
| 109 | Current->Finalized = true; |
| 110 | } |
| 111 | |
| 112 | size_t Pos = 0; |
| 113 | FormatToken *Current = nullptr; |
| 114 | Definition Def; |
| 115 | ArrayRef<FormatToken *> Tokens; |
| 116 | }; |
| 117 | |
| 118 | MacroExpander::MacroExpander( |
| 119 | const std::vector<std::string> &Macros, SourceManager &SourceMgr, |
| 120 | const FormatStyle &Style, |
| 121 | llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, |
| 122 | IdentifierTable &IdentTable) |
| 123 | : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator), |
| 124 | IdentTable(IdentTable) { |
| 125 | for (const std::string &Macro : Macros) |
| 126 | parseDefinition(Macro); |
| 127 | } |
| 128 | |
| 129 | MacroExpander::~MacroExpander() = default; |
| 130 | |
| 131 | void MacroExpander::parseDefinition(const std::string &Macro) { |
| 132 | Buffers.push_back( |
| 133 | Elt: llvm::MemoryBuffer::getMemBufferCopy(InputData: Macro, BufferName: "<scratch space>" )); |
| 134 | FileID FID = SourceMgr.createFileID(Buffer: Buffers.back()->getMemBufferRef()); |
| 135 | FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8, |
| 136 | Allocator, IdentTable); |
| 137 | const auto Tokens = Lex.lex(); |
| 138 | if (!Tokens.empty()) { |
| 139 | DefinitionParser Parser(Tokens); |
| 140 | auto Definition = Parser.parse(); |
| 141 | if (Definition.ObjectLike) { |
| 142 | ObjectLike[Definition.Name] = std::move(Definition); |
| 143 | } else { |
| 144 | FunctionLike[Definition.Name][Definition.Params.size()] = |
| 145 | std::move(Definition); |
| 146 | } |
| 147 | } |
| 148 | } |
| 149 | |
| 150 | bool MacroExpander::defined(StringRef Name) const { |
| 151 | return FunctionLike.contains(Key: Name) || ObjectLike.contains(Key: Name); |
| 152 | } |
| 153 | |
| 154 | bool MacroExpander::objectLike(StringRef Name) const { |
| 155 | return ObjectLike.contains(Key: Name); |
| 156 | } |
| 157 | |
| 158 | bool MacroExpander::hasArity(StringRef Name, unsigned Arity) const { |
| 159 | auto it = FunctionLike.find(Key: Name); |
| 160 | return it != FunctionLike.end() && it->second.contains(Val: Arity); |
| 161 | } |
| 162 | |
| 163 | SmallVector<FormatToken *, 8> |
| 164 | MacroExpander::expand(FormatToken *ID, |
| 165 | std::optional<ArgsList> OptionalArgs) const { |
| 166 | if (OptionalArgs) |
| 167 | assert(hasArity(ID->TokenText, OptionalArgs->size())); |
| 168 | else |
| 169 | assert(objectLike(ID->TokenText)); |
| 170 | const Definition &Def = OptionalArgs |
| 171 | ? FunctionLike.find(Key: ID->TokenText) |
| 172 | ->second.find(Val: OptionalArgs.value().size()) |
| 173 | ->second |
| 174 | : ObjectLike.find(Key: ID->TokenText)->second; |
| 175 | ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList(); |
| 176 | SmallVector<FormatToken *, 8> Result; |
| 177 | // Expand each argument at most once. |
| 178 | llvm::StringSet<> ExpandedArgs; |
| 179 | |
| 180 | // Adds the given token to Result. |
| 181 | auto pushToken = [&](FormatToken *Tok) { |
| 182 | Tok->MacroCtx->ExpandedFrom.push_back(Elt: ID); |
| 183 | Result.push_back(Elt: Tok); |
| 184 | }; |
| 185 | |
| 186 | // If Tok references a parameter, adds the corresponding argument to Result. |
| 187 | // Returns false if Tok does not reference a parameter. |
| 188 | auto expandArgument = [&](FormatToken *Tok) -> bool { |
| 189 | // If the current token references a parameter, expand the corresponding |
| 190 | // argument. |
| 191 | if (Tok->isNot(Kind: tok::identifier)) |
| 192 | return false; |
| 193 | if (!ExpandedArgs.insert(key: Tok->TokenText).second) |
| 194 | return false; |
| 195 | auto I = Def.ArgMap.find(Key: Tok->TokenText); |
| 196 | if (I == Def.ArgMap.end()) |
| 197 | return false; |
| 198 | // If there are fewer arguments than referenced parameters, treat the |
| 199 | // parameter as empty. |
| 200 | // FIXME: Potentially fully abort the expansion instead. |
| 201 | if (I->getValue() >= Args.size()) |
| 202 | return true; |
| 203 | for (FormatToken *Arg : Args[I->getValue()]) { |
| 204 | // A token can be part of a macro argument at multiple levels. |
| 205 | // For example, with "ID(x) x": |
| 206 | // in ID(ID(x)), 'x' is expanded first as argument to the inner |
| 207 | // ID, then again as argument to the outer ID. We keep the macro |
| 208 | // role the token had from the inner expansion. |
| 209 | if (!Arg->MacroCtx) |
| 210 | Arg->MacroCtx = MacroExpansion(MR_ExpandedArg); |
| 211 | pushToken(Arg); |
| 212 | } |
| 213 | return true; |
| 214 | }; |
| 215 | |
| 216 | // Expand the definition into Result. |
| 217 | for (FormatToken *Tok : Def.Body) { |
| 218 | if (expandArgument(Tok)) |
| 219 | continue; |
| 220 | // Create a copy of the tokens from the macro body, i.e. were not provided |
| 221 | // by user code. |
| 222 | FormatToken *New = new (Allocator.Allocate()) FormatToken; |
| 223 | New->copyFrom(Tok: *Tok); |
| 224 | assert(!New->MacroCtx); |
| 225 | // Tokens that are not part of the user code are not formatted. |
| 226 | New->MacroCtx = MacroExpansion(MR_Hidden); |
| 227 | pushToken(New); |
| 228 | } |
| 229 | assert(Result.size() >= 1 && Result.back()->is(tok::eof)); |
| 230 | if (Result.size() > 1) { |
| 231 | ++Result[0]->MacroCtx->StartOfExpansion; |
| 232 | ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion; |
| 233 | } else { |
| 234 | // If the macro expansion is empty, mark the start and end. |
| 235 | Result[0]->MacroCtx->StartOfExpansion = 1; |
| 236 | Result[0]->MacroCtx->EndOfExpansion = 1; |
| 237 | } |
| 238 | return Result; |
| 239 | } |
| 240 | |
| 241 | } // namespace format |
| 242 | } // namespace clang |
| 243 | |