1//===--- FormatTokenSource.h - Format C++ code ------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the \c FormatTokenSource interface, which provides a token
11/// stream as well as the ability to manipulate the token stream.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
16#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENSOURCE_H
17
18#include "UnwrappedLineParser.h"
19
20#define DEBUG_TYPE "format-token-source"
21
22namespace clang {
23namespace format {
24
25// Navigate a token stream.
26//
27// Enables traversal of a token stream, resetting the position in a token
28// stream, as well as inserting new tokens.
29class FormatTokenSource {
30public:
31 virtual ~FormatTokenSource() {}
32
33 // Returns the next token in the token stream.
34 virtual FormatToken *getNextToken() = 0;
35
36 // Returns the token preceding the token returned by the last call to
37 // getNextToken() in the token stream, or nullptr if no such token exists.
38 //
39 // Must not be called directly at the position directly after insertTokens()
40 // is called.
41 virtual FormatToken *getPreviousToken() = 0;
42
43 // Returns the token that would be returned by the next call to
44 // getNextToken().
45 virtual FormatToken *peekNextToken(bool SkipComment = false) = 0;
46
47 // Returns whether we are at the end of the file.
48 // This can be different from whether getNextToken() returned an eof token
49 // when the FormatTokenSource is a view on a part of the token stream.
50 virtual bool isEOF() = 0;
51
52 // Gets the current position in the token stream, to be used by setPosition().
53 //
54 // Note that the value of the position is not meaningful, and specifically
55 // should not be used to get relative token positions.
56 virtual unsigned getPosition() = 0;
57
58 // Resets the token stream to the state it was in when getPosition() returned
59 // Position, and return the token at that position in the stream.
60 virtual FormatToken *setPosition(unsigned Position) = 0;
61
62 // Insert the given tokens before the current position.
63 // Returns the first token in \c Tokens.
64 // The next returned token will be the second token in \c Tokens.
65 // Requires the last token in Tokens to be EOF; once the EOF token is reached,
66 // the next token will be the last token returned by getNextToken();
67 //
68 // For example, given the token sequence 'a1 a2':
69 // getNextToken() -> a1
70 // insertTokens('b1 b2') -> b1
71 // getNextToken() -> b2
72 // getNextToken() -> a1
73 // getNextToken() -> a2
74 virtual FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) = 0;
75
76 [[nodiscard]] FormatToken *getNextNonComment() {
77 FormatToken *Tok;
78 do {
79 Tok = getNextToken();
80 assert(Tok);
81 } while (Tok->is(Kind: tok::comment));
82 return Tok;
83 }
84};
85
86class IndexedTokenSource : public FormatTokenSource {
87public:
88 IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
89 : Tokens(Tokens), Position(-1) {}
90
91 FormatToken *getNextToken() override {
92 if (Position >= 0 && isEOF()) {
93 LLVM_DEBUG({
94 llvm::dbgs() << "Next ";
95 dbgToken(Position);
96 });
97 return Tokens[Position];
98 }
99 Position = successor(Current: Position);
100 LLVM_DEBUG({
101 llvm::dbgs() << "Next ";
102 dbgToken(Position);
103 });
104 return Tokens[Position];
105 }
106
107 FormatToken *getPreviousToken() override {
108 assert(Position <= 0 || Tokens[Position - 1]->isNot(tok::eof));
109 return Position > 0 ? Tokens[Position - 1] : nullptr;
110 }
111
112 FormatToken *peekNextToken(bool SkipComment = false) override {
113 if (isEOF())
114 return Tokens[Position];
115 int Next = successor(Current: Position);
116 if (SkipComment)
117 while (Tokens[Next]->is(Kind: tok::comment))
118 Next = successor(Current: Next);
119 LLVM_DEBUG({
120 llvm::dbgs() << "Peeking ";
121 dbgToken(Next);
122 });
123 return Tokens[Next];
124 }
125
126 bool isEOF() override {
127 return Position == -1 ? false : Tokens[Position]->is(Kind: tok::eof);
128 }
129
130 unsigned getPosition() override {
131 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
132 assert(Position >= 0);
133 return Position;
134 }
135
136 FormatToken *setPosition(unsigned P) override {
137 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
138 Position = P;
139 return Tokens[Position];
140 }
141
142 FormatToken *insertTokens(ArrayRef<FormatToken *> New) override {
143 assert(Position != -1);
144 assert((*New.rbegin())->Tok.is(tok::eof));
145 int Next = Tokens.size();
146 Tokens.append(in_start: New.begin(), in_end: New.end());
147 LLVM_DEBUG({
148 llvm::dbgs() << "Inserting:\n";
149 for (int I = Next, E = Tokens.size(); I != E; ++I)
150 dbgToken(I, " ");
151 llvm::dbgs() << " Jump from: " << (Tokens.size() - 1) << " -> "
152 << Position << "\n";
153 });
154 Jumps[Tokens.size() - 1] = Position;
155 Position = Next;
156 LLVM_DEBUG({
157 llvm::dbgs() << "At inserted token ";
158 dbgToken(Position);
159 });
160 return Tokens[Position];
161 }
162
163 void reset() { Position = -1; }
164
165private:
166 int successor(int Current) const {
167 int Next = Current + 1;
168 auto it = Jumps.find(Val: Next);
169 if (it != Jumps.end()) {
170 Next = it->second;
171 assert(!Jumps.contains(Next));
172 }
173 return Next;
174 }
175
176 void dbgToken(int Position, StringRef Indent = "") {
177 FormatToken *Tok = Tokens[Position];
178 llvm::dbgs() << Indent << "[" << Position
179 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
180 << ", Macro: " << !!Tok->MacroCtx << "\n";
181 }
182
183 SmallVector<FormatToken *> Tokens;
184 int Position;
185
186 // Maps from position a to position b, so that when we reach a, the token
187 // stream continues at position b instead.
188 llvm::DenseMap<int, int> Jumps;
189};
190
191class ScopedMacroState : public FormatTokenSource {
192public:
193 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
194 FormatToken *&ResetToken)
195 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
196 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
197 Token(nullptr), PreviousToken(nullptr) {
198 FakeEOF.Tok.startToken();
199 FakeEOF.Tok.setKind(tok::eof);
200 TokenSource = this;
201 Line.Level = 0;
202 Line.InPPDirective = true;
203 // InMacroBody gets set after the `#define x` part.
204 }
205
206 ~ScopedMacroState() override {
207 TokenSource = PreviousTokenSource;
208 ResetToken = Token;
209 Line.InPPDirective = false;
210 Line.InMacroBody = false;
211 Line.Level = PreviousLineLevel;
212 }
213
214 FormatToken *getNextToken() override {
215 // The \c UnwrappedLineParser guards against this by never calling
216 // \c getNextToken() after it has encountered the first eof token.
217 assert(!eof());
218 PreviousToken = Token;
219 Token = PreviousTokenSource->getNextToken();
220 if (eof())
221 return &FakeEOF;
222 return Token;
223 }
224
225 FormatToken *getPreviousToken() override {
226 return PreviousTokenSource->getPreviousToken();
227 }
228
229 FormatToken *peekNextToken(bool SkipComment) override {
230 if (eof())
231 return &FakeEOF;
232 return PreviousTokenSource->peekNextToken(SkipComment);
233 }
234
235 bool isEOF() override { return PreviousTokenSource->isEOF(); }
236
237 unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
238
239 FormatToken *setPosition(unsigned Position) override {
240 PreviousToken = nullptr;
241 Token = PreviousTokenSource->setPosition(Position);
242 return Token;
243 }
244
245 FormatToken *insertTokens(ArrayRef<FormatToken *> Tokens) override {
246 llvm_unreachable("Cannot insert tokens while parsing a macro.");
247 return nullptr;
248 }
249
250private:
251 bool eof() {
252 return Token && Token->HasUnescapedNewline &&
253 !continuesLineComment(FormatTok: *Token, Previous: PreviousToken,
254 /*MinColumnToken=*/MinColumnToken: PreviousToken);
255 }
256
257 FormatToken FakeEOF;
258 UnwrappedLine &Line;
259 FormatTokenSource *&TokenSource;
260 FormatToken *&ResetToken;
261 unsigned PreviousLineLevel;
262 FormatTokenSource *PreviousTokenSource;
263
264 FormatToken *Token;
265 FormatToken *PreviousToken;
266};
267
268} // namespace format
269} // namespace clang
270
271#undef DEBUG_TYPE
272
273#endif
274