1 | //===- Parser.cpp - Matcher expression parser -----------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// Recursive parser implementation for the matcher expression grammar. |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "clang/ASTMatchers/Dynamic/Parser.h" |
15 | #include "clang/ASTMatchers/ASTMatchersInternal.h" |
16 | #include "clang/ASTMatchers/Dynamic/Diagnostics.h" |
17 | #include "clang/ASTMatchers/Dynamic/Registry.h" |
18 | #include "clang/Basic/CharInfo.h" |
19 | #include "llvm/ADT/StringRef.h" |
20 | #include "llvm/Support/ErrorHandling.h" |
21 | #include "llvm/Support/ManagedStatic.h" |
22 | #include <algorithm> |
23 | #include <cassert> |
24 | #include <cerrno> |
25 | #include <cstddef> |
26 | #include <cstdlib> |
27 | #include <optional> |
28 | #include <string> |
29 | #include <utility> |
30 | #include <vector> |
31 | |
32 | namespace clang { |
33 | namespace ast_matchers { |
34 | namespace dynamic { |
35 | |
36 | /// Simple structure to hold information for one token from the parser. |
37 | struct Parser::TokenInfo { |
38 | /// Different possible tokens. |
39 | enum TokenKind { |
40 | TK_Eof, |
41 | TK_NewLine, |
42 | TK_OpenParen, |
43 | TK_CloseParen, |
44 | TK_Comma, |
45 | TK_Period, |
46 | TK_Literal, |
47 | TK_Ident, |
48 | TK_InvalidChar, |
49 | TK_Error, |
50 | TK_CodeCompletion |
51 | }; |
52 | |
53 | /// Some known identifiers. |
54 | static const char* const ID_Bind; |
55 | static const char *const ID_With; |
56 | |
57 | TokenInfo() = default; |
58 | |
59 | StringRef Text; |
60 | TokenKind Kind = TK_Eof; |
61 | SourceRange Range; |
62 | VariantValue Value; |
63 | }; |
64 | |
65 | const char* const Parser::TokenInfo::ID_Bind = "bind" ; |
66 | const char *const Parser::TokenInfo::ID_With = "with" ; |
67 | |
68 | /// Simple tokenizer for the parser. |
69 | class Parser::CodeTokenizer { |
70 | public: |
71 | explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error) |
72 | : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) { |
73 | NextToken = getNextToken(); |
74 | } |
75 | |
76 | CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error, |
77 | unsigned CodeCompletionOffset) |
78 | : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error), |
79 | CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) { |
80 | NextToken = getNextToken(); |
81 | } |
82 | |
83 | /// Returns but doesn't consume the next token. |
84 | const TokenInfo &peekNextToken() const { return NextToken; } |
85 | |
86 | /// Consumes and returns the next token. |
87 | TokenInfo consumeNextToken() { |
88 | TokenInfo ThisToken = NextToken; |
89 | NextToken = getNextToken(); |
90 | return ThisToken; |
91 | } |
92 | |
93 | TokenInfo SkipNewlines() { |
94 | while (NextToken.Kind == TokenInfo::TK_NewLine) |
95 | NextToken = getNextToken(); |
96 | return NextToken; |
97 | } |
98 | |
99 | TokenInfo consumeNextTokenIgnoreNewlines() { |
100 | SkipNewlines(); |
101 | if (NextToken.Kind == TokenInfo::TK_Eof) |
102 | return NextToken; |
103 | return consumeNextToken(); |
104 | } |
105 | |
106 | TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; } |
107 | |
108 | private: |
109 | TokenInfo getNextToken() { |
110 | consumeWhitespace(); |
111 | TokenInfo Result; |
112 | Result.Range.Start = currentLocation(); |
113 | |
114 | if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) { |
115 | Result.Kind = TokenInfo::TK_CodeCompletion; |
116 | Result.Text = StringRef(CodeCompletionLocation, 0); |
117 | CodeCompletionLocation = nullptr; |
118 | return Result; |
119 | } |
120 | |
121 | if (Code.empty()) { |
122 | Result.Kind = TokenInfo::TK_Eof; |
123 | Result.Text = "" ; |
124 | return Result; |
125 | } |
126 | |
127 | switch (Code[0]) { |
128 | case '#': |
129 | Code = Code.drop_until(F: [](char c) { return c == '\n'; }); |
130 | return getNextToken(); |
131 | case ',': |
132 | Result.Kind = TokenInfo::TK_Comma; |
133 | Result.Text = Code.substr(Start: 0, N: 1); |
134 | Code = Code.drop_front(); |
135 | break; |
136 | case '.': |
137 | Result.Kind = TokenInfo::TK_Period; |
138 | Result.Text = Code.substr(Start: 0, N: 1); |
139 | Code = Code.drop_front(); |
140 | break; |
141 | case '\n': |
142 | ++Line; |
143 | StartOfLine = Code.drop_front(); |
144 | Result.Kind = TokenInfo::TK_NewLine; |
145 | Result.Text = Code.substr(Start: 0, N: 1); |
146 | Code = Code.drop_front(); |
147 | break; |
148 | case '(': |
149 | Result.Kind = TokenInfo::TK_OpenParen; |
150 | Result.Text = Code.substr(Start: 0, N: 1); |
151 | Code = Code.drop_front(); |
152 | break; |
153 | case ')': |
154 | Result.Kind = TokenInfo::TK_CloseParen; |
155 | Result.Text = Code.substr(Start: 0, N: 1); |
156 | Code = Code.drop_front(); |
157 | break; |
158 | |
159 | case '"': |
160 | case '\'': |
161 | // Parse a string literal. |
162 | consumeStringLiteral(Result: &Result); |
163 | break; |
164 | |
165 | case '0': case '1': case '2': case '3': case '4': |
166 | case '5': case '6': case '7': case '8': case '9': |
167 | // Parse an unsigned and float literal. |
168 | consumeNumberLiteral(Result: &Result); |
169 | break; |
170 | |
171 | default: |
172 | if (isAlphanumeric(c: Code[0])) { |
173 | // Parse an identifier |
174 | size_t TokenLength = 1; |
175 | while (true) { |
176 | // A code completion location in/immediately after an identifier will |
177 | // cause the portion of the identifier before the code completion |
178 | // location to become a code completion token. |
179 | if (CodeCompletionLocation == Code.data() + TokenLength) { |
180 | CodeCompletionLocation = nullptr; |
181 | Result.Kind = TokenInfo::TK_CodeCompletion; |
182 | Result.Text = Code.substr(Start: 0, N: TokenLength); |
183 | Code = Code.drop_front(N: TokenLength); |
184 | return Result; |
185 | } |
186 | if (TokenLength == Code.size() || !isAlphanumeric(c: Code[TokenLength])) |
187 | break; |
188 | ++TokenLength; |
189 | } |
190 | if (TokenLength == 4 && Code.starts_with(Prefix: "true" )) { |
191 | Result.Kind = TokenInfo::TK_Literal; |
192 | Result.Value = true; |
193 | } else if (TokenLength == 5 && Code.starts_with(Prefix: "false" )) { |
194 | Result.Kind = TokenInfo::TK_Literal; |
195 | Result.Value = false; |
196 | } else { |
197 | Result.Kind = TokenInfo::TK_Ident; |
198 | Result.Text = Code.substr(Start: 0, N: TokenLength); |
199 | } |
200 | Code = Code.drop_front(N: TokenLength); |
201 | } else { |
202 | Result.Kind = TokenInfo::TK_InvalidChar; |
203 | Result.Text = Code.substr(Start: 0, N: 1); |
204 | Code = Code.drop_front(N: 1); |
205 | } |
206 | break; |
207 | } |
208 | |
209 | Result.Range.End = currentLocation(); |
210 | return Result; |
211 | } |
212 | |
213 | /// Consume an unsigned and float literal. |
214 | void (TokenInfo *Result) { |
215 | bool isFloatingLiteral = false; |
216 | unsigned Length = 1; |
217 | if (Code.size() > 1) { |
218 | // Consume the 'x' or 'b' radix modifier, if present. |
219 | switch (toLowercase(c: Code[1])) { |
220 | case 'x': case 'b': Length = 2; |
221 | } |
222 | } |
223 | while (Length < Code.size() && isHexDigit(c: Code[Length])) |
224 | ++Length; |
225 | |
226 | // Try to recognize a floating point literal. |
227 | while (Length < Code.size()) { |
228 | char c = Code[Length]; |
229 | if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) { |
230 | isFloatingLiteral = true; |
231 | Length++; |
232 | } else { |
233 | break; |
234 | } |
235 | } |
236 | |
237 | Result->Text = Code.substr(Start: 0, N: Length); |
238 | Code = Code.drop_front(N: Length); |
239 | |
240 | if (isFloatingLiteral) { |
241 | char *end; |
242 | errno = 0; |
243 | std::string Text = Result->Text.str(); |
244 | double doubleValue = strtod(nptr: Text.c_str(), endptr: &end); |
245 | if (*end == 0 && errno == 0) { |
246 | Result->Kind = TokenInfo::TK_Literal; |
247 | Result->Value = doubleValue; |
248 | return; |
249 | } |
250 | } else { |
251 | unsigned Value; |
252 | if (!Result->Text.getAsInteger(Radix: 0, Result&: Value)) { |
253 | Result->Kind = TokenInfo::TK_Literal; |
254 | Result->Value = Value; |
255 | return; |
256 | } |
257 | } |
258 | |
259 | SourceRange Range; |
260 | Range.Start = Result->Range.Start; |
261 | Range.End = currentLocation(); |
262 | Error->addError(Range, Error: Error->ET_ParserNumberError) << Result->Text; |
263 | Result->Kind = TokenInfo::TK_Error; |
264 | } |
265 | |
266 | /// Consume a string literal. |
267 | /// |
268 | /// \c Code must be positioned at the start of the literal (the opening |
269 | /// quote). Consumed until it finds the same closing quote character. |
270 | void consumeStringLiteral(TokenInfo *Result) { |
271 | bool InEscape = false; |
272 | const char Marker = Code[0]; |
273 | for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) { |
274 | if (InEscape) { |
275 | InEscape = false; |
276 | continue; |
277 | } |
278 | if (Code[Length] == '\\') { |
279 | InEscape = true; |
280 | continue; |
281 | } |
282 | if (Code[Length] == Marker) { |
283 | Result->Kind = TokenInfo::TK_Literal; |
284 | Result->Text = Code.substr(Start: 0, N: Length + 1); |
285 | Result->Value = Code.substr(Start: 1, N: Length - 1); |
286 | Code = Code.drop_front(N: Length + 1); |
287 | return; |
288 | } |
289 | } |
290 | |
291 | StringRef ErrorText = Code; |
292 | Code = Code.drop_front(N: Code.size()); |
293 | SourceRange Range; |
294 | Range.Start = Result->Range.Start; |
295 | Range.End = currentLocation(); |
296 | Error->addError(Range, Error: Error->ET_ParserStringError) << ErrorText; |
297 | Result->Kind = TokenInfo::TK_Error; |
298 | } |
299 | |
300 | /// Consume all leading whitespace from \c Code. |
301 | void consumeWhitespace() { |
302 | // Don't trim newlines. |
303 | Code = Code.ltrim(Chars: " \t\v\f\r" ); |
304 | } |
305 | |
306 | SourceLocation currentLocation() { |
307 | SourceLocation Location; |
308 | Location.Line = Line; |
309 | Location.Column = Code.data() - StartOfLine.data() + 1; |
310 | return Location; |
311 | } |
312 | |
313 | StringRef &Code; |
314 | StringRef StartOfLine; |
315 | unsigned Line = 1; |
316 | Diagnostics *Error; |
317 | TokenInfo NextToken; |
318 | const char *CodeCompletionLocation = nullptr; |
319 | }; |
320 | |
321 | Parser::Sema::~Sema() = default; |
322 | |
323 | std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes( |
324 | llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) { |
325 | return {}; |
326 | } |
327 | |
328 | std::vector<MatcherCompletion> |
329 | Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) { |
330 | return {}; |
331 | } |
332 | |
333 | struct Parser::ScopedContextEntry { |
334 | Parser *P; |
335 | |
336 | ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) { |
337 | P->ContextStack.push_back(x: std::make_pair(x&: C, y: 0u)); |
338 | } |
339 | |
340 | ~ScopedContextEntry() { |
341 | P->ContextStack.pop_back(); |
342 | } |
343 | |
344 | void nextArg() { |
345 | ++P->ContextStack.back().second; |
346 | } |
347 | }; |
348 | |
349 | /// Parse expressions that start with an identifier. |
350 | /// |
351 | /// This function can parse named values and matchers. |
352 | /// In case of failure it will try to determine the user's intent to give |
353 | /// an appropriate error message. |
354 | bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) { |
355 | const TokenInfo NameToken = Tokenizer->consumeNextToken(); |
356 | |
357 | if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) { |
358 | // Parse as a named value. |
359 | if (const VariantValue NamedValue = |
360 | NamedValues ? NamedValues->lookup(Key: NameToken.Text) |
361 | : VariantValue()) { |
362 | |
363 | if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) { |
364 | *Value = NamedValue; |
365 | return true; |
366 | } |
367 | |
368 | std::string BindID; |
369 | Tokenizer->consumeNextToken(); |
370 | TokenInfo ChainCallToken = Tokenizer->consumeNextToken(); |
371 | if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) { |
372 | addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("bind(\"" , "bind" , 1)); |
373 | return false; |
374 | } |
375 | |
376 | if (ChainCallToken.Kind != TokenInfo::TK_Ident || |
377 | (ChainCallToken.Text != TokenInfo::ID_Bind && |
378 | ChainCallToken.Text != TokenInfo::ID_With)) { |
379 | Error->addError(Range: ChainCallToken.Range, |
380 | Error: Error->ET_ParserMalformedChainedExpr); |
381 | return false; |
382 | } |
383 | if (ChainCallToken.Text == TokenInfo::ID_With) { |
384 | |
385 | Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, |
386 | NameToken.Text, NameToken.Range); |
387 | |
388 | Error->addError(Range: ChainCallToken.Range, |
389 | Error: Error->ET_RegistryMatcherNoWithSupport); |
390 | return false; |
391 | } |
392 | if (!parseBindID(BindID)) |
393 | return false; |
394 | |
395 | assert(NamedValue.isMatcher()); |
396 | std::optional<DynTypedMatcher> Result = |
397 | NamedValue.getMatcher().getSingleMatcher(); |
398 | if (Result) { |
399 | std::optional<DynTypedMatcher> Bound = Result->tryBind(ID: BindID); |
400 | if (Bound) { |
401 | *Value = VariantMatcher::SingleMatcher(Matcher: *Bound); |
402 | return true; |
403 | } |
404 | } |
405 | return false; |
406 | } |
407 | |
408 | if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) { |
409 | Error->addError(Range: Tokenizer->peekNextToken().Range, |
410 | Error: Error->ET_ParserNoOpenParen) |
411 | << "NewLine" ; |
412 | return false; |
413 | } |
414 | |
415 | // If the syntax is correct and the name is not a matcher either, report |
416 | // unknown named value. |
417 | if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma || |
418 | Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen || |
419 | Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine || |
420 | Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) && |
421 | !S->lookupMatcherCtor(MatcherName: NameToken.Text)) { |
422 | Error->addError(Range: NameToken.Range, Error: Error->ET_RegistryValueNotFound) |
423 | << NameToken.Text; |
424 | return false; |
425 | } |
426 | // Otherwise, fallback to the matcher parser. |
427 | } |
428 | |
429 | Tokenizer->SkipNewlines(); |
430 | |
431 | assert(NameToken.Kind == TokenInfo::TK_Ident); |
432 | TokenInfo OpenToken = Tokenizer->consumeNextToken(); |
433 | if (OpenToken.Kind != TokenInfo::TK_OpenParen) { |
434 | Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserNoOpenParen) |
435 | << OpenToken.Text; |
436 | return false; |
437 | } |
438 | |
439 | std::optional<MatcherCtor> Ctor = S->lookupMatcherCtor(MatcherName: NameToken.Text); |
440 | |
441 | // Parse as a matcher expression. |
442 | return parseMatcherExpressionImpl(NameToken, OpenToken, Ctor, Value); |
443 | } |
444 | |
445 | bool Parser::parseBindID(std::string &BindID) { |
446 | // Parse the parenthesized argument to .bind("foo") |
447 | const TokenInfo OpenToken = Tokenizer->consumeNextToken(); |
448 | const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines(); |
449 | const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines(); |
450 | |
451 | // TODO: We could use different error codes for each/some to be more |
452 | // explicit about the syntax error. |
453 | if (OpenToken.Kind != TokenInfo::TK_OpenParen) { |
454 | Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserMalformedBindExpr); |
455 | return false; |
456 | } |
457 | if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) { |
458 | Error->addError(Range: IDToken.Range, Error: Error->ET_ParserMalformedBindExpr); |
459 | return false; |
460 | } |
461 | if (CloseToken.Kind != TokenInfo::TK_CloseParen) { |
462 | Error->addError(Range: CloseToken.Range, Error: Error->ET_ParserMalformedBindExpr); |
463 | return false; |
464 | } |
465 | BindID = IDToken.Value.getString(); |
466 | return true; |
467 | } |
468 | |
469 | bool Parser::parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken, |
470 | const TokenInfo &OpenToken, |
471 | VariantValue *Value) { |
472 | std::vector<ParserValue> Args; |
473 | TokenInfo EndToken; |
474 | |
475 | Tokenizer->SkipNewlines(); |
476 | |
477 | { |
478 | ScopedContextEntry SCE(this, Ctor); |
479 | |
480 | while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { |
481 | if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { |
482 | // End of args. |
483 | EndToken = Tokenizer->consumeNextToken(); |
484 | break; |
485 | } |
486 | if (!Args.empty()) { |
487 | // We must find a , token to continue. |
488 | TokenInfo CommaToken = Tokenizer->consumeNextToken(); |
489 | if (CommaToken.Kind != TokenInfo::TK_Comma) { |
490 | Error->addError(Range: CommaToken.Range, Error: Error->ET_ParserNoComma) |
491 | << CommaToken.Text; |
492 | return false; |
493 | } |
494 | } |
495 | |
496 | Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error, |
497 | NameToken.Text, NameToken.Range, |
498 | Args.size() + 1); |
499 | ParserValue ArgValue; |
500 | Tokenizer->SkipNewlines(); |
501 | |
502 | if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_CodeCompletion) { |
503 | addExpressionCompletions(); |
504 | return false; |
505 | } |
506 | |
507 | TokenInfo NodeMatcherToken = Tokenizer->consumeNextToken(); |
508 | |
509 | if (NodeMatcherToken.Kind != TokenInfo::TK_Ident) { |
510 | Error->addError(Range: NameToken.Range, Error: Error->ET_ParserFailedToBuildMatcher) |
511 | << NameToken.Text; |
512 | return false; |
513 | } |
514 | |
515 | ArgValue.Text = NodeMatcherToken.Text; |
516 | ArgValue.Range = NodeMatcherToken.Range; |
517 | |
518 | std::optional<MatcherCtor> MappedMatcher = |
519 | S->lookupMatcherCtor(MatcherName: ArgValue.Text); |
520 | |
521 | if (!MappedMatcher) { |
522 | Error->addError(Range: NodeMatcherToken.Range, |
523 | Error: Error->ET_RegistryMatcherNotFound) |
524 | << NodeMatcherToken.Text; |
525 | return false; |
526 | } |
527 | |
528 | ASTNodeKind NK = S->nodeMatcherType(*MappedMatcher); |
529 | |
530 | if (NK.isNone()) { |
531 | Error->addError(Range: NodeMatcherToken.Range, |
532 | Error: Error->ET_RegistryNonNodeMatcher) |
533 | << NodeMatcherToken.Text; |
534 | return false; |
535 | } |
536 | |
537 | ArgValue.Value = NK; |
538 | |
539 | Tokenizer->SkipNewlines(); |
540 | Args.push_back(x: ArgValue); |
541 | |
542 | SCE.nextArg(); |
543 | } |
544 | } |
545 | |
546 | if (EndToken.Kind == TokenInfo::TK_Eof) { |
547 | Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserNoCloseParen); |
548 | return false; |
549 | } |
550 | |
551 | internal::MatcherDescriptorPtr BuiltCtor = |
552 | S->buildMatcherCtor(Ctor, NameRange: NameToken.Range, Args, Error); |
553 | |
554 | if (!BuiltCtor.get()) { |
555 | Error->addError(Range: NameToken.Range, Error: Error->ET_ParserFailedToBuildMatcher) |
556 | << NameToken.Text; |
557 | return false; |
558 | } |
559 | |
560 | std::string BindID; |
561 | if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) { |
562 | Tokenizer->consumeNextToken(); |
563 | TokenInfo ChainCallToken = Tokenizer->consumeNextToken(); |
564 | if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) { |
565 | addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("bind(\"" , "bind" , 1)); |
566 | addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("with(" , "with" , 1)); |
567 | return false; |
568 | } |
569 | if (ChainCallToken.Kind != TokenInfo::TK_Ident || |
570 | (ChainCallToken.Text != TokenInfo::ID_Bind && |
571 | ChainCallToken.Text != TokenInfo::ID_With)) { |
572 | Error->addError(Range: ChainCallToken.Range, |
573 | Error: Error->ET_ParserMalformedChainedExpr); |
574 | return false; |
575 | } |
576 | if (ChainCallToken.Text == TokenInfo::ID_Bind) { |
577 | if (!parseBindID(BindID)) |
578 | return false; |
579 | Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, |
580 | NameToken.Text, NameToken.Range); |
581 | SourceRange MatcherRange = NameToken.Range; |
582 | MatcherRange.End = ChainCallToken.Range.End; |
583 | VariantMatcher Result = S->actOnMatcherExpression( |
584 | Ctor: BuiltCtor.get(), NameRange: MatcherRange, BindID, Args: {}, Error); |
585 | if (Result.isNull()) |
586 | return false; |
587 | |
588 | *Value = Result; |
589 | return true; |
590 | } else if (ChainCallToken.Text == TokenInfo::ID_With) { |
591 | Tokenizer->SkipNewlines(); |
592 | |
593 | if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) { |
594 | StringRef ErrTxt = Tokenizer->nextTokenKind() == TokenInfo::TK_Eof |
595 | ? StringRef("EOF" ) |
596 | : Tokenizer->peekNextToken().Text; |
597 | Error->addError(Range: Tokenizer->peekNextToken().Range, |
598 | Error: Error->ET_ParserNoOpenParen) |
599 | << ErrTxt; |
600 | return false; |
601 | } |
602 | |
603 | TokenInfo WithOpenToken = Tokenizer->consumeNextToken(); |
604 | |
605 | return parseMatcherExpressionImpl(NameToken, OpenToken: WithOpenToken, |
606 | Ctor: BuiltCtor.get(), Value); |
607 | } |
608 | } |
609 | |
610 | Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, |
611 | NameToken.Text, NameToken.Range); |
612 | SourceRange MatcherRange = NameToken.Range; |
613 | MatcherRange.End = EndToken.Range.End; |
614 | VariantMatcher Result = S->actOnMatcherExpression( |
615 | Ctor: BuiltCtor.get(), NameRange: MatcherRange, BindID, Args: {}, Error); |
616 | if (Result.isNull()) |
617 | return false; |
618 | |
619 | *Value = Result; |
620 | return true; |
621 | } |
622 | |
623 | /// Parse and validate a matcher expression. |
624 | /// \return \c true on success, in which case \c Value has the matcher parsed. |
625 | /// If the input is malformed, or some argument has an error, it |
626 | /// returns \c false. |
627 | bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken, |
628 | const TokenInfo &OpenToken, |
629 | std::optional<MatcherCtor> Ctor, |
630 | VariantValue *Value) { |
631 | if (!Ctor) { |
632 | Error->addError(Range: NameToken.Range, Error: Error->ET_RegistryMatcherNotFound) |
633 | << NameToken.Text; |
634 | // Do not return here. We need to continue to give completion suggestions. |
635 | } |
636 | |
637 | if (Ctor && *Ctor && S->isBuilderMatcher(*Ctor)) |
638 | return parseMatcherBuilder(Ctor: *Ctor, NameToken, OpenToken, Value); |
639 | |
640 | std::vector<ParserValue> Args; |
641 | TokenInfo EndToken; |
642 | |
643 | Tokenizer->SkipNewlines(); |
644 | |
645 | { |
646 | ScopedContextEntry SCE(this, Ctor.value_or(u: nullptr)); |
647 | |
648 | while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { |
649 | if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { |
650 | // End of args. |
651 | EndToken = Tokenizer->consumeNextToken(); |
652 | break; |
653 | } |
654 | if (!Args.empty()) { |
655 | // We must find a , token to continue. |
656 | const TokenInfo CommaToken = Tokenizer->consumeNextToken(); |
657 | if (CommaToken.Kind != TokenInfo::TK_Comma) { |
658 | Error->addError(Range: CommaToken.Range, Error: Error->ET_ParserNoComma) |
659 | << CommaToken.Text; |
660 | return false; |
661 | } |
662 | } |
663 | |
664 | Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error, |
665 | NameToken.Text, NameToken.Range, |
666 | Args.size() + 1); |
667 | ParserValue ArgValue; |
668 | Tokenizer->SkipNewlines(); |
669 | ArgValue.Text = Tokenizer->peekNextToken().Text; |
670 | ArgValue.Range = Tokenizer->peekNextToken().Range; |
671 | if (!parseExpressionImpl(Value: &ArgValue.Value)) { |
672 | return false; |
673 | } |
674 | |
675 | Tokenizer->SkipNewlines(); |
676 | Args.push_back(x: ArgValue); |
677 | SCE.nextArg(); |
678 | } |
679 | } |
680 | |
681 | if (EndToken.Kind == TokenInfo::TK_Eof) { |
682 | Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserNoCloseParen); |
683 | return false; |
684 | } |
685 | |
686 | std::string BindID; |
687 | if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) { |
688 | Tokenizer->consumeNextToken(); |
689 | TokenInfo ChainCallToken = Tokenizer->consumeNextToken(); |
690 | if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) { |
691 | addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("bind(\"" , "bind" , 1)); |
692 | return false; |
693 | } |
694 | |
695 | if (ChainCallToken.Kind != TokenInfo::TK_Ident) { |
696 | Error->addError(Range: ChainCallToken.Range, |
697 | Error: Error->ET_ParserMalformedChainedExpr); |
698 | return false; |
699 | } |
700 | if (ChainCallToken.Text == TokenInfo::ID_With) { |
701 | |
702 | Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, |
703 | NameToken.Text, NameToken.Range); |
704 | |
705 | Error->addError(Range: ChainCallToken.Range, |
706 | Error: Error->ET_RegistryMatcherNoWithSupport); |
707 | return false; |
708 | } |
709 | if (ChainCallToken.Text != TokenInfo::ID_Bind) { |
710 | Error->addError(Range: ChainCallToken.Range, |
711 | Error: Error->ET_ParserMalformedChainedExpr); |
712 | return false; |
713 | } |
714 | if (!parseBindID(BindID)) |
715 | return false; |
716 | } |
717 | |
718 | if (!Ctor) |
719 | return false; |
720 | |
721 | // Merge the start and end infos. |
722 | Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error, |
723 | NameToken.Text, NameToken.Range); |
724 | SourceRange MatcherRange = NameToken.Range; |
725 | MatcherRange.End = EndToken.Range.End; |
726 | VariantMatcher Result = S->actOnMatcherExpression( |
727 | Ctor: *Ctor, NameRange: MatcherRange, BindID, Args, Error); |
728 | if (Result.isNull()) return false; |
729 | |
730 | *Value = Result; |
731 | return true; |
732 | } |
733 | |
734 | // If the prefix of this completion matches the completion token, add it to |
735 | // Completions minus the prefix. |
736 | void Parser::addCompletion(const TokenInfo &CompToken, |
737 | const MatcherCompletion& Completion) { |
738 | if (StringRef(Completion.TypedText).starts_with(Prefix: CompToken.Text) && |
739 | Completion.Specificity > 0) { |
740 | Completions.emplace_back(args: Completion.TypedText.substr(pos: CompToken.Text.size()), |
741 | args: Completion.MatcherDecl, args: Completion.Specificity); |
742 | } |
743 | } |
744 | |
745 | std::vector<MatcherCompletion> Parser::getNamedValueCompletions( |
746 | ArrayRef<ArgKind> AcceptedTypes) { |
747 | if (!NamedValues) return std::vector<MatcherCompletion>(); |
748 | std::vector<MatcherCompletion> Result; |
749 | for (const auto &Entry : *NamedValues) { |
750 | unsigned Specificity; |
751 | if (Entry.getValue().isConvertibleTo(Kinds: AcceptedTypes, Specificity: &Specificity)) { |
752 | std::string Decl = |
753 | (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str(); |
754 | Result.emplace_back(args: Entry.getKey(), args&: Decl, args&: Specificity); |
755 | } |
756 | } |
757 | return Result; |
758 | } |
759 | |
760 | void Parser::addExpressionCompletions() { |
761 | const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines(); |
762 | assert(CompToken.Kind == TokenInfo::TK_CodeCompletion); |
763 | |
764 | // We cannot complete code if there is an invalid element on the context |
765 | // stack. |
766 | for (ContextStackTy::iterator I = ContextStack.begin(), |
767 | E = ContextStack.end(); |
768 | I != E; ++I) { |
769 | if (!I->first) |
770 | return; |
771 | } |
772 | |
773 | auto AcceptedTypes = S->getAcceptedCompletionTypes(Context: ContextStack); |
774 | for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) { |
775 | addCompletion(CompToken, Completion); |
776 | } |
777 | |
778 | for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) { |
779 | addCompletion(CompToken, Completion); |
780 | } |
781 | } |
782 | |
783 | /// Parse an <Expression> |
784 | bool Parser::parseExpressionImpl(VariantValue *Value) { |
785 | switch (Tokenizer->nextTokenKind()) { |
786 | case TokenInfo::TK_Literal: |
787 | *Value = Tokenizer->consumeNextToken().Value; |
788 | return true; |
789 | |
790 | case TokenInfo::TK_Ident: |
791 | return parseIdentifierPrefixImpl(Value); |
792 | |
793 | case TokenInfo::TK_CodeCompletion: |
794 | addExpressionCompletions(); |
795 | return false; |
796 | |
797 | case TokenInfo::TK_Eof: |
798 | Error->addError(Range: Tokenizer->consumeNextToken().Range, |
799 | Error: Error->ET_ParserNoCode); |
800 | return false; |
801 | |
802 | case TokenInfo::TK_Error: |
803 | // This error was already reported by the tokenizer. |
804 | return false; |
805 | case TokenInfo::TK_NewLine: |
806 | case TokenInfo::TK_OpenParen: |
807 | case TokenInfo::TK_CloseParen: |
808 | case TokenInfo::TK_Comma: |
809 | case TokenInfo::TK_Period: |
810 | case TokenInfo::TK_InvalidChar: |
811 | const TokenInfo Token = Tokenizer->consumeNextToken(); |
812 | Error->addError(Range: Token.Range, Error: Error->ET_ParserInvalidToken) |
813 | << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine" : Token.Text); |
814 | return false; |
815 | } |
816 | |
817 | llvm_unreachable("Unknown token kind." ); |
818 | } |
819 | |
820 | static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema; |
821 | |
822 | Parser::Parser(CodeTokenizer *Tokenizer, Sema *S, |
823 | const NamedValueMap *NamedValues, Diagnostics *Error) |
824 | : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema), |
825 | NamedValues(NamedValues), Error(Error) {} |
826 | |
827 | Parser::RegistrySema::~RegistrySema() = default; |
828 | |
829 | std::optional<MatcherCtor> |
830 | Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) { |
831 | return Registry::lookupMatcherCtor(MatcherName); |
832 | } |
833 | |
834 | VariantMatcher Parser::RegistrySema::actOnMatcherExpression( |
835 | MatcherCtor Ctor, SourceRange NameRange, StringRef BindID, |
836 | ArrayRef<ParserValue> Args, Diagnostics *Error) { |
837 | if (BindID.empty()) { |
838 | return Registry::constructMatcher(Ctor, NameRange, Args, Error); |
839 | } else { |
840 | return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args, |
841 | Error); |
842 | } |
843 | } |
844 | |
845 | std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes( |
846 | ArrayRef<std::pair<MatcherCtor, unsigned>> Context) { |
847 | return Registry::getAcceptedCompletionTypes(Context); |
848 | } |
849 | |
850 | std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions( |
851 | ArrayRef<ArgKind> AcceptedTypes) { |
852 | return Registry::getMatcherCompletions(AcceptedTypes); |
853 | } |
854 | |
855 | bool Parser::RegistrySema::isBuilderMatcher(MatcherCtor Ctor) const { |
856 | return Registry::isBuilderMatcher(Ctor); |
857 | } |
858 | |
859 | ASTNodeKind Parser::RegistrySema::nodeMatcherType(MatcherCtor Ctor) const { |
860 | return Registry::nodeMatcherType(Ctor); |
861 | } |
862 | |
863 | internal::MatcherDescriptorPtr |
864 | Parser::RegistrySema::buildMatcherCtor(MatcherCtor Ctor, SourceRange NameRange, |
865 | ArrayRef<ParserValue> Args, |
866 | Diagnostics *Error) const { |
867 | return Registry::buildMatcherCtor(Ctor, NameRange, Args, Error); |
868 | } |
869 | |
870 | bool Parser::parseExpression(StringRef &Code, Sema *S, |
871 | const NamedValueMap *NamedValues, |
872 | VariantValue *Value, Diagnostics *Error) { |
873 | CodeTokenizer Tokenizer(Code, Error); |
874 | if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value)) |
875 | return false; |
876 | auto NT = Tokenizer.peekNextToken(); |
877 | if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) { |
878 | Error->addError(Range: Tokenizer.peekNextToken().Range, |
879 | Error: Error->ET_ParserTrailingCode); |
880 | return false; |
881 | } |
882 | return true; |
883 | } |
884 | |
885 | std::vector<MatcherCompletion> |
886 | Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S, |
887 | const NamedValueMap *NamedValues) { |
888 | Diagnostics Error; |
889 | CodeTokenizer Tokenizer(Code, &Error, CompletionOffset); |
890 | Parser P(&Tokenizer, S, NamedValues, &Error); |
891 | VariantValue Dummy; |
892 | P.parseExpressionImpl(Value: &Dummy); |
893 | |
894 | // Sort by specificity, then by name. |
895 | llvm::sort(C&: P.Completions, |
896 | Comp: [](const MatcherCompletion &A, const MatcherCompletion &B) { |
897 | if (A.Specificity != B.Specificity) |
898 | return A.Specificity > B.Specificity; |
899 | return A.TypedText < B.TypedText; |
900 | }); |
901 | |
902 | return P.Completions; |
903 | } |
904 | |
905 | std::optional<DynTypedMatcher> |
906 | Parser::parseMatcherExpression(StringRef &Code, Sema *S, |
907 | const NamedValueMap *NamedValues, |
908 | Diagnostics *Error) { |
909 | VariantValue Value; |
910 | if (!parseExpression(Code, S, NamedValues, Value: &Value, Error)) |
911 | return std::nullopt; |
912 | if (!Value.isMatcher()) { |
913 | Error->addError(Range: SourceRange(), Error: Error->ET_ParserNotAMatcher); |
914 | return std::nullopt; |
915 | } |
916 | std::optional<DynTypedMatcher> Result = Value.getMatcher().getSingleMatcher(); |
917 | if (!Result) { |
918 | Error->addError(Range: SourceRange(), Error: Error->ET_ParserOverloadedType) |
919 | << Value.getTypeAsString(); |
920 | } |
921 | return Result; |
922 | } |
923 | |
924 | } // namespace dynamic |
925 | } // namespace ast_matchers |
926 | } // namespace clang |
927 | |