//===- Parser.cpp - Matcher expression parser -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Recursive parser implementation for the matcher expression grammar.
///
//===----------------------------------------------------------------------===//
13
14#include "clang/ASTMatchers/Dynamic/Parser.h"
15#include "clang/ASTMatchers/ASTMatchersInternal.h"
16#include "clang/ASTMatchers/Dynamic/Diagnostics.h"
17#include "clang/ASTMatchers/Dynamic/Registry.h"
18#include "clang/Basic/CharInfo.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Support/ErrorHandling.h"
21#include "llvm/Support/ManagedStatic.h"
22#include <algorithm>
23#include <cassert>
24#include <cerrno>
25#include <cstddef>
26#include <cstdlib>
27#include <optional>
28#include <string>
29#include <utility>
30#include <vector>
31
32namespace clang {
33namespace ast_matchers {
34namespace dynamic {
35
36/// Simple structure to hold information for one token from the parser.
37struct Parser::TokenInfo {
38 /// Different possible tokens.
39 enum TokenKind {
40 TK_Eof,
41 TK_NewLine,
42 TK_OpenParen,
43 TK_CloseParen,
44 TK_Comma,
45 TK_Period,
46 TK_Literal,
47 TK_Ident,
48 TK_InvalidChar,
49 TK_Error,
50 TK_CodeCompletion
51 };
52
53 /// Some known identifiers.
54 static const char* const ID_Bind;
55 static const char *const ID_With;
56
57 TokenInfo() = default;
58
59 StringRef Text;
60 TokenKind Kind = TK_Eof;
61 SourceRange Range;
62 VariantValue Value;
63};
64
65const char* const Parser::TokenInfo::ID_Bind = "bind";
66const char *const Parser::TokenInfo::ID_With = "with";
67
68/// Simple tokenizer for the parser.
69class Parser::CodeTokenizer {
70public:
71 explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error)
72 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
73 NextToken = getNextToken();
74 }
75
76 CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error,
77 unsigned CodeCompletionOffset)
78 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
79 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
80 NextToken = getNextToken();
81 }
82
83 /// Returns but doesn't consume the next token.
84 const TokenInfo &peekNextToken() const { return NextToken; }
85
86 /// Consumes and returns the next token.
87 TokenInfo consumeNextToken() {
88 TokenInfo ThisToken = NextToken;
89 NextToken = getNextToken();
90 return ThisToken;
91 }
92
93 TokenInfo SkipNewlines() {
94 while (NextToken.Kind == TokenInfo::TK_NewLine)
95 NextToken = getNextToken();
96 return NextToken;
97 }
98
99 TokenInfo consumeNextTokenIgnoreNewlines() {
100 SkipNewlines();
101 if (NextToken.Kind == TokenInfo::TK_Eof)
102 return NextToken;
103 return consumeNextToken();
104 }
105
106 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
107
108private:
109 TokenInfo getNextToken() {
110 consumeWhitespace();
111 TokenInfo Result;
112 Result.Range.Start = currentLocation();
113
114 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
115 Result.Kind = TokenInfo::TK_CodeCompletion;
116 Result.Text = StringRef(CodeCompletionLocation, 0);
117 CodeCompletionLocation = nullptr;
118 return Result;
119 }
120
121 if (Code.empty()) {
122 Result.Kind = TokenInfo::TK_Eof;
123 Result.Text = "";
124 return Result;
125 }
126
127 switch (Code[0]) {
128 case '#':
129 Code = Code.drop_until(F: [](char c) { return c == '\n'; });
130 return getNextToken();
131 case ',':
132 Result.Kind = TokenInfo::TK_Comma;
133 Result.Text = Code.substr(Start: 0, N: 1);
134 Code = Code.drop_front();
135 break;
136 case '.':
137 Result.Kind = TokenInfo::TK_Period;
138 Result.Text = Code.substr(Start: 0, N: 1);
139 Code = Code.drop_front();
140 break;
141 case '\n':
142 ++Line;
143 StartOfLine = Code.drop_front();
144 Result.Kind = TokenInfo::TK_NewLine;
145 Result.Text = Code.substr(Start: 0, N: 1);
146 Code = Code.drop_front();
147 break;
148 case '(':
149 Result.Kind = TokenInfo::TK_OpenParen;
150 Result.Text = Code.substr(Start: 0, N: 1);
151 Code = Code.drop_front();
152 break;
153 case ')':
154 Result.Kind = TokenInfo::TK_CloseParen;
155 Result.Text = Code.substr(Start: 0, N: 1);
156 Code = Code.drop_front();
157 break;
158
159 case '"':
160 case '\'':
161 // Parse a string literal.
162 consumeStringLiteral(Result: &Result);
163 break;
164
165 case '0': case '1': case '2': case '3': case '4':
166 case '5': case '6': case '7': case '8': case '9':
167 // Parse an unsigned and float literal.
168 consumeNumberLiteral(Result: &Result);
169 break;
170
171 default:
172 if (isAlphanumeric(c: Code[0])) {
173 // Parse an identifier
174 size_t TokenLength = 1;
175 while (true) {
176 // A code completion location in/immediately after an identifier will
177 // cause the portion of the identifier before the code completion
178 // location to become a code completion token.
179 if (CodeCompletionLocation == Code.data() + TokenLength) {
180 CodeCompletionLocation = nullptr;
181 Result.Kind = TokenInfo::TK_CodeCompletion;
182 Result.Text = Code.substr(Start: 0, N: TokenLength);
183 Code = Code.drop_front(N: TokenLength);
184 return Result;
185 }
186 if (TokenLength == Code.size() || !isAlphanumeric(c: Code[TokenLength]))
187 break;
188 ++TokenLength;
189 }
190 if (TokenLength == 4 && Code.starts_with(Prefix: "true")) {
191 Result.Kind = TokenInfo::TK_Literal;
192 Result.Value = true;
193 } else if (TokenLength == 5 && Code.starts_with(Prefix: "false")) {
194 Result.Kind = TokenInfo::TK_Literal;
195 Result.Value = false;
196 } else {
197 Result.Kind = TokenInfo::TK_Ident;
198 Result.Text = Code.substr(Start: 0, N: TokenLength);
199 }
200 Code = Code.drop_front(N: TokenLength);
201 } else {
202 Result.Kind = TokenInfo::TK_InvalidChar;
203 Result.Text = Code.substr(Start: 0, N: 1);
204 Code = Code.drop_front(N: 1);
205 }
206 break;
207 }
208
209 Result.Range.End = currentLocation();
210 return Result;
211 }
212
213 /// Consume an unsigned and float literal.
214 void consumeNumberLiteral(TokenInfo *Result) {
215 bool isFloatingLiteral = false;
216 unsigned Length = 1;
217 if (Code.size() > 1) {
218 // Consume the 'x' or 'b' radix modifier, if present.
219 switch (toLowercase(c: Code[1])) {
220 case 'x': case 'b': Length = 2;
221 }
222 }
223 while (Length < Code.size() && isHexDigit(c: Code[Length]))
224 ++Length;
225
226 // Try to recognize a floating point literal.
227 while (Length < Code.size()) {
228 char c = Code[Length];
229 if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
230 isFloatingLiteral = true;
231 Length++;
232 } else {
233 break;
234 }
235 }
236
237 Result->Text = Code.substr(Start: 0, N: Length);
238 Code = Code.drop_front(N: Length);
239
240 if (isFloatingLiteral) {
241 char *end;
242 errno = 0;
243 std::string Text = Result->Text.str();
244 double doubleValue = strtod(nptr: Text.c_str(), endptr: &end);
245 if (*end == 0 && errno == 0) {
246 Result->Kind = TokenInfo::TK_Literal;
247 Result->Value = doubleValue;
248 return;
249 }
250 } else {
251 unsigned Value;
252 if (!Result->Text.getAsInteger(Radix: 0, Result&: Value)) {
253 Result->Kind = TokenInfo::TK_Literal;
254 Result->Value = Value;
255 return;
256 }
257 }
258
259 SourceRange Range;
260 Range.Start = Result->Range.Start;
261 Range.End = currentLocation();
262 Error->addError(Range, Error: Error->ET_ParserNumberError) << Result->Text;
263 Result->Kind = TokenInfo::TK_Error;
264 }
265
266 /// Consume a string literal.
267 ///
268 /// \c Code must be positioned at the start of the literal (the opening
269 /// quote). Consumed until it finds the same closing quote character.
270 void consumeStringLiteral(TokenInfo *Result) {
271 bool InEscape = false;
272 const char Marker = Code[0];
273 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
274 if (InEscape) {
275 InEscape = false;
276 continue;
277 }
278 if (Code[Length] == '\\') {
279 InEscape = true;
280 continue;
281 }
282 if (Code[Length] == Marker) {
283 Result->Kind = TokenInfo::TK_Literal;
284 Result->Text = Code.substr(Start: 0, N: Length + 1);
285 Result->Value = Code.substr(Start: 1, N: Length - 1);
286 Code = Code.drop_front(N: Length + 1);
287 return;
288 }
289 }
290
291 StringRef ErrorText = Code;
292 Code = Code.drop_front(N: Code.size());
293 SourceRange Range;
294 Range.Start = Result->Range.Start;
295 Range.End = currentLocation();
296 Error->addError(Range, Error: Error->ET_ParserStringError) << ErrorText;
297 Result->Kind = TokenInfo::TK_Error;
298 }
299
300 /// Consume all leading whitespace from \c Code.
301 void consumeWhitespace() {
302 // Don't trim newlines.
303 Code = Code.ltrim(Chars: " \t\v\f\r");
304 }
305
306 SourceLocation currentLocation() {
307 SourceLocation Location;
308 Location.Line = Line;
309 Location.Column = Code.data() - StartOfLine.data() + 1;
310 return Location;
311 }
312
313 StringRef &Code;
314 StringRef StartOfLine;
315 unsigned Line = 1;
316 Diagnostics *Error;
317 TokenInfo NextToken;
318 const char *CodeCompletionLocation = nullptr;
319};
320
321Parser::Sema::~Sema() = default;
322
323std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
324 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
325 return {};
326}
327
328std::vector<MatcherCompletion>
329Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
330 return {};
331}
332
333struct Parser::ScopedContextEntry {
334 Parser *P;
335
336 ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
337 P->ContextStack.push_back(x: std::make_pair(x&: C, y: 0u));
338 }
339
340 ~ScopedContextEntry() {
341 P->ContextStack.pop_back();
342 }
343
344 void nextArg() {
345 ++P->ContextStack.back().second;
346 }
347};
348
349/// Parse expressions that start with an identifier.
350///
351/// This function can parse named values and matchers.
352/// In case of failure it will try to determine the user's intent to give
353/// an appropriate error message.
354bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
355 const TokenInfo NameToken = Tokenizer->consumeNextToken();
356
357 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
358 // Parse as a named value.
359 if (const VariantValue NamedValue =
360 NamedValues ? NamedValues->lookup(Key: NameToken.Text)
361 : VariantValue()) {
362
363 if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) {
364 *Value = NamedValue;
365 return true;
366 }
367
368 std::string BindID;
369 Tokenizer->consumeNextToken();
370 TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
371 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
372 addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("bind(\"", "bind", 1));
373 return false;
374 }
375
376 if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
377 (ChainCallToken.Text != TokenInfo::ID_Bind &&
378 ChainCallToken.Text != TokenInfo::ID_With)) {
379 Error->addError(Range: ChainCallToken.Range,
380 Error: Error->ET_ParserMalformedChainedExpr);
381 return false;
382 }
383 if (ChainCallToken.Text == TokenInfo::ID_With) {
384
385 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
386 NameToken.Text, NameToken.Range);
387
388 Error->addError(Range: ChainCallToken.Range,
389 Error: Error->ET_RegistryMatcherNoWithSupport);
390 return false;
391 }
392 if (!parseBindID(BindID))
393 return false;
394
395 assert(NamedValue.isMatcher());
396 std::optional<DynTypedMatcher> Result =
397 NamedValue.getMatcher().getSingleMatcher();
398 if (Result) {
399 std::optional<DynTypedMatcher> Bound = Result->tryBind(ID: BindID);
400 if (Bound) {
401 *Value = VariantMatcher::SingleMatcher(Matcher: *Bound);
402 return true;
403 }
404 }
405 return false;
406 }
407
408 if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) {
409 Error->addError(Range: Tokenizer->peekNextToken().Range,
410 Error: Error->ET_ParserNoOpenParen)
411 << "NewLine";
412 return false;
413 }
414
415 // If the syntax is correct and the name is not a matcher either, report
416 // unknown named value.
417 if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
418 Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
419 Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine ||
420 Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
421 !S->lookupMatcherCtor(MatcherName: NameToken.Text)) {
422 Error->addError(Range: NameToken.Range, Error: Error->ET_RegistryValueNotFound)
423 << NameToken.Text;
424 return false;
425 }
426 // Otherwise, fallback to the matcher parser.
427 }
428
429 Tokenizer->SkipNewlines();
430
431 assert(NameToken.Kind == TokenInfo::TK_Ident);
432 TokenInfo OpenToken = Tokenizer->consumeNextToken();
433 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
434 Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserNoOpenParen)
435 << OpenToken.Text;
436 return false;
437 }
438
439 std::optional<MatcherCtor> Ctor = S->lookupMatcherCtor(MatcherName: NameToken.Text);
440
441 // Parse as a matcher expression.
442 return parseMatcherExpressionImpl(NameToken, OpenToken, Ctor, Value);
443}
444
445bool Parser::parseBindID(std::string &BindID) {
446 // Parse the parenthesized argument to .bind("foo")
447 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
448 const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines();
449 const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines();
450
451 // TODO: We could use different error codes for each/some to be more
452 // explicit about the syntax error.
453 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
454 Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserMalformedBindExpr);
455 return false;
456 }
457 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
458 Error->addError(Range: IDToken.Range, Error: Error->ET_ParserMalformedBindExpr);
459 return false;
460 }
461 if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
462 Error->addError(Range: CloseToken.Range, Error: Error->ET_ParserMalformedBindExpr);
463 return false;
464 }
465 BindID = IDToken.Value.getString();
466 return true;
467}
468
469bool Parser::parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken,
470 const TokenInfo &OpenToken,
471 VariantValue *Value) {
472 std::vector<ParserValue> Args;
473 TokenInfo EndToken;
474
475 Tokenizer->SkipNewlines();
476
477 {
478 ScopedContextEntry SCE(this, Ctor);
479
480 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
481 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
482 // End of args.
483 EndToken = Tokenizer->consumeNextToken();
484 break;
485 }
486 if (!Args.empty()) {
487 // We must find a , token to continue.
488 TokenInfo CommaToken = Tokenizer->consumeNextToken();
489 if (CommaToken.Kind != TokenInfo::TK_Comma) {
490 Error->addError(Range: CommaToken.Range, Error: Error->ET_ParserNoComma)
491 << CommaToken.Text;
492 return false;
493 }
494 }
495
496 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
497 NameToken.Text, NameToken.Range,
498 Args.size() + 1);
499 ParserValue ArgValue;
500 Tokenizer->SkipNewlines();
501
502 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_CodeCompletion) {
503 addExpressionCompletions();
504 return false;
505 }
506
507 TokenInfo NodeMatcherToken = Tokenizer->consumeNextToken();
508
509 if (NodeMatcherToken.Kind != TokenInfo::TK_Ident) {
510 Error->addError(Range: NameToken.Range, Error: Error->ET_ParserFailedToBuildMatcher)
511 << NameToken.Text;
512 return false;
513 }
514
515 ArgValue.Text = NodeMatcherToken.Text;
516 ArgValue.Range = NodeMatcherToken.Range;
517
518 std::optional<MatcherCtor> MappedMatcher =
519 S->lookupMatcherCtor(MatcherName: ArgValue.Text);
520
521 if (!MappedMatcher) {
522 Error->addError(Range: NodeMatcherToken.Range,
523 Error: Error->ET_RegistryMatcherNotFound)
524 << NodeMatcherToken.Text;
525 return false;
526 }
527
528 ASTNodeKind NK = S->nodeMatcherType(*MappedMatcher);
529
530 if (NK.isNone()) {
531 Error->addError(Range: NodeMatcherToken.Range,
532 Error: Error->ET_RegistryNonNodeMatcher)
533 << NodeMatcherToken.Text;
534 return false;
535 }
536
537 ArgValue.Value = NK;
538
539 Tokenizer->SkipNewlines();
540 Args.push_back(x: ArgValue);
541
542 SCE.nextArg();
543 }
544 }
545
546 if (EndToken.Kind == TokenInfo::TK_Eof) {
547 Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserNoCloseParen);
548 return false;
549 }
550
551 internal::MatcherDescriptorPtr BuiltCtor =
552 S->buildMatcherCtor(Ctor, NameRange: NameToken.Range, Args, Error);
553
554 if (!BuiltCtor.get()) {
555 Error->addError(Range: NameToken.Range, Error: Error->ET_ParserFailedToBuildMatcher)
556 << NameToken.Text;
557 return false;
558 }
559
560 std::string BindID;
561 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
562 Tokenizer->consumeNextToken();
563 TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
564 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
565 addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("bind(\"", "bind", 1));
566 addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("with(", "with", 1));
567 return false;
568 }
569 if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
570 (ChainCallToken.Text != TokenInfo::ID_Bind &&
571 ChainCallToken.Text != TokenInfo::ID_With)) {
572 Error->addError(Range: ChainCallToken.Range,
573 Error: Error->ET_ParserMalformedChainedExpr);
574 return false;
575 }
576 if (ChainCallToken.Text == TokenInfo::ID_Bind) {
577 if (!parseBindID(BindID))
578 return false;
579 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
580 NameToken.Text, NameToken.Range);
581 SourceRange MatcherRange = NameToken.Range;
582 MatcherRange.End = ChainCallToken.Range.End;
583 VariantMatcher Result = S->actOnMatcherExpression(
584 Ctor: BuiltCtor.get(), NameRange: MatcherRange, BindID, Args: {}, Error);
585 if (Result.isNull())
586 return false;
587
588 *Value = Result;
589 return true;
590 } else if (ChainCallToken.Text == TokenInfo::ID_With) {
591 Tokenizer->SkipNewlines();
592
593 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
594 StringRef ErrTxt = Tokenizer->nextTokenKind() == TokenInfo::TK_Eof
595 ? StringRef("EOF")
596 : Tokenizer->peekNextToken().Text;
597 Error->addError(Range: Tokenizer->peekNextToken().Range,
598 Error: Error->ET_ParserNoOpenParen)
599 << ErrTxt;
600 return false;
601 }
602
603 TokenInfo WithOpenToken = Tokenizer->consumeNextToken();
604
605 return parseMatcherExpressionImpl(NameToken, OpenToken: WithOpenToken,
606 Ctor: BuiltCtor.get(), Value);
607 }
608 }
609
610 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
611 NameToken.Text, NameToken.Range);
612 SourceRange MatcherRange = NameToken.Range;
613 MatcherRange.End = EndToken.Range.End;
614 VariantMatcher Result = S->actOnMatcherExpression(
615 Ctor: BuiltCtor.get(), NameRange: MatcherRange, BindID, Args: {}, Error);
616 if (Result.isNull())
617 return false;
618
619 *Value = Result;
620 return true;
621}
622
623/// Parse and validate a matcher expression.
624/// \return \c true on success, in which case \c Value has the matcher parsed.
625/// If the input is malformed, or some argument has an error, it
626/// returns \c false.
627bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
628 const TokenInfo &OpenToken,
629 std::optional<MatcherCtor> Ctor,
630 VariantValue *Value) {
631 if (!Ctor) {
632 Error->addError(Range: NameToken.Range, Error: Error->ET_RegistryMatcherNotFound)
633 << NameToken.Text;
634 // Do not return here. We need to continue to give completion suggestions.
635 }
636
637 if (Ctor && *Ctor && S->isBuilderMatcher(*Ctor))
638 return parseMatcherBuilder(Ctor: *Ctor, NameToken, OpenToken, Value);
639
640 std::vector<ParserValue> Args;
641 TokenInfo EndToken;
642
643 Tokenizer->SkipNewlines();
644
645 {
646 ScopedContextEntry SCE(this, Ctor.value_or(u: nullptr));
647
648 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
649 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
650 // End of args.
651 EndToken = Tokenizer->consumeNextToken();
652 break;
653 }
654 if (!Args.empty()) {
655 // We must find a , token to continue.
656 const TokenInfo CommaToken = Tokenizer->consumeNextToken();
657 if (CommaToken.Kind != TokenInfo::TK_Comma) {
658 Error->addError(Range: CommaToken.Range, Error: Error->ET_ParserNoComma)
659 << CommaToken.Text;
660 return false;
661 }
662 }
663
664 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
665 NameToken.Text, NameToken.Range,
666 Args.size() + 1);
667 ParserValue ArgValue;
668 Tokenizer->SkipNewlines();
669 ArgValue.Text = Tokenizer->peekNextToken().Text;
670 ArgValue.Range = Tokenizer->peekNextToken().Range;
671 if (!parseExpressionImpl(Value: &ArgValue.Value)) {
672 return false;
673 }
674
675 Tokenizer->SkipNewlines();
676 Args.push_back(x: ArgValue);
677 SCE.nextArg();
678 }
679 }
680
681 if (EndToken.Kind == TokenInfo::TK_Eof) {
682 Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserNoCloseParen);
683 return false;
684 }
685
686 std::string BindID;
687 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
688 Tokenizer->consumeNextToken();
689 TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
690 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
691 addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("bind(\"", "bind", 1));
692 return false;
693 }
694
695 if (ChainCallToken.Kind != TokenInfo::TK_Ident) {
696 Error->addError(Range: ChainCallToken.Range,
697 Error: Error->ET_ParserMalformedChainedExpr);
698 return false;
699 }
700 if (ChainCallToken.Text == TokenInfo::ID_With) {
701
702 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
703 NameToken.Text, NameToken.Range);
704
705 Error->addError(Range: ChainCallToken.Range,
706 Error: Error->ET_RegistryMatcherNoWithSupport);
707 return false;
708 }
709 if (ChainCallToken.Text != TokenInfo::ID_Bind) {
710 Error->addError(Range: ChainCallToken.Range,
711 Error: Error->ET_ParserMalformedChainedExpr);
712 return false;
713 }
714 if (!parseBindID(BindID))
715 return false;
716 }
717
718 if (!Ctor)
719 return false;
720
721 // Merge the start and end infos.
722 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
723 NameToken.Text, NameToken.Range);
724 SourceRange MatcherRange = NameToken.Range;
725 MatcherRange.End = EndToken.Range.End;
726 VariantMatcher Result = S->actOnMatcherExpression(
727 Ctor: *Ctor, NameRange: MatcherRange, BindID, Args, Error);
728 if (Result.isNull()) return false;
729
730 *Value = Result;
731 return true;
732}
733
734// If the prefix of this completion matches the completion token, add it to
735// Completions minus the prefix.
736void Parser::addCompletion(const TokenInfo &CompToken,
737 const MatcherCompletion& Completion) {
738 if (StringRef(Completion.TypedText).starts_with(Prefix: CompToken.Text) &&
739 Completion.Specificity > 0) {
740 Completions.emplace_back(args: Completion.TypedText.substr(pos: CompToken.Text.size()),
741 args: Completion.MatcherDecl, args: Completion.Specificity);
742 }
743}
744
745std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
746 ArrayRef<ArgKind> AcceptedTypes) {
747 if (!NamedValues) return std::vector<MatcherCompletion>();
748 std::vector<MatcherCompletion> Result;
749 for (const auto &Entry : *NamedValues) {
750 unsigned Specificity;
751 if (Entry.getValue().isConvertibleTo(Kinds: AcceptedTypes, Specificity: &Specificity)) {
752 std::string Decl =
753 (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
754 Result.emplace_back(args: Entry.getKey(), args&: Decl, args&: Specificity);
755 }
756 }
757 return Result;
758}
759
760void Parser::addExpressionCompletions() {
761 const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines();
762 assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
763
764 // We cannot complete code if there is an invalid element on the context
765 // stack.
766 for (ContextStackTy::iterator I = ContextStack.begin(),
767 E = ContextStack.end();
768 I != E; ++I) {
769 if (!I->first)
770 return;
771 }
772
773 auto AcceptedTypes = S->getAcceptedCompletionTypes(Context: ContextStack);
774 for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
775 addCompletion(CompToken, Completion);
776 }
777
778 for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
779 addCompletion(CompToken, Completion);
780 }
781}
782
783/// Parse an <Expression>
784bool Parser::parseExpressionImpl(VariantValue *Value) {
785 switch (Tokenizer->nextTokenKind()) {
786 case TokenInfo::TK_Literal:
787 *Value = Tokenizer->consumeNextToken().Value;
788 return true;
789
790 case TokenInfo::TK_Ident:
791 return parseIdentifierPrefixImpl(Value);
792
793 case TokenInfo::TK_CodeCompletion:
794 addExpressionCompletions();
795 return false;
796
797 case TokenInfo::TK_Eof:
798 Error->addError(Range: Tokenizer->consumeNextToken().Range,
799 Error: Error->ET_ParserNoCode);
800 return false;
801
802 case TokenInfo::TK_Error:
803 // This error was already reported by the tokenizer.
804 return false;
805 case TokenInfo::TK_NewLine:
806 case TokenInfo::TK_OpenParen:
807 case TokenInfo::TK_CloseParen:
808 case TokenInfo::TK_Comma:
809 case TokenInfo::TK_Period:
810 case TokenInfo::TK_InvalidChar:
811 const TokenInfo Token = Tokenizer->consumeNextToken();
812 Error->addError(Range: Token.Range, Error: Error->ET_ParserInvalidToken)
813 << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine" : Token.Text);
814 return false;
815 }
816
817 llvm_unreachable("Unknown token kind.");
818}
819
820static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;
821
822Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
823 const NamedValueMap *NamedValues, Diagnostics *Error)
824 : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
825 NamedValues(NamedValues), Error(Error) {}
826
827Parser::RegistrySema::~RegistrySema() = default;
828
829std::optional<MatcherCtor>
830Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
831 return Registry::lookupMatcherCtor(MatcherName);
832}
833
834VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
835 MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
836 ArrayRef<ParserValue> Args, Diagnostics *Error) {
837 if (BindID.empty()) {
838 return Registry::constructMatcher(Ctor, NameRange, Args, Error);
839 } else {
840 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
841 Error);
842 }
843}
844
845std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
846 ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
847 return Registry::getAcceptedCompletionTypes(Context);
848}
849
850std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
851 ArrayRef<ArgKind> AcceptedTypes) {
852 return Registry::getMatcherCompletions(AcceptedTypes);
853}
854
855bool Parser::RegistrySema::isBuilderMatcher(MatcherCtor Ctor) const {
856 return Registry::isBuilderMatcher(Ctor);
857}
858
859ASTNodeKind Parser::RegistrySema::nodeMatcherType(MatcherCtor Ctor) const {
860 return Registry::nodeMatcherType(Ctor);
861}
862
863internal::MatcherDescriptorPtr
864Parser::RegistrySema::buildMatcherCtor(MatcherCtor Ctor, SourceRange NameRange,
865 ArrayRef<ParserValue> Args,
866 Diagnostics *Error) const {
867 return Registry::buildMatcherCtor(Ctor, NameRange, Args, Error);
868}
869
870bool Parser::parseExpression(StringRef &Code, Sema *S,
871 const NamedValueMap *NamedValues,
872 VariantValue *Value, Diagnostics *Error) {
873 CodeTokenizer Tokenizer(Code, Error);
874 if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
875 return false;
876 auto NT = Tokenizer.peekNextToken();
877 if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) {
878 Error->addError(Range: Tokenizer.peekNextToken().Range,
879 Error: Error->ET_ParserTrailingCode);
880 return false;
881 }
882 return true;
883}
884
885std::vector<MatcherCompletion>
886Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S,
887 const NamedValueMap *NamedValues) {
888 Diagnostics Error;
889 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
890 Parser P(&Tokenizer, S, NamedValues, &Error);
891 VariantValue Dummy;
892 P.parseExpressionImpl(Value: &Dummy);
893
894 // Sort by specificity, then by name.
895 llvm::sort(C&: P.Completions,
896 Comp: [](const MatcherCompletion &A, const MatcherCompletion &B) {
897 if (A.Specificity != B.Specificity)
898 return A.Specificity > B.Specificity;
899 return A.TypedText < B.TypedText;
900 });
901
902 return P.Completions;
903}
904
905std::optional<DynTypedMatcher>
906Parser::parseMatcherExpression(StringRef &Code, Sema *S,
907 const NamedValueMap *NamedValues,
908 Diagnostics *Error) {
909 VariantValue Value;
910 if (!parseExpression(Code, S, NamedValues, Value: &Value, Error))
911 return std::nullopt;
912 if (!Value.isMatcher()) {
913 Error->addError(Range: SourceRange(), Error: Error->ET_ParserNotAMatcher);
914 return std::nullopt;
915 }
916 std::optional<DynTypedMatcher> Result = Value.getMatcher().getSingleMatcher();
917 if (!Result) {
918 Error->addError(Range: SourceRange(), Error: Error->ET_ParserOverloadedType)
919 << Value.getTypeAsString();
920 }
921 return Result;
922}
923
924} // namespace dynamic
925} // namespace ast_matchers
926} // namespace clang
927