1//===- Parser.cpp - Matcher expression parser -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Recursive parser implementation for the matcher expression grammar.
11///
12//===----------------------------------------------------------------------===//
13
14#include "clang/ASTMatchers/Dynamic/Parser.h"
15#include "clang/ASTMatchers/ASTMatchersInternal.h"
16#include "clang/ASTMatchers/Dynamic/Diagnostics.h"
17#include "clang/ASTMatchers/Dynamic/Registry.h"
18#include "clang/Basic/CharInfo.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Support/ErrorHandling.h"
21#include "llvm/Support/ManagedStatic.h"
22#include <cassert>
23#include <cerrno>
24#include <cstddef>
25#include <cstdlib>
26#include <optional>
27#include <string>
28#include <utility>
29#include <vector>
30
31namespace clang {
32namespace ast_matchers {
33namespace dynamic {
34
35/// Simple structure to hold information for one token from the parser.
36struct Parser::TokenInfo {
37 /// Different possible tokens.
38 enum TokenKind {
39 TK_Eof,
40 TK_NewLine,
41 TK_OpenParen,
42 TK_CloseParen,
43 TK_Comma,
44 TK_Period,
45 TK_Literal,
46 TK_Ident,
47 TK_InvalidChar,
48 TK_Error,
49 TK_CodeCompletion
50 };
51
52 /// Some known identifiers.
53 static const char* const ID_Bind;
54 static const char *const ID_With;
55
56 TokenInfo() = default;
57
58 StringRef Text;
59 TokenKind Kind = TK_Eof;
60 SourceRange Range;
61 VariantValue Value;
62};
63
64const char* const Parser::TokenInfo::ID_Bind = "bind";
65const char *const Parser::TokenInfo::ID_With = "with";
66
67/// Simple tokenizer for the parser.
68class Parser::CodeTokenizer {
69public:
70 explicit CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error)
71 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error) {
72 NextToken = getNextToken();
73 }
74
75 CodeTokenizer(StringRef &MatcherCode, Diagnostics *Error,
76 unsigned CodeCompletionOffset)
77 : Code(MatcherCode), StartOfLine(MatcherCode), Error(Error),
78 CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
79 NextToken = getNextToken();
80 }
81
82 /// Returns but doesn't consume the next token.
83 const TokenInfo &peekNextToken() const { return NextToken; }
84
85 /// Consumes and returns the next token.
86 TokenInfo consumeNextToken() {
87 TokenInfo ThisToken = NextToken;
88 NextToken = getNextToken();
89 return ThisToken;
90 }
91
92 TokenInfo SkipNewlines() {
93 while (NextToken.Kind == TokenInfo::TK_NewLine)
94 NextToken = getNextToken();
95 return NextToken;
96 }
97
98 TokenInfo consumeNextTokenIgnoreNewlines() {
99 SkipNewlines();
100 if (NextToken.Kind == TokenInfo::TK_Eof)
101 return NextToken;
102 return consumeNextToken();
103 }
104
105 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
106
107private:
108 TokenInfo getNextToken() {
109 consumeWhitespace();
110 TokenInfo Result;
111 Result.Range.Start = currentLocation();
112
113 if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
114 Result.Kind = TokenInfo::TK_CodeCompletion;
115 Result.Text = StringRef(CodeCompletionLocation, 0);
116 CodeCompletionLocation = nullptr;
117 return Result;
118 }
119
120 if (Code.empty()) {
121 Result.Kind = TokenInfo::TK_Eof;
122 Result.Text = "";
123 return Result;
124 }
125
126 switch (Code[0]) {
127 case '#':
128 Code = Code.drop_until(F: [](char c) { return c == '\n'; });
129 return getNextToken();
130 case ',':
131 Result.Kind = TokenInfo::TK_Comma;
132 Result.Text = Code.substr(Start: 0, N: 1);
133 Code = Code.drop_front();
134 break;
135 case '.':
136 Result.Kind = TokenInfo::TK_Period;
137 Result.Text = Code.substr(Start: 0, N: 1);
138 Code = Code.drop_front();
139 break;
140 case '\n':
141 ++Line;
142 StartOfLine = Code.drop_front();
143 Result.Kind = TokenInfo::TK_NewLine;
144 Result.Text = Code.substr(Start: 0, N: 1);
145 Code = Code.drop_front();
146 break;
147 case '(':
148 Result.Kind = TokenInfo::TK_OpenParen;
149 Result.Text = Code.substr(Start: 0, N: 1);
150 Code = Code.drop_front();
151 break;
152 case ')':
153 Result.Kind = TokenInfo::TK_CloseParen;
154 Result.Text = Code.substr(Start: 0, N: 1);
155 Code = Code.drop_front();
156 break;
157
158 case '"':
159 case '\'':
160 // Parse a string literal.
161 consumeStringLiteral(Result: &Result);
162 break;
163
164 case '0': case '1': case '2': case '3': case '4':
165 case '5': case '6': case '7': case '8': case '9':
166 // Parse an unsigned and float literal.
167 consumeNumberLiteral(Result: &Result);
168 break;
169
170 default:
171 if (isAlphanumeric(c: Code[0])) {
172 // Parse an identifier
173 size_t TokenLength = 1;
174 while (true) {
175 // A code completion location in/immediately after an identifier will
176 // cause the portion of the identifier before the code completion
177 // location to become a code completion token.
178 if (CodeCompletionLocation == Code.data() + TokenLength) {
179 CodeCompletionLocation = nullptr;
180 Result.Kind = TokenInfo::TK_CodeCompletion;
181 Result.Text = Code.substr(Start: 0, N: TokenLength);
182 Code = Code.drop_front(N: TokenLength);
183 return Result;
184 }
185 if (TokenLength == Code.size() || !isAlphanumeric(c: Code[TokenLength]))
186 break;
187 ++TokenLength;
188 }
189 if (TokenLength == 4 && Code.starts_with(Prefix: "true")) {
190 Result.Kind = TokenInfo::TK_Literal;
191 Result.Value = true;
192 } else if (TokenLength == 5 && Code.starts_with(Prefix: "false")) {
193 Result.Kind = TokenInfo::TK_Literal;
194 Result.Value = false;
195 } else {
196 Result.Kind = TokenInfo::TK_Ident;
197 Result.Text = Code.substr(Start: 0, N: TokenLength);
198 }
199 Code = Code.drop_front(N: TokenLength);
200 } else {
201 Result.Kind = TokenInfo::TK_InvalidChar;
202 Result.Text = Code.substr(Start: 0, N: 1);
203 Code = Code.drop_front(N: 1);
204 }
205 break;
206 }
207
208 Result.Range.End = currentLocation();
209 return Result;
210 }
211
212 /// Consume an unsigned and float literal.
213 void consumeNumberLiteral(TokenInfo *Result) {
214 bool isFloatingLiteral = false;
215 unsigned Length = 1;
216 if (Code.size() > 1) {
217 // Consume the 'x' or 'b' radix modifier, if present.
218 switch (toLowercase(c: Code[1])) {
219 case 'x': case 'b': Length = 2;
220 }
221 }
222 while (Length < Code.size() && isHexDigit(c: Code[Length]))
223 ++Length;
224
225 // Try to recognize a floating point literal.
226 while (Length < Code.size()) {
227 char c = Code[Length];
228 if (c == '-' || c == '+' || c == '.' || isHexDigit(c)) {
229 isFloatingLiteral = true;
230 Length++;
231 } else {
232 break;
233 }
234 }
235
236 Result->Text = Code.substr(Start: 0, N: Length);
237 Code = Code.drop_front(N: Length);
238
239 if (isFloatingLiteral) {
240 char *end;
241 errno = 0;
242 std::string Text = Result->Text.str();
243 double doubleValue = strtod(nptr: Text.c_str(), endptr: &end);
244 if (*end == 0 && errno == 0) {
245 Result->Kind = TokenInfo::TK_Literal;
246 Result->Value = doubleValue;
247 return;
248 }
249 } else {
250 unsigned Value;
251 if (!Result->Text.getAsInteger(Radix: 0, Result&: Value)) {
252 Result->Kind = TokenInfo::TK_Literal;
253 Result->Value = Value;
254 return;
255 }
256 }
257
258 SourceRange Range;
259 Range.Start = Result->Range.Start;
260 Range.End = currentLocation();
261 Error->addError(Range, Error: Error->ET_ParserNumberError) << Result->Text;
262 Result->Kind = TokenInfo::TK_Error;
263 }
264
265 /// Consume a string literal.
266 ///
267 /// \c Code must be positioned at the start of the literal (the opening
268 /// quote). Consumed until it finds the same closing quote character.
269 void consumeStringLiteral(TokenInfo *Result) {
270 bool InEscape = false;
271 const char Marker = Code[0];
272 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
273 if (InEscape) {
274 InEscape = false;
275 continue;
276 }
277 if (Code[Length] == '\\') {
278 InEscape = true;
279 continue;
280 }
281 if (Code[Length] == Marker) {
282 Result->Kind = TokenInfo::TK_Literal;
283 Result->Text = Code.substr(Start: 0, N: Length + 1);
284 Result->Value = Code.substr(Start: 1, N: Length - 1);
285 Code = Code.drop_front(N: Length + 1);
286 return;
287 }
288 }
289
290 StringRef ErrorText = Code;
291 Code = Code.drop_front(N: Code.size());
292 SourceRange Range;
293 Range.Start = Result->Range.Start;
294 Range.End = currentLocation();
295 Error->addError(Range, Error: Error->ET_ParserStringError) << ErrorText;
296 Result->Kind = TokenInfo::TK_Error;
297 }
298
299 /// Consume all leading whitespace from \c Code.
300 void consumeWhitespace() {
301 // Don't trim newlines.
302 Code = Code.ltrim(Chars: " \t\v\f\r");
303 }
304
305 SourceLocation currentLocation() {
306 SourceLocation Location;
307 Location.Line = Line;
308 Location.Column = Code.data() - StartOfLine.data() + 1;
309 return Location;
310 }
311
312 StringRef &Code;
313 StringRef StartOfLine;
314 unsigned Line = 1;
315 Diagnostics *Error;
316 TokenInfo NextToken;
317 const char *CodeCompletionLocation = nullptr;
318};
319
320Parser::Sema::~Sema() = default;
321
322std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
323 llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
324 return {};
325}
326
327std::vector<MatcherCompletion>
328Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
329 return {};
330}
331
332struct Parser::ScopedContextEntry {
333 Parser *P;
334
335 ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
336 P->ContextStack.push_back(x: std::make_pair(x&: C, y: 0u));
337 }
338
339 ~ScopedContextEntry() {
340 P->ContextStack.pop_back();
341 }
342
343 void nextArg() {
344 ++P->ContextStack.back().second;
345 }
346};
347
348/// Parse expressions that start with an identifier.
349///
350/// This function can parse named values and matchers.
351/// In case of failure it will try to determine the user's intent to give
352/// an appropriate error message.
353bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
354 const TokenInfo NameToken = Tokenizer->consumeNextToken();
355
356 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
357 // Parse as a named value.
358 if (const VariantValue NamedValue =
359 NamedValues ? NamedValues->lookup(Key: NameToken.Text)
360 : VariantValue()) {
361
362 if (Tokenizer->nextTokenKind() != TokenInfo::TK_Period) {
363 *Value = NamedValue;
364 return true;
365 }
366
367 std::string BindID;
368 Tokenizer->consumeNextToken();
369 TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
370 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
371 addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("bind(\"", "bind", 1));
372 return false;
373 }
374
375 if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
376 (ChainCallToken.Text != TokenInfo::ID_Bind &&
377 ChainCallToken.Text != TokenInfo::ID_With)) {
378 Error->addError(Range: ChainCallToken.Range,
379 Error: Error->ET_ParserMalformedChainedExpr);
380 return false;
381 }
382 if (ChainCallToken.Text == TokenInfo::ID_With) {
383
384 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
385 NameToken.Text, NameToken.Range);
386
387 Error->addError(Range: ChainCallToken.Range,
388 Error: Error->ET_RegistryMatcherNoWithSupport);
389 return false;
390 }
391 if (!parseBindID(BindID))
392 return false;
393
394 assert(NamedValue.isMatcher());
395 std::optional<DynTypedMatcher> Result =
396 NamedValue.getMatcher().getSingleMatcher();
397 if (Result) {
398 std::optional<DynTypedMatcher> Bound = Result->tryBind(ID: BindID);
399 if (Bound) {
400 *Value = VariantMatcher::SingleMatcher(Matcher: *Bound);
401 return true;
402 }
403 }
404 return false;
405 }
406
407 if (Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine) {
408 Error->addError(Range: Tokenizer->peekNextToken().Range,
409 Error: Error->ET_ParserNoOpenParen)
410 << "NewLine";
411 return false;
412 }
413
414 // If the syntax is correct and the name is not a matcher either, report
415 // unknown named value.
416 if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
417 Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
418 Tokenizer->nextTokenKind() == TokenInfo::TK_NewLine ||
419 Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
420 !S->lookupMatcherCtor(MatcherName: NameToken.Text)) {
421 Error->addError(Range: NameToken.Range, Error: Error->ET_RegistryValueNotFound)
422 << NameToken.Text;
423 return false;
424 }
425 // Otherwise, fallback to the matcher parser.
426 }
427
428 Tokenizer->SkipNewlines();
429
430 assert(NameToken.Kind == TokenInfo::TK_Ident);
431 TokenInfo OpenToken = Tokenizer->consumeNextToken();
432 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
433 Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserNoOpenParen)
434 << OpenToken.Text;
435 return false;
436 }
437
438 std::optional<MatcherCtor> Ctor = S->lookupMatcherCtor(MatcherName: NameToken.Text);
439
440 // Parse as a matcher expression.
441 return parseMatcherExpressionImpl(NameToken, OpenToken, Ctor, Value);
442}
443
444bool Parser::parseBindID(std::string &BindID) {
445 // Parse the parenthesized argument to .bind("foo")
446 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
447 const TokenInfo IDToken = Tokenizer->consumeNextTokenIgnoreNewlines();
448 const TokenInfo CloseToken = Tokenizer->consumeNextTokenIgnoreNewlines();
449
450 // TODO: We could use different error codes for each/some to be more
451 // explicit about the syntax error.
452 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
453 Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserMalformedBindExpr);
454 return false;
455 }
456 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
457 Error->addError(Range: IDToken.Range, Error: Error->ET_ParserMalformedBindExpr);
458 return false;
459 }
460 if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
461 Error->addError(Range: CloseToken.Range, Error: Error->ET_ParserMalformedBindExpr);
462 return false;
463 }
464 BindID = IDToken.Value.getString();
465 return true;
466}
467
468bool Parser::parseMatcherBuilder(MatcherCtor Ctor, const TokenInfo &NameToken,
469 const TokenInfo &OpenToken,
470 VariantValue *Value) {
471 std::vector<ParserValue> Args;
472 TokenInfo EndToken;
473
474 Tokenizer->SkipNewlines();
475
476 {
477 ScopedContextEntry SCE(this, Ctor);
478
479 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
480 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
481 // End of args.
482 EndToken = Tokenizer->consumeNextToken();
483 break;
484 }
485 if (!Args.empty()) {
486 // We must find a , token to continue.
487 TokenInfo CommaToken = Tokenizer->consumeNextToken();
488 if (CommaToken.Kind != TokenInfo::TK_Comma) {
489 Error->addError(Range: CommaToken.Range, Error: Error->ET_ParserNoComma)
490 << CommaToken.Text;
491 return false;
492 }
493 // Allow for a trailing , token and possibly a new line.
494 Tokenizer->SkipNewlines();
495 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
496 continue;
497 }
498 }
499
500 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
501 NameToken.Text, NameToken.Range,
502 Args.size() + 1);
503 ParserValue ArgValue;
504 Tokenizer->SkipNewlines();
505
506 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_CodeCompletion) {
507 addExpressionCompletions();
508 return false;
509 }
510
511 TokenInfo NodeMatcherToken = Tokenizer->consumeNextToken();
512
513 if (NodeMatcherToken.Kind != TokenInfo::TK_Ident) {
514 Error->addError(Range: NameToken.Range, Error: Error->ET_ParserFailedToBuildMatcher)
515 << NameToken.Text;
516 return false;
517 }
518
519 ArgValue.Text = NodeMatcherToken.Text;
520 ArgValue.Range = NodeMatcherToken.Range;
521
522 std::optional<MatcherCtor> MappedMatcher =
523 S->lookupMatcherCtor(MatcherName: ArgValue.Text);
524
525 if (!MappedMatcher) {
526 Error->addError(Range: NodeMatcherToken.Range,
527 Error: Error->ET_RegistryMatcherNotFound)
528 << NodeMatcherToken.Text;
529 return false;
530 }
531
532 ASTNodeKind NK = S->nodeMatcherType(*MappedMatcher);
533
534 if (NK.isNone()) {
535 Error->addError(Range: NodeMatcherToken.Range,
536 Error: Error->ET_RegistryNonNodeMatcher)
537 << NodeMatcherToken.Text;
538 return false;
539 }
540
541 ArgValue.Value = NK;
542
543 Tokenizer->SkipNewlines();
544 Args.push_back(x: ArgValue);
545
546 SCE.nextArg();
547 }
548 }
549
550 if (EndToken.Kind == TokenInfo::TK_Eof) {
551 Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserNoCloseParen);
552 return false;
553 }
554
555 internal::MatcherDescriptorPtr BuiltCtor =
556 S->buildMatcherCtor(Ctor, NameRange: NameToken.Range, Args, Error);
557
558 if (!BuiltCtor.get()) {
559 Error->addError(Range: NameToken.Range, Error: Error->ET_ParserFailedToBuildMatcher)
560 << NameToken.Text;
561 return false;
562 }
563
564 std::string BindID;
565 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
566 Tokenizer->consumeNextToken();
567 TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
568 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
569 addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("bind(\"", "bind", 1));
570 addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("with(", "with", 1));
571 return false;
572 }
573 if (ChainCallToken.Kind != TokenInfo::TK_Ident ||
574 (ChainCallToken.Text != TokenInfo::ID_Bind &&
575 ChainCallToken.Text != TokenInfo::ID_With)) {
576 Error->addError(Range: ChainCallToken.Range,
577 Error: Error->ET_ParserMalformedChainedExpr);
578 return false;
579 }
580 if (ChainCallToken.Text == TokenInfo::ID_Bind) {
581 if (!parseBindID(BindID))
582 return false;
583 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
584 NameToken.Text, NameToken.Range);
585 SourceRange MatcherRange = NameToken.Range;
586 MatcherRange.End = ChainCallToken.Range.End;
587 VariantMatcher Result = S->actOnMatcherExpression(
588 Ctor: BuiltCtor.get(), NameRange: MatcherRange, BindID, Args: {}, Error);
589 if (Result.isNull())
590 return false;
591
592 *Value = Result;
593 return true;
594 } else if (ChainCallToken.Text == TokenInfo::ID_With) {
595 Tokenizer->SkipNewlines();
596
597 if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
598 StringRef ErrTxt = Tokenizer->nextTokenKind() == TokenInfo::TK_Eof
599 ? StringRef("EOF")
600 : Tokenizer->peekNextToken().Text;
601 Error->addError(Range: Tokenizer->peekNextToken().Range,
602 Error: Error->ET_ParserNoOpenParen)
603 << ErrTxt;
604 return false;
605 }
606
607 TokenInfo WithOpenToken = Tokenizer->consumeNextToken();
608
609 return parseMatcherExpressionImpl(NameToken, OpenToken: WithOpenToken,
610 Ctor: BuiltCtor.get(), Value);
611 }
612 }
613
614 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
615 NameToken.Text, NameToken.Range);
616 SourceRange MatcherRange = NameToken.Range;
617 MatcherRange.End = EndToken.Range.End;
618 VariantMatcher Result = S->actOnMatcherExpression(
619 Ctor: BuiltCtor.get(), NameRange: MatcherRange, BindID, Args: {}, Error);
620 if (Result.isNull())
621 return false;
622
623 *Value = Result;
624 return true;
625}
626
627/// Parse and validate a matcher expression.
628/// \return \c true on success, in which case \c Value has the matcher parsed.
629/// If the input is malformed, or some argument has an error, it
630/// returns \c false.
631bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
632 const TokenInfo &OpenToken,
633 std::optional<MatcherCtor> Ctor,
634 VariantValue *Value) {
635 if (!Ctor) {
636 Error->addError(Range: NameToken.Range, Error: Error->ET_RegistryMatcherNotFound)
637 << NameToken.Text;
638 // Do not return here. We need to continue to give completion suggestions.
639 }
640
641 if (Ctor && *Ctor && S->isBuilderMatcher(*Ctor))
642 return parseMatcherBuilder(Ctor: *Ctor, NameToken, OpenToken, Value);
643
644 std::vector<ParserValue> Args;
645 TokenInfo EndToken;
646
647 Tokenizer->SkipNewlines();
648
649 {
650 ScopedContextEntry SCE(this, Ctor.value_or(u: nullptr));
651
652 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
653 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
654 // End of args.
655 EndToken = Tokenizer->consumeNextToken();
656 break;
657 }
658 if (!Args.empty()) {
659 // We must find a , token to continue.
660 const TokenInfo CommaToken = Tokenizer->consumeNextToken();
661 if (CommaToken.Kind != TokenInfo::TK_Comma) {
662 Error->addError(Range: CommaToken.Range, Error: Error->ET_ParserNoComma)
663 << CommaToken.Text;
664 return false;
665 }
666 // Allow for a trailing , token and possibly a new line.
667 Tokenizer->SkipNewlines();
668 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
669 continue;
670 }
671 }
672
673 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
674 NameToken.Text, NameToken.Range,
675 Args.size() + 1);
676 ParserValue ArgValue;
677 Tokenizer->SkipNewlines();
678 ArgValue.Text = Tokenizer->peekNextToken().Text;
679 ArgValue.Range = Tokenizer->peekNextToken().Range;
680 if (!parseExpressionImpl(Value: &ArgValue.Value)) {
681 return false;
682 }
683
684 Tokenizer->SkipNewlines();
685 Args.push_back(x: ArgValue);
686 SCE.nextArg();
687 }
688 }
689
690 if (EndToken.Kind == TokenInfo::TK_Eof) {
691 Error->addError(Range: OpenToken.Range, Error: Error->ET_ParserNoCloseParen);
692 return false;
693 }
694
695 std::string BindID;
696 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
697 Tokenizer->consumeNextToken();
698 TokenInfo ChainCallToken = Tokenizer->consumeNextToken();
699 if (ChainCallToken.Kind == TokenInfo::TK_CodeCompletion) {
700 addCompletion(CompToken: ChainCallToken, Completion: MatcherCompletion("bind(\"", "bind", 1));
701 return false;
702 }
703
704 if (ChainCallToken.Kind != TokenInfo::TK_Ident) {
705 Error->addError(Range: ChainCallToken.Range,
706 Error: Error->ET_ParserMalformedChainedExpr);
707 return false;
708 }
709 if (ChainCallToken.Text == TokenInfo::ID_With) {
710
711 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
712 NameToken.Text, NameToken.Range);
713
714 Error->addError(Range: ChainCallToken.Range,
715 Error: Error->ET_RegistryMatcherNoWithSupport);
716 return false;
717 }
718 if (ChainCallToken.Text != TokenInfo::ID_Bind) {
719 Error->addError(Range: ChainCallToken.Range,
720 Error: Error->ET_ParserMalformedChainedExpr);
721 return false;
722 }
723 if (!parseBindID(BindID))
724 return false;
725 }
726
727 if (!Ctor)
728 return false;
729
730 // Merge the start and end infos.
731 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
732 NameToken.Text, NameToken.Range);
733 SourceRange MatcherRange = NameToken.Range;
734 MatcherRange.End = EndToken.Range.End;
735 VariantMatcher Result = S->actOnMatcherExpression(
736 Ctor: *Ctor, NameRange: MatcherRange, BindID, Args, Error);
737 if (Result.isNull()) return false;
738
739 *Value = Result;
740 return true;
741}
742
743// If the prefix of this completion matches the completion token, add it to
744// Completions minus the prefix.
745void Parser::addCompletion(const TokenInfo &CompToken,
746 const MatcherCompletion& Completion) {
747 if (StringRef(Completion.TypedText).starts_with(Prefix: CompToken.Text) &&
748 Completion.Specificity > 0) {
749 Completions.emplace_back(args: Completion.TypedText.substr(pos: CompToken.Text.size()),
750 args: Completion.MatcherDecl, args: Completion.Specificity);
751 }
752}
753
754std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
755 ArrayRef<ArgKind> AcceptedTypes) {
756 if (!NamedValues) return std::vector<MatcherCompletion>();
757 std::vector<MatcherCompletion> Result;
758 for (const auto &Entry : *NamedValues) {
759 unsigned Specificity;
760 if (Entry.getValue().isConvertibleTo(Kinds: AcceptedTypes, Specificity: &Specificity)) {
761 std::string Decl =
762 (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
763 Result.emplace_back(args: Entry.getKey(), args&: Decl, args&: Specificity);
764 }
765 }
766 return Result;
767}
768
769void Parser::addExpressionCompletions() {
770 const TokenInfo CompToken = Tokenizer->consumeNextTokenIgnoreNewlines();
771 assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
772
773 // We cannot complete code if there is an invalid element on the context
774 // stack.
775 for (ContextStackTy::iterator I = ContextStack.begin(),
776 E = ContextStack.end();
777 I != E; ++I) {
778 if (!I->first)
779 return;
780 }
781
782 auto AcceptedTypes = S->getAcceptedCompletionTypes(Context: ContextStack);
783 for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
784 addCompletion(CompToken, Completion);
785 }
786
787 for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
788 addCompletion(CompToken, Completion);
789 }
790}
791
792/// Parse an <Expression>
793bool Parser::parseExpressionImpl(VariantValue *Value) {
794 switch (Tokenizer->nextTokenKind()) {
795 case TokenInfo::TK_Literal:
796 *Value = Tokenizer->consumeNextToken().Value;
797 return true;
798
799 case TokenInfo::TK_Ident:
800 return parseIdentifierPrefixImpl(Value);
801
802 case TokenInfo::TK_CodeCompletion:
803 addExpressionCompletions();
804 return false;
805
806 case TokenInfo::TK_Eof:
807 Error->addError(Range: Tokenizer->consumeNextToken().Range,
808 Error: Error->ET_ParserNoCode);
809 return false;
810
811 case TokenInfo::TK_Error:
812 // This error was already reported by the tokenizer.
813 return false;
814 case TokenInfo::TK_NewLine:
815 case TokenInfo::TK_OpenParen:
816 case TokenInfo::TK_CloseParen:
817 case TokenInfo::TK_Comma:
818 case TokenInfo::TK_Period:
819 case TokenInfo::TK_InvalidChar:
820 const TokenInfo Token = Tokenizer->consumeNextToken();
821 Error->addError(Range: Token.Range, Error: Error->ET_ParserInvalidToken)
822 << (Token.Kind == TokenInfo::TK_NewLine ? "NewLine" : Token.Text);
823 return false;
824 }
825
826 llvm_unreachable("Unknown token kind.");
827}
828
829static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;
830
831Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
832 const NamedValueMap *NamedValues, Diagnostics *Error)
833 : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
834 NamedValues(NamedValues), Error(Error) {}
835
836Parser::RegistrySema::~RegistrySema() = default;
837
838std::optional<MatcherCtor>
839Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
840 return Registry::lookupMatcherCtor(MatcherName);
841}
842
843VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
844 MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
845 ArrayRef<ParserValue> Args, Diagnostics *Error) {
846 if (BindID.empty()) {
847 return Registry::constructMatcher(Ctor, NameRange, Args, Error);
848 } else {
849 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
850 Error);
851 }
852}
853
854std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
855 ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
856 return Registry::getAcceptedCompletionTypes(Context);
857}
858
859std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
860 ArrayRef<ArgKind> AcceptedTypes) {
861 return Registry::getMatcherCompletions(AcceptedTypes);
862}
863
864bool Parser::RegistrySema::isBuilderMatcher(MatcherCtor Ctor) const {
865 return Registry::isBuilderMatcher(Ctor);
866}
867
868ASTNodeKind Parser::RegistrySema::nodeMatcherType(MatcherCtor Ctor) const {
869 return Registry::nodeMatcherType(Ctor);
870}
871
872internal::MatcherDescriptorPtr
873Parser::RegistrySema::buildMatcherCtor(MatcherCtor Ctor, SourceRange NameRange,
874 ArrayRef<ParserValue> Args,
875 Diagnostics *Error) const {
876 return Registry::buildMatcherCtor(Ctor, NameRange, Args, Error);
877}
878
879bool Parser::parseExpression(StringRef &Code, Sema *S,
880 const NamedValueMap *NamedValues,
881 VariantValue *Value, Diagnostics *Error) {
882 CodeTokenizer Tokenizer(Code, Error);
883 if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
884 return false;
885 auto NT = Tokenizer.peekNextToken();
886 if (NT.Kind != TokenInfo::TK_Eof && NT.Kind != TokenInfo::TK_NewLine) {
887 Error->addError(Range: Tokenizer.peekNextToken().Range,
888 Error: Error->ET_ParserTrailingCode);
889 return false;
890 }
891 return true;
892}
893
894std::vector<MatcherCompletion>
895Parser::completeExpression(StringRef &Code, unsigned CompletionOffset, Sema *S,
896 const NamedValueMap *NamedValues) {
897 Diagnostics Error;
898 CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
899 Parser P(&Tokenizer, S, NamedValues, &Error);
900 VariantValue Dummy;
901 P.parseExpressionImpl(Value: &Dummy);
902
903 // Sort by specificity, then by name.
904 llvm::sort(C&: P.Completions,
905 Comp: [](const MatcherCompletion &A, const MatcherCompletion &B) {
906 if (A.Specificity != B.Specificity)
907 return A.Specificity > B.Specificity;
908 return A.TypedText < B.TypedText;
909 });
910
911 return P.Completions;
912}
913
914std::optional<DynTypedMatcher>
915Parser::parseMatcherExpression(StringRef &Code, Sema *S,
916 const NamedValueMap *NamedValues,
917 Diagnostics *Error) {
918 VariantValue Value;
919 if (!parseExpression(Code, S, NamedValues, Value: &Value, Error))
920 return std::nullopt;
921 if (!Value.isMatcher()) {
922 Error->addError(Range: SourceRange(), Error: Error->ET_ParserNotAMatcher);
923 return std::nullopt;
924 }
925 std::optional<DynTypedMatcher> Result = Value.getMatcher().getSingleMatcher();
926 if (!Result) {
927 Error->addError(Range: SourceRange(), Error: Error->ET_ParserOverloadedType)
928 << Value.getTypeAsString();
929 }
930 return Result;
931}
932
933} // namespace dynamic
934} // namespace ast_matchers
935} // namespace clang
936