xref: /aosp_15_r20/external/clang/lib/ASTMatchers/Dynamic/Parser.cpp (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li //===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===//
2*67e74705SXin Li //
3*67e74705SXin Li //                     The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li ///
10*67e74705SXin Li /// \file
11*67e74705SXin Li /// \brief Recursive parser implementation for the matcher expression grammar.
12*67e74705SXin Li ///
13*67e74705SXin Li //===----------------------------------------------------------------------===//
14*67e74705SXin Li 
#include "clang/ASTMatchers/Dynamic/Parser.h"
#include "clang/ASTMatchers/Dynamic/Registry.h"
#include "clang/Basic/CharInfo.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ManagedStatic.h"
#include <algorithm>
#include <string>
#include <utility>
#include <vector>
23*67e74705SXin Li 
24*67e74705SXin Li namespace clang {
25*67e74705SXin Li namespace ast_matchers {
26*67e74705SXin Li namespace dynamic {
27*67e74705SXin Li 
28*67e74705SXin Li /// \brief Simple structure to hold information for one token from the parser.
29*67e74705SXin Li struct Parser::TokenInfo {
30*67e74705SXin Li   /// \brief Different possible tokens.
31*67e74705SXin Li   enum TokenKind {
32*67e74705SXin Li     TK_Eof,
33*67e74705SXin Li     TK_OpenParen,
34*67e74705SXin Li     TK_CloseParen,
35*67e74705SXin Li     TK_Comma,
36*67e74705SXin Li     TK_Period,
37*67e74705SXin Li     TK_Literal,
38*67e74705SXin Li     TK_Ident,
39*67e74705SXin Li     TK_InvalidChar,
40*67e74705SXin Li     TK_Error,
41*67e74705SXin Li     TK_CodeCompletion
42*67e74705SXin Li   };
43*67e74705SXin Li 
44*67e74705SXin Li   /// \brief Some known identifiers.
45*67e74705SXin Li   static const char* const ID_Bind;
46*67e74705SXin Li 
TokenInfoclang::ast_matchers::dynamic::Parser::TokenInfo47*67e74705SXin Li   TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {}
48*67e74705SXin Li 
49*67e74705SXin Li   StringRef Text;
50*67e74705SXin Li   TokenKind Kind;
51*67e74705SXin Li   SourceRange Range;
52*67e74705SXin Li   VariantValue Value;
53*67e74705SXin Li };
54*67e74705SXin Li 
55*67e74705SXin Li const char* const Parser::TokenInfo::ID_Bind = "bind";
56*67e74705SXin Li 
57*67e74705SXin Li /// \brief Simple tokenizer for the parser.
58*67e74705SXin Li class Parser::CodeTokenizer {
59*67e74705SXin Li public:
CodeTokenizer(StringRef MatcherCode,Diagnostics * Error)60*67e74705SXin Li   explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
61*67e74705SXin Li       : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error),
62*67e74705SXin Li         CodeCompletionLocation(nullptr) {
63*67e74705SXin Li     NextToken = getNextToken();
64*67e74705SXin Li   }
65*67e74705SXin Li 
CodeTokenizer(StringRef MatcherCode,Diagnostics * Error,unsigned CodeCompletionOffset)66*67e74705SXin Li   CodeTokenizer(StringRef MatcherCode, Diagnostics *Error,
67*67e74705SXin Li                 unsigned CodeCompletionOffset)
68*67e74705SXin Li       : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error),
69*67e74705SXin Li         CodeCompletionLocation(MatcherCode.data() + CodeCompletionOffset) {
70*67e74705SXin Li     NextToken = getNextToken();
71*67e74705SXin Li   }
72*67e74705SXin Li 
73*67e74705SXin Li   /// \brief Returns but doesn't consume the next token.
peekNextToken() const74*67e74705SXin Li   const TokenInfo &peekNextToken() const { return NextToken; }
75*67e74705SXin Li 
76*67e74705SXin Li   /// \brief Consumes and returns the next token.
consumeNextToken()77*67e74705SXin Li   TokenInfo consumeNextToken() {
78*67e74705SXin Li     TokenInfo ThisToken = NextToken;
79*67e74705SXin Li     NextToken = getNextToken();
80*67e74705SXin Li     return ThisToken;
81*67e74705SXin Li   }
82*67e74705SXin Li 
nextTokenKind() const83*67e74705SXin Li   TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
84*67e74705SXin Li 
85*67e74705SXin Li private:
getNextToken()86*67e74705SXin Li   TokenInfo getNextToken() {
87*67e74705SXin Li     consumeWhitespace();
88*67e74705SXin Li     TokenInfo Result;
89*67e74705SXin Li     Result.Range.Start = currentLocation();
90*67e74705SXin Li 
91*67e74705SXin Li     if (CodeCompletionLocation && CodeCompletionLocation <= Code.data()) {
92*67e74705SXin Li       Result.Kind = TokenInfo::TK_CodeCompletion;
93*67e74705SXin Li       Result.Text = StringRef(CodeCompletionLocation, 0);
94*67e74705SXin Li       CodeCompletionLocation = nullptr;
95*67e74705SXin Li       return Result;
96*67e74705SXin Li     }
97*67e74705SXin Li 
98*67e74705SXin Li     if (Code.empty()) {
99*67e74705SXin Li       Result.Kind = TokenInfo::TK_Eof;
100*67e74705SXin Li       Result.Text = "";
101*67e74705SXin Li       return Result;
102*67e74705SXin Li     }
103*67e74705SXin Li 
104*67e74705SXin Li     switch (Code[0]) {
105*67e74705SXin Li     case ',':
106*67e74705SXin Li       Result.Kind = TokenInfo::TK_Comma;
107*67e74705SXin Li       Result.Text = Code.substr(0, 1);
108*67e74705SXin Li       Code = Code.drop_front();
109*67e74705SXin Li       break;
110*67e74705SXin Li     case '.':
111*67e74705SXin Li       Result.Kind = TokenInfo::TK_Period;
112*67e74705SXin Li       Result.Text = Code.substr(0, 1);
113*67e74705SXin Li       Code = Code.drop_front();
114*67e74705SXin Li       break;
115*67e74705SXin Li     case '(':
116*67e74705SXin Li       Result.Kind = TokenInfo::TK_OpenParen;
117*67e74705SXin Li       Result.Text = Code.substr(0, 1);
118*67e74705SXin Li       Code = Code.drop_front();
119*67e74705SXin Li       break;
120*67e74705SXin Li     case ')':
121*67e74705SXin Li       Result.Kind = TokenInfo::TK_CloseParen;
122*67e74705SXin Li       Result.Text = Code.substr(0, 1);
123*67e74705SXin Li       Code = Code.drop_front();
124*67e74705SXin Li       break;
125*67e74705SXin Li 
126*67e74705SXin Li     case '"':
127*67e74705SXin Li     case '\'':
128*67e74705SXin Li       // Parse a string literal.
129*67e74705SXin Li       consumeStringLiteral(&Result);
130*67e74705SXin Li       break;
131*67e74705SXin Li 
132*67e74705SXin Li     case '0': case '1': case '2': case '3': case '4':
133*67e74705SXin Li     case '5': case '6': case '7': case '8': case '9':
134*67e74705SXin Li       // Parse an unsigned literal.
135*67e74705SXin Li       consumeUnsignedLiteral(&Result);
136*67e74705SXin Li       break;
137*67e74705SXin Li 
138*67e74705SXin Li     default:
139*67e74705SXin Li       if (isAlphanumeric(Code[0])) {
140*67e74705SXin Li         // Parse an identifier
141*67e74705SXin Li         size_t TokenLength = 1;
142*67e74705SXin Li         while (1) {
143*67e74705SXin Li           // A code completion location in/immediately after an identifier will
144*67e74705SXin Li           // cause the portion of the identifier before the code completion
145*67e74705SXin Li           // location to become a code completion token.
146*67e74705SXin Li           if (CodeCompletionLocation == Code.data() + TokenLength) {
147*67e74705SXin Li             CodeCompletionLocation = nullptr;
148*67e74705SXin Li             Result.Kind = TokenInfo::TK_CodeCompletion;
149*67e74705SXin Li             Result.Text = Code.substr(0, TokenLength);
150*67e74705SXin Li             Code = Code.drop_front(TokenLength);
151*67e74705SXin Li             return Result;
152*67e74705SXin Li           }
153*67e74705SXin Li           if (TokenLength == Code.size() || !isAlphanumeric(Code[TokenLength]))
154*67e74705SXin Li             break;
155*67e74705SXin Li           ++TokenLength;
156*67e74705SXin Li         }
157*67e74705SXin Li         Result.Kind = TokenInfo::TK_Ident;
158*67e74705SXin Li         Result.Text = Code.substr(0, TokenLength);
159*67e74705SXin Li         Code = Code.drop_front(TokenLength);
160*67e74705SXin Li       } else {
161*67e74705SXin Li         Result.Kind = TokenInfo::TK_InvalidChar;
162*67e74705SXin Li         Result.Text = Code.substr(0, 1);
163*67e74705SXin Li         Code = Code.drop_front(1);
164*67e74705SXin Li       }
165*67e74705SXin Li       break;
166*67e74705SXin Li     }
167*67e74705SXin Li 
168*67e74705SXin Li     Result.Range.End = currentLocation();
169*67e74705SXin Li     return Result;
170*67e74705SXin Li   }
171*67e74705SXin Li 
172*67e74705SXin Li   /// \brief Consume an unsigned literal.
consumeUnsignedLiteral(TokenInfo * Result)173*67e74705SXin Li   void consumeUnsignedLiteral(TokenInfo *Result) {
174*67e74705SXin Li     unsigned Length = 1;
175*67e74705SXin Li     if (Code.size() > 1) {
176*67e74705SXin Li       // Consume the 'x' or 'b' radix modifier, if present.
177*67e74705SXin Li       switch (toLowercase(Code[1])) {
178*67e74705SXin Li       case 'x': case 'b': Length = 2;
179*67e74705SXin Li       }
180*67e74705SXin Li     }
181*67e74705SXin Li     while (Length < Code.size() && isHexDigit(Code[Length]))
182*67e74705SXin Li       ++Length;
183*67e74705SXin Li 
184*67e74705SXin Li     Result->Text = Code.substr(0, Length);
185*67e74705SXin Li     Code = Code.drop_front(Length);
186*67e74705SXin Li 
187*67e74705SXin Li     unsigned Value;
188*67e74705SXin Li     if (!Result->Text.getAsInteger(0, Value)) {
189*67e74705SXin Li       Result->Kind = TokenInfo::TK_Literal;
190*67e74705SXin Li       Result->Value = Value;
191*67e74705SXin Li     } else {
192*67e74705SXin Li       SourceRange Range;
193*67e74705SXin Li       Range.Start = Result->Range.Start;
194*67e74705SXin Li       Range.End = currentLocation();
195*67e74705SXin Li       Error->addError(Range, Error->ET_ParserUnsignedError) << Result->Text;
196*67e74705SXin Li       Result->Kind = TokenInfo::TK_Error;
197*67e74705SXin Li     }
198*67e74705SXin Li   }
199*67e74705SXin Li 
200*67e74705SXin Li   /// \brief Consume a string literal.
201*67e74705SXin Li   ///
202*67e74705SXin Li   /// \c Code must be positioned at the start of the literal (the opening
203*67e74705SXin Li   /// quote). Consumed until it finds the same closing quote character.
consumeStringLiteral(TokenInfo * Result)204*67e74705SXin Li   void consumeStringLiteral(TokenInfo *Result) {
205*67e74705SXin Li     bool InEscape = false;
206*67e74705SXin Li     const char Marker = Code[0];
207*67e74705SXin Li     for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
208*67e74705SXin Li       if (InEscape) {
209*67e74705SXin Li         InEscape = false;
210*67e74705SXin Li         continue;
211*67e74705SXin Li       }
212*67e74705SXin Li       if (Code[Length] == '\\') {
213*67e74705SXin Li         InEscape = true;
214*67e74705SXin Li         continue;
215*67e74705SXin Li       }
216*67e74705SXin Li       if (Code[Length] == Marker) {
217*67e74705SXin Li         Result->Kind = TokenInfo::TK_Literal;
218*67e74705SXin Li         Result->Text = Code.substr(0, Length + 1);
219*67e74705SXin Li         Result->Value = Code.substr(1, Length - 1);
220*67e74705SXin Li         Code = Code.drop_front(Length + 1);
221*67e74705SXin Li         return;
222*67e74705SXin Li       }
223*67e74705SXin Li     }
224*67e74705SXin Li 
225*67e74705SXin Li     StringRef ErrorText = Code;
226*67e74705SXin Li     Code = Code.drop_front(Code.size());
227*67e74705SXin Li     SourceRange Range;
228*67e74705SXin Li     Range.Start = Result->Range.Start;
229*67e74705SXin Li     Range.End = currentLocation();
230*67e74705SXin Li     Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
231*67e74705SXin Li     Result->Kind = TokenInfo::TK_Error;
232*67e74705SXin Li   }
233*67e74705SXin Li 
234*67e74705SXin Li   /// \brief Consume all leading whitespace from \c Code.
consumeWhitespace()235*67e74705SXin Li   void consumeWhitespace() {
236*67e74705SXin Li     while (!Code.empty() && isWhitespace(Code[0])) {
237*67e74705SXin Li       if (Code[0] == '\n') {
238*67e74705SXin Li         ++Line;
239*67e74705SXin Li         StartOfLine = Code.drop_front();
240*67e74705SXin Li       }
241*67e74705SXin Li       Code = Code.drop_front();
242*67e74705SXin Li     }
243*67e74705SXin Li   }
244*67e74705SXin Li 
currentLocation()245*67e74705SXin Li   SourceLocation currentLocation() {
246*67e74705SXin Li     SourceLocation Location;
247*67e74705SXin Li     Location.Line = Line;
248*67e74705SXin Li     Location.Column = Code.data() - StartOfLine.data() + 1;
249*67e74705SXin Li     return Location;
250*67e74705SXin Li   }
251*67e74705SXin Li 
252*67e74705SXin Li   StringRef Code;
253*67e74705SXin Li   StringRef StartOfLine;
254*67e74705SXin Li   unsigned Line;
255*67e74705SXin Li   Diagnostics *Error;
256*67e74705SXin Li   TokenInfo NextToken;
257*67e74705SXin Li   const char *CodeCompletionLocation;
258*67e74705SXin Li };
259*67e74705SXin Li 
~Sema()260*67e74705SXin Li Parser::Sema::~Sema() {}
261*67e74705SXin Li 
getAcceptedCompletionTypes(llvm::ArrayRef<std::pair<MatcherCtor,unsigned>> Context)262*67e74705SXin Li std::vector<ArgKind> Parser::Sema::getAcceptedCompletionTypes(
263*67e74705SXin Li     llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
264*67e74705SXin Li   return std::vector<ArgKind>();
265*67e74705SXin Li }
266*67e74705SXin Li 
267*67e74705SXin Li std::vector<MatcherCompletion>
getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes)268*67e74705SXin Li Parser::Sema::getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) {
269*67e74705SXin Li   return std::vector<MatcherCompletion>();
270*67e74705SXin Li }
271*67e74705SXin Li 
272*67e74705SXin Li struct Parser::ScopedContextEntry {
273*67e74705SXin Li   Parser *P;
274*67e74705SXin Li 
ScopedContextEntryclang::ast_matchers::dynamic::Parser::ScopedContextEntry275*67e74705SXin Li   ScopedContextEntry(Parser *P, MatcherCtor C) : P(P) {
276*67e74705SXin Li     P->ContextStack.push_back(std::make_pair(C, 0u));
277*67e74705SXin Li   }
278*67e74705SXin Li 
~ScopedContextEntryclang::ast_matchers::dynamic::Parser::ScopedContextEntry279*67e74705SXin Li   ~ScopedContextEntry() {
280*67e74705SXin Li     P->ContextStack.pop_back();
281*67e74705SXin Li   }
282*67e74705SXin Li 
nextArgclang::ast_matchers::dynamic::Parser::ScopedContextEntry283*67e74705SXin Li   void nextArg() {
284*67e74705SXin Li     ++P->ContextStack.back().second;
285*67e74705SXin Li   }
286*67e74705SXin Li };
287*67e74705SXin Li 
288*67e74705SXin Li /// \brief Parse expressions that start with an identifier.
289*67e74705SXin Li ///
290*67e74705SXin Li /// This function can parse named values and matchers.
291*67e74705SXin Li /// In case of failure it will try to determine the user's intent to give
292*67e74705SXin Li /// an appropriate error message.
parseIdentifierPrefixImpl(VariantValue * Value)293*67e74705SXin Li bool Parser::parseIdentifierPrefixImpl(VariantValue *Value) {
294*67e74705SXin Li   const TokenInfo NameToken = Tokenizer->consumeNextToken();
295*67e74705SXin Li 
296*67e74705SXin Li   if (Tokenizer->nextTokenKind() != TokenInfo::TK_OpenParen) {
297*67e74705SXin Li     // Parse as a named value.
298*67e74705SXin Li     if (const VariantValue NamedValue =
299*67e74705SXin Li             NamedValues ? NamedValues->lookup(NameToken.Text)
300*67e74705SXin Li                         : VariantValue()) {
301*67e74705SXin Li       *Value = NamedValue;
302*67e74705SXin Li       return true;
303*67e74705SXin Li     }
304*67e74705SXin Li     // If the syntax is correct and the name is not a matcher either, report
305*67e74705SXin Li     // unknown named value.
306*67e74705SXin Li     if ((Tokenizer->nextTokenKind() == TokenInfo::TK_Comma ||
307*67e74705SXin Li          Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen ||
308*67e74705SXin Li          Tokenizer->nextTokenKind() == TokenInfo::TK_Eof) &&
309*67e74705SXin Li         !S->lookupMatcherCtor(NameToken.Text)) {
310*67e74705SXin Li       Error->addError(NameToken.Range, Error->ET_RegistryValueNotFound)
311*67e74705SXin Li           << NameToken.Text;
312*67e74705SXin Li       return false;
313*67e74705SXin Li     }
314*67e74705SXin Li     // Otherwise, fallback to the matcher parser.
315*67e74705SXin Li   }
316*67e74705SXin Li 
317*67e74705SXin Li   // Parse as a matcher expression.
318*67e74705SXin Li   return parseMatcherExpressionImpl(NameToken, Value);
319*67e74705SXin Li }
320*67e74705SXin Li 
321*67e74705SXin Li /// \brief Parse and validate a matcher expression.
322*67e74705SXin Li /// \return \c true on success, in which case \c Value has the matcher parsed.
323*67e74705SXin Li ///   If the input is malformed, or some argument has an error, it
324*67e74705SXin Li ///   returns \c false.
parseMatcherExpressionImpl(const TokenInfo & NameToken,VariantValue * Value)325*67e74705SXin Li bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken,
326*67e74705SXin Li                                         VariantValue *Value) {
327*67e74705SXin Li   assert(NameToken.Kind == TokenInfo::TK_Ident);
328*67e74705SXin Li   const TokenInfo OpenToken = Tokenizer->consumeNextToken();
329*67e74705SXin Li   if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
330*67e74705SXin Li     Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
331*67e74705SXin Li         << OpenToken.Text;
332*67e74705SXin Li     return false;
333*67e74705SXin Li   }
334*67e74705SXin Li 
335*67e74705SXin Li   llvm::Optional<MatcherCtor> Ctor = S->lookupMatcherCtor(NameToken.Text);
336*67e74705SXin Li 
337*67e74705SXin Li   if (!Ctor) {
338*67e74705SXin Li     Error->addError(NameToken.Range, Error->ET_RegistryMatcherNotFound)
339*67e74705SXin Li         << NameToken.Text;
340*67e74705SXin Li     // Do not return here. We need to continue to give completion suggestions.
341*67e74705SXin Li   }
342*67e74705SXin Li 
343*67e74705SXin Li   std::vector<ParserValue> Args;
344*67e74705SXin Li   TokenInfo EndToken;
345*67e74705SXin Li 
346*67e74705SXin Li   {
347*67e74705SXin Li     ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr);
348*67e74705SXin Li 
349*67e74705SXin Li     while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
350*67e74705SXin Li       if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
351*67e74705SXin Li         // End of args.
352*67e74705SXin Li         EndToken = Tokenizer->consumeNextToken();
353*67e74705SXin Li         break;
354*67e74705SXin Li       }
355*67e74705SXin Li       if (Args.size() > 0) {
356*67e74705SXin Li         // We must find a , token to continue.
357*67e74705SXin Li         const TokenInfo CommaToken = Tokenizer->consumeNextToken();
358*67e74705SXin Li         if (CommaToken.Kind != TokenInfo::TK_Comma) {
359*67e74705SXin Li           Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
360*67e74705SXin Li               << CommaToken.Text;
361*67e74705SXin Li           return false;
362*67e74705SXin Li         }
363*67e74705SXin Li       }
364*67e74705SXin Li 
365*67e74705SXin Li       Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
366*67e74705SXin Li                                NameToken.Text, NameToken.Range,
367*67e74705SXin Li                                Args.size() + 1);
368*67e74705SXin Li       ParserValue ArgValue;
369*67e74705SXin Li       ArgValue.Text = Tokenizer->peekNextToken().Text;
370*67e74705SXin Li       ArgValue.Range = Tokenizer->peekNextToken().Range;
371*67e74705SXin Li       if (!parseExpressionImpl(&ArgValue.Value)) {
372*67e74705SXin Li         return false;
373*67e74705SXin Li       }
374*67e74705SXin Li 
375*67e74705SXin Li       Args.push_back(ArgValue);
376*67e74705SXin Li       SCE.nextArg();
377*67e74705SXin Li     }
378*67e74705SXin Li   }
379*67e74705SXin Li 
380*67e74705SXin Li   if (EndToken.Kind == TokenInfo::TK_Eof) {
381*67e74705SXin Li     Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
382*67e74705SXin Li     return false;
383*67e74705SXin Li   }
384*67e74705SXin Li 
385*67e74705SXin Li   std::string BindID;
386*67e74705SXin Li   if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
387*67e74705SXin Li     // Parse .bind("foo")
388*67e74705SXin Li     Tokenizer->consumeNextToken();  // consume the period.
389*67e74705SXin Li     const TokenInfo BindToken = Tokenizer->consumeNextToken();
390*67e74705SXin Li     if (BindToken.Kind == TokenInfo::TK_CodeCompletion) {
391*67e74705SXin Li       addCompletion(BindToken, MatcherCompletion("bind(\"", "bind", 1));
392*67e74705SXin Li       return false;
393*67e74705SXin Li     }
394*67e74705SXin Li 
395*67e74705SXin Li     const TokenInfo OpenToken = Tokenizer->consumeNextToken();
396*67e74705SXin Li     const TokenInfo IDToken = Tokenizer->consumeNextToken();
397*67e74705SXin Li     const TokenInfo CloseToken = Tokenizer->consumeNextToken();
398*67e74705SXin Li 
399*67e74705SXin Li     // TODO: We could use different error codes for each/some to be more
400*67e74705SXin Li     //       explicit about the syntax error.
401*67e74705SXin Li     if (BindToken.Kind != TokenInfo::TK_Ident ||
402*67e74705SXin Li         BindToken.Text != TokenInfo::ID_Bind) {
403*67e74705SXin Li       Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr);
404*67e74705SXin Li       return false;
405*67e74705SXin Li     }
406*67e74705SXin Li     if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
407*67e74705SXin Li       Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
408*67e74705SXin Li       return false;
409*67e74705SXin Li     }
410*67e74705SXin Li     if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
411*67e74705SXin Li       Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
412*67e74705SXin Li       return false;
413*67e74705SXin Li     }
414*67e74705SXin Li     if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
415*67e74705SXin Li       Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
416*67e74705SXin Li       return false;
417*67e74705SXin Li     }
418*67e74705SXin Li     BindID = IDToken.Value.getString();
419*67e74705SXin Li   }
420*67e74705SXin Li 
421*67e74705SXin Li   if (!Ctor)
422*67e74705SXin Li     return false;
423*67e74705SXin Li 
424*67e74705SXin Li   // Merge the start and end infos.
425*67e74705SXin Li   Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
426*67e74705SXin Li                            NameToken.Text, NameToken.Range);
427*67e74705SXin Li   SourceRange MatcherRange = NameToken.Range;
428*67e74705SXin Li   MatcherRange.End = EndToken.Range.End;
429*67e74705SXin Li   VariantMatcher Result = S->actOnMatcherExpression(
430*67e74705SXin Li       *Ctor, MatcherRange, BindID, Args, Error);
431*67e74705SXin Li   if (Result.isNull()) return false;
432*67e74705SXin Li 
433*67e74705SXin Li   *Value = Result;
434*67e74705SXin Li   return true;
435*67e74705SXin Li }
436*67e74705SXin Li 
437*67e74705SXin Li // If the prefix of this completion matches the completion token, add it to
438*67e74705SXin Li // Completions minus the prefix.
addCompletion(const TokenInfo & CompToken,const MatcherCompletion & Completion)439*67e74705SXin Li void Parser::addCompletion(const TokenInfo &CompToken,
440*67e74705SXin Li                            const MatcherCompletion& Completion) {
441*67e74705SXin Li   if (StringRef(Completion.TypedText).startswith(CompToken.Text) &&
442*67e74705SXin Li       Completion.Specificity > 0) {
443*67e74705SXin Li     Completions.emplace_back(Completion.TypedText.substr(CompToken.Text.size()),
444*67e74705SXin Li                              Completion.MatcherDecl, Completion.Specificity);
445*67e74705SXin Li   }
446*67e74705SXin Li }
447*67e74705SXin Li 
getNamedValueCompletions(ArrayRef<ArgKind> AcceptedTypes)448*67e74705SXin Li std::vector<MatcherCompletion> Parser::getNamedValueCompletions(
449*67e74705SXin Li     ArrayRef<ArgKind> AcceptedTypes) {
450*67e74705SXin Li   if (!NamedValues) return std::vector<MatcherCompletion>();
451*67e74705SXin Li   std::vector<MatcherCompletion> Result;
452*67e74705SXin Li   for (const auto &Entry : *NamedValues) {
453*67e74705SXin Li     unsigned Specificity;
454*67e74705SXin Li     if (Entry.getValue().isConvertibleTo(AcceptedTypes, &Specificity)) {
455*67e74705SXin Li       std::string Decl =
456*67e74705SXin Li           (Entry.getValue().getTypeAsString() + " " + Entry.getKey()).str();
457*67e74705SXin Li       Result.emplace_back(Entry.getKey(), Decl, Specificity);
458*67e74705SXin Li     }
459*67e74705SXin Li   }
460*67e74705SXin Li   return Result;
461*67e74705SXin Li }
462*67e74705SXin Li 
addExpressionCompletions()463*67e74705SXin Li void Parser::addExpressionCompletions() {
464*67e74705SXin Li   const TokenInfo CompToken = Tokenizer->consumeNextToken();
465*67e74705SXin Li   assert(CompToken.Kind == TokenInfo::TK_CodeCompletion);
466*67e74705SXin Li 
467*67e74705SXin Li   // We cannot complete code if there is an invalid element on the context
468*67e74705SXin Li   // stack.
469*67e74705SXin Li   for (ContextStackTy::iterator I = ContextStack.begin(),
470*67e74705SXin Li                                 E = ContextStack.end();
471*67e74705SXin Li        I != E; ++I) {
472*67e74705SXin Li     if (!I->first)
473*67e74705SXin Li       return;
474*67e74705SXin Li   }
475*67e74705SXin Li 
476*67e74705SXin Li   auto AcceptedTypes = S->getAcceptedCompletionTypes(ContextStack);
477*67e74705SXin Li   for (const auto &Completion : S->getMatcherCompletions(AcceptedTypes)) {
478*67e74705SXin Li     addCompletion(CompToken, Completion);
479*67e74705SXin Li   }
480*67e74705SXin Li 
481*67e74705SXin Li   for (const auto &Completion : getNamedValueCompletions(AcceptedTypes)) {
482*67e74705SXin Li     addCompletion(CompToken, Completion);
483*67e74705SXin Li   }
484*67e74705SXin Li }
485*67e74705SXin Li 
486*67e74705SXin Li /// \brief Parse an <Expresssion>
parseExpressionImpl(VariantValue * Value)487*67e74705SXin Li bool Parser::parseExpressionImpl(VariantValue *Value) {
488*67e74705SXin Li   switch (Tokenizer->nextTokenKind()) {
489*67e74705SXin Li   case TokenInfo::TK_Literal:
490*67e74705SXin Li     *Value = Tokenizer->consumeNextToken().Value;
491*67e74705SXin Li     return true;
492*67e74705SXin Li 
493*67e74705SXin Li   case TokenInfo::TK_Ident:
494*67e74705SXin Li     return parseIdentifierPrefixImpl(Value);
495*67e74705SXin Li 
496*67e74705SXin Li   case TokenInfo::TK_CodeCompletion:
497*67e74705SXin Li     addExpressionCompletions();
498*67e74705SXin Li     return false;
499*67e74705SXin Li 
500*67e74705SXin Li   case TokenInfo::TK_Eof:
501*67e74705SXin Li     Error->addError(Tokenizer->consumeNextToken().Range,
502*67e74705SXin Li                     Error->ET_ParserNoCode);
503*67e74705SXin Li     return false;
504*67e74705SXin Li 
505*67e74705SXin Li   case TokenInfo::TK_Error:
506*67e74705SXin Li     // This error was already reported by the tokenizer.
507*67e74705SXin Li     return false;
508*67e74705SXin Li 
509*67e74705SXin Li   case TokenInfo::TK_OpenParen:
510*67e74705SXin Li   case TokenInfo::TK_CloseParen:
511*67e74705SXin Li   case TokenInfo::TK_Comma:
512*67e74705SXin Li   case TokenInfo::TK_Period:
513*67e74705SXin Li   case TokenInfo::TK_InvalidChar:
514*67e74705SXin Li     const TokenInfo Token = Tokenizer->consumeNextToken();
515*67e74705SXin Li     Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text;
516*67e74705SXin Li     return false;
517*67e74705SXin Li   }
518*67e74705SXin Li 
519*67e74705SXin Li   llvm_unreachable("Unknown token kind.");
520*67e74705SXin Li }
521*67e74705SXin Li 
522*67e74705SXin Li static llvm::ManagedStatic<Parser::RegistrySema> DefaultRegistrySema;
523*67e74705SXin Li 
Parser(CodeTokenizer * Tokenizer,Sema * S,const NamedValueMap * NamedValues,Diagnostics * Error)524*67e74705SXin Li Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
525*67e74705SXin Li                const NamedValueMap *NamedValues, Diagnostics *Error)
526*67e74705SXin Li     : Tokenizer(Tokenizer), S(S ? S : &*DefaultRegistrySema),
527*67e74705SXin Li       NamedValues(NamedValues), Error(Error) {}
528*67e74705SXin Li 
~RegistrySema()529*67e74705SXin Li Parser::RegistrySema::~RegistrySema() {}
530*67e74705SXin Li 
531*67e74705SXin Li llvm::Optional<MatcherCtor>
lookupMatcherCtor(StringRef MatcherName)532*67e74705SXin Li Parser::RegistrySema::lookupMatcherCtor(StringRef MatcherName) {
533*67e74705SXin Li   return Registry::lookupMatcherCtor(MatcherName);
534*67e74705SXin Li }
535*67e74705SXin Li 
actOnMatcherExpression(MatcherCtor Ctor,SourceRange NameRange,StringRef BindID,ArrayRef<ParserValue> Args,Diagnostics * Error)536*67e74705SXin Li VariantMatcher Parser::RegistrySema::actOnMatcherExpression(
537*67e74705SXin Li     MatcherCtor Ctor, SourceRange NameRange, StringRef BindID,
538*67e74705SXin Li     ArrayRef<ParserValue> Args, Diagnostics *Error) {
539*67e74705SXin Li   if (BindID.empty()) {
540*67e74705SXin Li     return Registry::constructMatcher(Ctor, NameRange, Args, Error);
541*67e74705SXin Li   } else {
542*67e74705SXin Li     return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
543*67e74705SXin Li                                            Error);
544*67e74705SXin Li   }
545*67e74705SXin Li }
546*67e74705SXin Li 
getAcceptedCompletionTypes(ArrayRef<std::pair<MatcherCtor,unsigned>> Context)547*67e74705SXin Li std::vector<ArgKind> Parser::RegistrySema::getAcceptedCompletionTypes(
548*67e74705SXin Li     ArrayRef<std::pair<MatcherCtor, unsigned>> Context) {
549*67e74705SXin Li   return Registry::getAcceptedCompletionTypes(Context);
550*67e74705SXin Li }
551*67e74705SXin Li 
getMatcherCompletions(ArrayRef<ArgKind> AcceptedTypes)552*67e74705SXin Li std::vector<MatcherCompletion> Parser::RegistrySema::getMatcherCompletions(
553*67e74705SXin Li     ArrayRef<ArgKind> AcceptedTypes) {
554*67e74705SXin Li   return Registry::getMatcherCompletions(AcceptedTypes);
555*67e74705SXin Li }
556*67e74705SXin Li 
parseExpression(StringRef Code,Sema * S,const NamedValueMap * NamedValues,VariantValue * Value,Diagnostics * Error)557*67e74705SXin Li bool Parser::parseExpression(StringRef Code, Sema *S,
558*67e74705SXin Li                              const NamedValueMap *NamedValues,
559*67e74705SXin Li                              VariantValue *Value, Diagnostics *Error) {
560*67e74705SXin Li   CodeTokenizer Tokenizer(Code, Error);
561*67e74705SXin Li   if (!Parser(&Tokenizer, S, NamedValues, Error).parseExpressionImpl(Value))
562*67e74705SXin Li     return false;
563*67e74705SXin Li   if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) {
564*67e74705SXin Li     Error->addError(Tokenizer.peekNextToken().Range,
565*67e74705SXin Li                     Error->ET_ParserTrailingCode);
566*67e74705SXin Li     return false;
567*67e74705SXin Li   }
568*67e74705SXin Li   return true;
569*67e74705SXin Li }
570*67e74705SXin Li 
571*67e74705SXin Li std::vector<MatcherCompletion>
completeExpression(StringRef Code,unsigned CompletionOffset,Sema * S,const NamedValueMap * NamedValues)572*67e74705SXin Li Parser::completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S,
573*67e74705SXin Li                            const NamedValueMap *NamedValues) {
574*67e74705SXin Li   Diagnostics Error;
575*67e74705SXin Li   CodeTokenizer Tokenizer(Code, &Error, CompletionOffset);
576*67e74705SXin Li   Parser P(&Tokenizer, S, NamedValues, &Error);
577*67e74705SXin Li   VariantValue Dummy;
578*67e74705SXin Li   P.parseExpressionImpl(&Dummy);
579*67e74705SXin Li 
580*67e74705SXin Li   // Sort by specificity, then by name.
581*67e74705SXin Li   std::sort(P.Completions.begin(), P.Completions.end(),
582*67e74705SXin Li             [](const MatcherCompletion &A, const MatcherCompletion &B) {
583*67e74705SXin Li     if (A.Specificity != B.Specificity)
584*67e74705SXin Li       return A.Specificity > B.Specificity;
585*67e74705SXin Li     return A.TypedText < B.TypedText;
586*67e74705SXin Li   });
587*67e74705SXin Li 
588*67e74705SXin Li   return P.Completions;
589*67e74705SXin Li }
590*67e74705SXin Li 
591*67e74705SXin Li llvm::Optional<DynTypedMatcher>
parseMatcherExpression(StringRef Code,Sema * S,const NamedValueMap * NamedValues,Diagnostics * Error)592*67e74705SXin Li Parser::parseMatcherExpression(StringRef Code, Sema *S,
593*67e74705SXin Li                                const NamedValueMap *NamedValues,
594*67e74705SXin Li                                Diagnostics *Error) {
595*67e74705SXin Li   VariantValue Value;
596*67e74705SXin Li   if (!parseExpression(Code, S, NamedValues, &Value, Error))
597*67e74705SXin Li     return llvm::Optional<DynTypedMatcher>();
598*67e74705SXin Li   if (!Value.isMatcher()) {
599*67e74705SXin Li     Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
600*67e74705SXin Li     return llvm::Optional<DynTypedMatcher>();
601*67e74705SXin Li   }
602*67e74705SXin Li   llvm::Optional<DynTypedMatcher> Result =
603*67e74705SXin Li       Value.getMatcher().getSingleMatcher();
604*67e74705SXin Li   if (!Result.hasValue()) {
605*67e74705SXin Li     Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
606*67e74705SXin Li         << Value.getTypeAsString();
607*67e74705SXin Li   }
608*67e74705SXin Li   return Result;
609*67e74705SXin Li }
610*67e74705SXin Li 
611*67e74705SXin Li }  // namespace dynamic
612*67e74705SXin Li }  // namespace ast_matchers
613*67e74705SXin Li }  // namespace clang
614