xref: /aosp_15_r20/external/clang/lib/Format/FormatTokenLexer.h (revision 67e74705e28f6214e480b399dd47ea732279e315)
//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file contains FormatTokenLexer, which tokenizes a source file
/// into a token stream suitable for ClangFormat.
///
//===----------------------------------------------------------------------===//
15*67e74705SXin Li 
16*67e74705SXin Li #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
17*67e74705SXin Li #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
18*67e74705SXin Li 
19*67e74705SXin Li #include "Encoding.h"
20*67e74705SXin Li #include "FormatToken.h"
21*67e74705SXin Li #include "clang/Basic/SourceLocation.h"
22*67e74705SXin Li #include "clang/Basic/SourceManager.h"
23*67e74705SXin Li #include "clang/Format/Format.h"
24*67e74705SXin Li #include "llvm/Support/Regex.h"
25*67e74705SXin Li 
26*67e74705SXin Li namespace clang {
27*67e74705SXin Li namespace format {
28*67e74705SXin Li 
29*67e74705SXin Li class FormatTokenLexer {
30*67e74705SXin Li public:
31*67e74705SXin Li   FormatTokenLexer(const SourceManager &SourceMgr, FileID ID,
32*67e74705SXin Li                    const FormatStyle &Style, encoding::Encoding Encoding);
33*67e74705SXin Li 
34*67e74705SXin Li   ArrayRef<FormatToken *> lex();
35*67e74705SXin Li 
getKeywords()36*67e74705SXin Li   const AdditionalKeywords &getKeywords() { return Keywords; }
37*67e74705SXin Li 
38*67e74705SXin Li private:
39*67e74705SXin Li   void tryMergePreviousTokens();
40*67e74705SXin Li 
41*67e74705SXin Li   bool tryMergeLessLess();
42*67e74705SXin Li 
43*67e74705SXin Li   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
44*67e74705SXin Li 
45*67e74705SXin Li   // Returns \c true if \p Tok can only be followed by an operand in JavaScript.
46*67e74705SXin Li   bool precedesOperand(FormatToken *Tok);
47*67e74705SXin Li 
48*67e74705SXin Li   bool canPrecedeRegexLiteral(FormatToken *Prev);
49*67e74705SXin Li 
50*67e74705SXin Li   // Tries to parse a JavaScript Regex literal starting at the current token,
51*67e74705SXin Li   // if that begins with a slash and is in a location where JavaScript allows
52*67e74705SXin Li   // regex literals. Changes the current token to a regex literal and updates
53*67e74705SXin Li   // its text if successful.
54*67e74705SXin Li   void tryParseJSRegexLiteral();
55*67e74705SXin Li 
56*67e74705SXin Li   void tryParseTemplateString();
57*67e74705SXin Li 
58*67e74705SXin Li   bool tryMerge_TMacro();
59*67e74705SXin Li 
60*67e74705SXin Li   bool tryMergeConflictMarkers();
61*67e74705SXin Li 
62*67e74705SXin Li   FormatToken *getStashedToken();
63*67e74705SXin Li 
64*67e74705SXin Li   FormatToken *getNextToken();
65*67e74705SXin Li 
66*67e74705SXin Li   FormatToken *FormatTok;
67*67e74705SXin Li   bool IsFirstToken;
68*67e74705SXin Li   bool GreaterStashed, LessStashed;
69*67e74705SXin Li   unsigned Column;
70*67e74705SXin Li   unsigned TrailingWhitespace;
71*67e74705SXin Li   std::unique_ptr<Lexer> Lex;
72*67e74705SXin Li   const SourceManager &SourceMgr;
73*67e74705SXin Li   FileID ID;
74*67e74705SXin Li   const FormatStyle &Style;
75*67e74705SXin Li   IdentifierTable IdentTable;
76*67e74705SXin Li   AdditionalKeywords Keywords;
77*67e74705SXin Li   encoding::Encoding Encoding;
78*67e74705SXin Li   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
79*67e74705SXin Li   // Index (in 'Tokens') of the last token that starts a new line.
80*67e74705SXin Li   unsigned FirstInLineIndex;
81*67e74705SXin Li   SmallVector<FormatToken *, 16> Tokens;
82*67e74705SXin Li   SmallVector<IdentifierInfo *, 8> ForEachMacros;
83*67e74705SXin Li 
84*67e74705SXin Li   bool FormattingDisabled;
85*67e74705SXin Li 
86*67e74705SXin Li   llvm::Regex MacroBlockBeginRegex;
87*67e74705SXin Li   llvm::Regex MacroBlockEndRegex;
88*67e74705SXin Li 
89*67e74705SXin Li   void readRawToken(FormatToken &Tok);
90*67e74705SXin Li 
91*67e74705SXin Li   void resetLexer(unsigned Offset);
92*67e74705SXin Li };
93*67e74705SXin Li 
94*67e74705SXin Li } // namespace format
95*67e74705SXin Li } // namespace clang
96*67e74705SXin Li 
97*67e74705SXin Li #endif
98