1*67e74705SXin Li //===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===// 2*67e74705SXin Li // 3*67e74705SXin Li // The LLVM Compiler Infrastructure 4*67e74705SXin Li // 5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source 6*67e74705SXin Li // License. See LICENSE.TXT for details. 7*67e74705SXin Li // 8*67e74705SXin Li //===----------------------------------------------------------------------===// 9*67e74705SXin Li /// 10*67e74705SXin Li /// \file 11*67e74705SXin Li /// \brief This file contains FormatTokenLexer, which tokenizes a source file 12*67e74705SXin Li /// into a token stream suitable for ClangFormat. 13*67e74705SXin Li /// 14*67e74705SXin Li //===----------------------------------------------------------------------===// 15*67e74705SXin Li 16*67e74705SXin Li #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H 17*67e74705SXin Li #define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H 18*67e74705SXin Li 19*67e74705SXin Li #include "Encoding.h" 20*67e74705SXin Li #include "FormatToken.h" 21*67e74705SXin Li #include "clang/Basic/SourceLocation.h" 22*67e74705SXin Li #include "clang/Basic/SourceManager.h" 23*67e74705SXin Li #include "clang/Format/Format.h" 24*67e74705SXin Li #include "llvm/Support/Regex.h" 25*67e74705SXin Li 26*67e74705SXin Li namespace clang { 27*67e74705SXin Li namespace format { 28*67e74705SXin Li 29*67e74705SXin Li class FormatTokenLexer { 30*67e74705SXin Li public: 31*67e74705SXin Li FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, 32*67e74705SXin Li const FormatStyle &Style, encoding::Encoding Encoding); 33*67e74705SXin Li 34*67e74705SXin Li ArrayRef<FormatToken *> lex(); 35*67e74705SXin Li getKeywords()36*67e74705SXin Li const AdditionalKeywords &getKeywords() { return Keywords; } 37*67e74705SXin Li 38*67e74705SXin Li private: 39*67e74705SXin Li void tryMergePreviousTokens(); 40*67e74705SXin Li 41*67e74705SXin Li bool tryMergeLessLess(); 42*67e74705SXin Li 43*67e74705SXin Li bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType); 44*67e74705SXin Li 45*67e74705SXin Li // Returns \c true if \p Tok can only be followed by an operand in JavaScript. 46*67e74705SXin Li bool precedesOperand(FormatToken *Tok); 47*67e74705SXin Li 48*67e74705SXin Li bool canPrecedeRegexLiteral(FormatToken *Prev); 49*67e74705SXin Li 50*67e74705SXin Li // Tries to parse a JavaScript Regex literal starting at the current token, 51*67e74705SXin Li // if that begins with a slash and is in a location where JavaScript allows 52*67e74705SXin Li // regex literals. Changes the current token to a regex literal and updates 53*67e74705SXin Li // its text if successful. 54*67e74705SXin Li void tryParseJSRegexLiteral(); 55*67e74705SXin Li 56*67e74705SXin Li void tryParseTemplateString(); 57*67e74705SXin Li 58*67e74705SXin Li bool tryMerge_TMacro(); 59*67e74705SXin Li 60*67e74705SXin Li bool tryMergeConflictMarkers(); 61*67e74705SXin Li 62*67e74705SXin Li FormatToken *getStashedToken(); 63*67e74705SXin Li 64*67e74705SXin Li FormatToken *getNextToken(); 65*67e74705SXin Li 66*67e74705SXin Li FormatToken *FormatTok; 67*67e74705SXin Li bool IsFirstToken; 68*67e74705SXin Li bool GreaterStashed, LessStashed; 69*67e74705SXin Li unsigned Column; 70*67e74705SXin Li unsigned TrailingWhitespace; 71*67e74705SXin Li std::unique_ptr<Lexer> Lex; 72*67e74705SXin Li const SourceManager &SourceMgr; 73*67e74705SXin Li FileID ID; 74*67e74705SXin Li const FormatStyle &Style; 75*67e74705SXin Li IdentifierTable IdentTable; 76*67e74705SXin Li AdditionalKeywords Keywords; 77*67e74705SXin Li encoding::Encoding Encoding; 78*67e74705SXin Li llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; 79*67e74705SXin Li // Index (in 'Tokens') of the last token that starts a new line. 80*67e74705SXin Li unsigned FirstInLineIndex; 81*67e74705SXin Li SmallVector<FormatToken *, 16> Tokens; 82*67e74705SXin Li SmallVector<IdentifierInfo *, 8> ForEachMacros; 83*67e74705SXin Li 84*67e74705SXin Li bool FormattingDisabled; 85*67e74705SXin Li 86*67e74705SXin Li llvm::Regex MacroBlockBeginRegex; 87*67e74705SXin Li llvm::Regex MacroBlockEndRegex; 88*67e74705SXin Li 89*67e74705SXin Li void readRawToken(FormatToken &Tok); 90*67e74705SXin Li 91*67e74705SXin Li void resetLexer(unsigned Offset); 92*67e74705SXin Li }; 93*67e74705SXin Li 94*67e74705SXin Li } // namespace format 95*67e74705SXin Li } // namespace clang 96*67e74705SXin Li 97*67e74705SXin Li #endif 98