1*67e74705SXin Li //===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===// 2*67e74705SXin Li // 3*67e74705SXin Li // The LLVM Compiler Infrastructure 4*67e74705SXin Li // 5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source 6*67e74705SXin Li // License. See LICENSE.TXT for details. 7*67e74705SXin Li // 8*67e74705SXin Li //===----------------------------------------------------------------------===// 9*67e74705SXin Li /// 10*67e74705SXin Li /// \file 11*67e74705SXin Li /// \brief WhitespaceManager class manages whitespace around tokens and their 12*67e74705SXin Li /// replacements. 13*67e74705SXin Li /// 14*67e74705SXin Li //===----------------------------------------------------------------------===// 15*67e74705SXin Li 16*67e74705SXin Li #ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H 17*67e74705SXin Li #define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H 18*67e74705SXin Li 19*67e74705SXin Li #include "TokenAnnotator.h" 20*67e74705SXin Li #include "clang/Basic/SourceManager.h" 21*67e74705SXin Li #include "clang/Format/Format.h" 22*67e74705SXin Li #include <string> 23*67e74705SXin Li 24*67e74705SXin Li namespace clang { 25*67e74705SXin Li namespace format { 26*67e74705SXin Li 27*67e74705SXin Li /// \brief Manages the whitespaces around tokens and their replacements. 28*67e74705SXin Li /// 29*67e74705SXin Li /// This includes special handling for certain constructs, e.g. the alignment of 30*67e74705SXin Li /// trailing line comments. 31*67e74705SXin Li /// 32*67e74705SXin Li /// To guarantee correctness of alignment operations, the \c WhitespaceManager 33*67e74705SXin Li /// must be informed about every token in the source file; for each token, there 34*67e74705SXin Li /// must be exactly one call to either \c replaceWhitespace or 35*67e74705SXin Li /// \c addUntouchableToken. 36*67e74705SXin Li /// 37*67e74705SXin Li /// There may be multiple calls to \c breakToken for a given token. 38*67e74705SXin Li class WhitespaceManager { 39*67e74705SXin Li public: WhitespaceManager(const SourceManager & SourceMgr,const FormatStyle & Style,bool UseCRLF)40*67e74705SXin Li WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, 41*67e74705SXin Li bool UseCRLF) 42*67e74705SXin Li : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {} 43*67e74705SXin Li 44*67e74705SXin Li /// \brief Prepares the \c WhitespaceManager for another run. 45*67e74705SXin Li void reset(); 46*67e74705SXin Li 47*67e74705SXin Li /// \brief Replaces the whitespace in front of \p Tok. Only call once for 48*67e74705SXin Li /// each \c AnnotatedToken. 49*67e74705SXin Li void replaceWhitespace(FormatToken &Tok, unsigned Newlines, 50*67e74705SXin Li unsigned IndentLevel, unsigned Spaces, 51*67e74705SXin Li unsigned StartOfTokenColumn, 52*67e74705SXin Li bool InPPDirective = false); 53*67e74705SXin Li 54*67e74705SXin Li /// \brief Adds information about an unchangeable token's whitespace. 55*67e74705SXin Li /// 56*67e74705SXin Li /// Needs to be called for every token for which \c replaceWhitespace 57*67e74705SXin Li /// was not called. 58*67e74705SXin Li void addUntouchableToken(const FormatToken &Tok, bool InPPDirective); 59*67e74705SXin Li 60*67e74705SXin Li /// \brief Inserts or replaces whitespace in the middle of a token. 61*67e74705SXin Li /// 62*67e74705SXin Li /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix 63*67e74705SXin Li /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars 64*67e74705SXin Li /// characters. 65*67e74705SXin Li /// 66*67e74705SXin Li /// Note: \p Spaces can be negative to retain information about initial 67*67e74705SXin Li /// relative column offset between a line of a block comment and the start of 68*67e74705SXin Li /// the comment. This negative offset may be compensated by trailing comment 69*67e74705SXin Li /// alignment here. In all other cases negative \p Spaces will be truncated to 70*67e74705SXin Li /// 0. 71*67e74705SXin Li /// 72*67e74705SXin Li /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is 73*67e74705SXin Li /// used to align backslashes correctly. 74*67e74705SXin Li void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, 75*67e74705SXin Li unsigned ReplaceChars, 76*67e74705SXin Li StringRef PreviousPostfix, 77*67e74705SXin Li StringRef CurrentPrefix, bool InPPDirective, 78*67e74705SXin Li unsigned Newlines, unsigned IndentLevel, 79*67e74705SXin Li int Spaces); 80*67e74705SXin Li 81*67e74705SXin Li /// \brief Returns all the \c Replacements created during formatting. 82*67e74705SXin Li const tooling::Replacements &generateReplacements(); 83*67e74705SXin Li 84*67e74705SXin Li /// \brief Represents a change before a token, a break inside a token, 85*67e74705SXin Li /// or the layout of an unchanged token (or whitespace within). 86*67e74705SXin Li struct Change { 87*67e74705SXin Li /// \brief Functor to sort changes in original source order. 88*67e74705SXin Li class IsBeforeInFile { 89*67e74705SXin Li public: IsBeforeInFileChange90*67e74705SXin Li IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} 91*67e74705SXin Li bool operator()(const Change &C1, const Change &C2) const; 92*67e74705SXin Li 93*67e74705SXin Li private: 94*67e74705SXin Li const SourceManager &SourceMgr; 95*67e74705SXin Li }; 96*67e74705SXin Li ChangeChange97*67e74705SXin Li Change() {} 98*67e74705SXin Li 99*67e74705SXin Li /// \brief Creates a \c Change. 100*67e74705SXin Li /// 101*67e74705SXin Li /// The generated \c Change will replace the characters at 102*67e74705SXin Li /// \p OriginalWhitespaceRange with a concatenation of 103*67e74705SXin Li /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces 104*67e74705SXin Li /// and \p CurrentLinePrefix. 105*67e74705SXin Li /// 106*67e74705SXin Li /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out 107*67e74705SXin Li /// trailing comments and escaped newlines. 108*67e74705SXin Li Change(bool CreateReplacement, SourceRange OriginalWhitespaceRange, 109*67e74705SXin Li unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn, 110*67e74705SXin Li unsigned NewlinesBefore, StringRef PreviousLinePostfix, 111*67e74705SXin Li StringRef CurrentLinePrefix, tok::TokenKind Kind, 112*67e74705SXin Li bool ContinuesPPDirective, bool IsStartOfDeclName, 113*67e74705SXin Li bool IsInsideToken); 114*67e74705SXin Li 115*67e74705SXin Li bool CreateReplacement; 116*67e74705SXin Li // Changes might be in the middle of a token, so we cannot just keep the 117*67e74705SXin Li // FormatToken around to query its information. 118*67e74705SXin Li SourceRange OriginalWhitespaceRange; 119*67e74705SXin Li unsigned StartOfTokenColumn; 120*67e74705SXin Li unsigned NewlinesBefore; 121*67e74705SXin Li std::string PreviousLinePostfix; 122*67e74705SXin Li std::string CurrentLinePrefix; 123*67e74705SXin Li // The kind of the token whose whitespace this change replaces, or in which 124*67e74705SXin Li // this change inserts whitespace. 125*67e74705SXin Li // FIXME: Currently this is not set correctly for breaks inside comments, as 126*67e74705SXin Li // the \c BreakableToken is still doing its own alignment. 127*67e74705SXin Li tok::TokenKind Kind; 128*67e74705SXin Li bool ContinuesPPDirective; 129*67e74705SXin Li bool IsStartOfDeclName; 130*67e74705SXin Li 131*67e74705SXin Li // The number of nested blocks the token is in. This is used to add tabs 132*67e74705SXin Li // only for the indentation, and not for alignment, when 133*67e74705SXin Li // UseTab = US_ForIndentation. 134*67e74705SXin Li unsigned IndentLevel; 135*67e74705SXin Li 136*67e74705SXin Li // The number of spaces in front of the token or broken part of the token. 137*67e74705SXin Li // This will be adapted when aligning tokens. 138*67e74705SXin Li // Can be negative to retain information about the initial relative offset 139*67e74705SXin Li // of the lines in a block comment. This is used when aligning trailing 140*67e74705SXin Li // comments. Uncompensated negative offset is truncated to 0. 141*67e74705SXin Li int Spaces; 142*67e74705SXin Li 143*67e74705SXin Li // If this change is inside of a token but not at the start of the token or 144*67e74705SXin Li // directly after a newline. 145*67e74705SXin Li bool IsInsideToken; 146*67e74705SXin Li 147*67e74705SXin Li // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and 148*67e74705SXin Li // \c EscapedNewlineColumn will be calculated in 149*67e74705SXin Li // \c calculateLineBreakInformation. 150*67e74705SXin Li bool IsTrailingComment; 151*67e74705SXin Li unsigned TokenLength; 152*67e74705SXin Li unsigned PreviousEndOfTokenColumn; 153*67e74705SXin Li unsigned EscapedNewlineColumn; 154*67e74705SXin Li 155*67e74705SXin Li // These fields are used to retain correct relative line indentation in a 156*67e74705SXin Li // block comment when aligning trailing comments. 157*67e74705SXin Li // 158*67e74705SXin Li // If this Change represents a continuation of a block comment, 159*67e74705SXin Li // \c StartOfBlockComment is pointer to the first Change in the block 160*67e74705SXin Li // comment. \c IndentationOffset is a relative column offset to this 161*67e74705SXin Li // change, so that the correct column can be reconstructed at the end of 162*67e74705SXin Li // the alignment process. 163*67e74705SXin Li const Change *StartOfBlockComment; 164*67e74705SXin Li int IndentationOffset; 165*67e74705SXin Li }; 166*67e74705SXin Li 167*67e74705SXin Li private: 168*67e74705SXin Li /// \brief Calculate \c IsTrailingComment, \c TokenLength for the last tokens 169*67e74705SXin Li /// or token parts in a line and \c PreviousEndOfTokenColumn and 170*67e74705SXin Li /// \c EscapedNewlineColumn for the first tokens or token parts in a line. 171*67e74705SXin Li void calculateLineBreakInformation(); 172*67e74705SXin Li 173*67e74705SXin Li /// \brief Align consecutive assignments over all \c Changes. 174*67e74705SXin Li void alignConsecutiveAssignments(); 175*67e74705SXin Li 176*67e74705SXin Li /// \brief Align consecutive declarations over all \c Changes. 177*67e74705SXin Li void alignConsecutiveDeclarations(); 178*67e74705SXin Li 179*67e74705SXin Li /// \brief Align trailing comments over all \c Changes. 180*67e74705SXin Li void alignTrailingComments(); 181*67e74705SXin Li 182*67e74705SXin Li /// \brief Align trailing comments from change \p Start to change \p End at 183*67e74705SXin Li /// the specified \p Column. 184*67e74705SXin Li void alignTrailingComments(unsigned Start, unsigned End, unsigned Column); 185*67e74705SXin Li 186*67e74705SXin Li /// \brief Align escaped newlines over all \c Changes. 187*67e74705SXin Li void alignEscapedNewlines(); 188*67e74705SXin Li 189*67e74705SXin Li /// \brief Align escaped newlines from change \p Start to change \p End at 190*67e74705SXin Li /// the specified \p Column. 191*67e74705SXin Li void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column); 192*67e74705SXin Li 193*67e74705SXin Li /// \brief Fill \c Replaces with the replacements for all effective changes. 194*67e74705SXin Li void generateChanges(); 195*67e74705SXin Li 196*67e74705SXin Li /// \brief Stores \p Text as the replacement for the whitespace in \p Range. 197*67e74705SXin Li void storeReplacement(SourceRange Range, StringRef Text); 198*67e74705SXin Li void appendNewlineText(std::string &Text, unsigned Newlines); 199*67e74705SXin Li void appendNewlineText(std::string &Text, unsigned Newlines, 200*67e74705SXin Li unsigned PreviousEndOfTokenColumn, 201*67e74705SXin Li unsigned EscapedNewlineColumn); 202*67e74705SXin Li void appendIndentText(std::string &Text, unsigned IndentLevel, 203*67e74705SXin Li unsigned Spaces, unsigned WhitespaceStartColumn); 204*67e74705SXin Li 205*67e74705SXin Li SmallVector<Change, 16> Changes; 206*67e74705SXin Li const SourceManager &SourceMgr; 207*67e74705SXin Li tooling::Replacements Replaces; 208*67e74705SXin Li const FormatStyle &Style; 209*67e74705SXin Li bool UseCRLF; 210*67e74705SXin Li }; 211*67e74705SXin Li 212*67e74705SXin Li } // namespace format 213*67e74705SXin Li } // namespace clang 214*67e74705SXin Li 215*67e74705SXin Li #endif 216