1*67e74705SXin Li //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// 2*67e74705SXin Li // 3*67e74705SXin Li // The LLVM Compiler Infrastructure 4*67e74705SXin Li // 5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source 6*67e74705SXin Li // License. See LICENSE.TXT for details. 7*67e74705SXin Li // 8*67e74705SXin Li //===----------------------------------------------------------------------===// 9*67e74705SXin Li /// 10*67e74705SXin Li /// \file 11*67e74705SXin Li /// \brief This file implements an indenter that manages the indentation of 12*67e74705SXin Li /// continuations. 13*67e74705SXin Li /// 14*67e74705SXin Li //===----------------------------------------------------------------------===// 15*67e74705SXin Li 16*67e74705SXin Li #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 17*67e74705SXin Li #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 18*67e74705SXin Li 19*67e74705SXin Li #include "Encoding.h" 20*67e74705SXin Li #include "FormatToken.h" 21*67e74705SXin Li #include "clang/Format/Format.h" 22*67e74705SXin Li #include "llvm/Support/Regex.h" 23*67e74705SXin Li 24*67e74705SXin Li namespace clang { 25*67e74705SXin Li class SourceManager; 26*67e74705SXin Li 27*67e74705SXin Li namespace format { 28*67e74705SXin Li 29*67e74705SXin Li class AnnotatedLine; 30*67e74705SXin Li struct FormatToken; 31*67e74705SXin Li struct LineState; 32*67e74705SXin Li struct ParenState; 33*67e74705SXin Li class WhitespaceManager; 34*67e74705SXin Li 35*67e74705SXin Li class ContinuationIndenter { 36*67e74705SXin Li public: 37*67e74705SXin Li /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in 38*67e74705SXin Li /// column \p FirstIndent. 39*67e74705SXin Li ContinuationIndenter(const FormatStyle &Style, 40*67e74705SXin Li const AdditionalKeywords &Keywords, 41*67e74705SXin Li const SourceManager &SourceMgr, 42*67e74705SXin Li WhitespaceManager &Whitespaces, 43*67e74705SXin Li encoding::Encoding Encoding, 44*67e74705SXin Li bool BinPackInconclusiveFunctions); 45*67e74705SXin Li 46*67e74705SXin Li /// \brief Get the initial state, i.e. the state after placing \p Line's 47*67e74705SXin Li /// first token at \p FirstIndent. 48*67e74705SXin Li LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line, 49*67e74705SXin Li bool DryRun); 50*67e74705SXin Li 51*67e74705SXin Li // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a 52*67e74705SXin Li // better home. 53*67e74705SXin Li /// \brief Returns \c true, if a line break after \p State is allowed. 54*67e74705SXin Li bool canBreak(const LineState &State); 55*67e74705SXin Li 56*67e74705SXin Li /// \brief Returns \c true, if a line break after \p State is mandatory. 57*67e74705SXin Li bool mustBreak(const LineState &State); 58*67e74705SXin Li 59*67e74705SXin Li /// \brief Appends the next token to \p State and updates information 60*67e74705SXin Li /// necessary for indentation. 61*67e74705SXin Li /// 62*67e74705SXin Li /// Puts the token on the current line if \p Newline is \c false and adds a 63*67e74705SXin Li /// line break and necessary indentation otherwise. 64*67e74705SXin Li /// 65*67e74705SXin Li /// If \p DryRun is \c false, also creates and stores the required 66*67e74705SXin Li /// \c Replacement. 67*67e74705SXin Li unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, 68*67e74705SXin Li unsigned ExtraSpaces = 0); 69*67e74705SXin Li 70*67e74705SXin Li /// \brief Get the column limit for this line. This is the style's column 71*67e74705SXin Li /// limit, potentially reduced for preprocessor definitions. 72*67e74705SXin Li unsigned getColumnLimit(const LineState &State) const; 73*67e74705SXin Li 74*67e74705SXin Li private: 75*67e74705SXin Li /// \brief Mark the next token as consumed in \p State and modify its stacks 76*67e74705SXin Li /// accordingly. 77*67e74705SXin Li unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); 78*67e74705SXin Li 79*67e74705SXin Li /// \brief Update 'State' according to the next token's fake left parentheses. 80*67e74705SXin Li void moveStatePastFakeLParens(LineState &State, bool Newline); 81*67e74705SXin Li /// \brief Update 'State' according to the next token's fake r_parens. 82*67e74705SXin Li void moveStatePastFakeRParens(LineState &State); 83*67e74705SXin Li 84*67e74705SXin Li /// \brief Update 'State' according to the next token being one of "(<{[". 85*67e74705SXin Li void moveStatePastScopeOpener(LineState &State, bool Newline); 86*67e74705SXin Li /// \brief Update 'State' according to the next token being one of ")>}]". 87*67e74705SXin Li void moveStatePastScopeCloser(LineState &State); 88*67e74705SXin Li /// \brief Update 'State' with the next token opening a nested block. 89*67e74705SXin Li void moveStateToNewBlock(LineState &State); 90*67e74705SXin Li 91*67e74705SXin Li /// \brief If the current token sticks out over the end of the line, break 92*67e74705SXin Li /// it if possible. 93*67e74705SXin Li /// 94*67e74705SXin Li /// \returns An extra penalty if a token was broken, otherwise 0. 95*67e74705SXin Li /// 96*67e74705SXin Li /// The returned penalty will cover the cost of the additional line breaks and 97*67e74705SXin Li /// column limit violation in all lines except for the last one. The penalty 98*67e74705SXin Li /// for the column limit violation in the last line (and in single line 99*67e74705SXin Li /// tokens) is handled in \c addNextStateToQueue. 100*67e74705SXin Li unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, 101*67e74705SXin Li bool DryRun); 102*67e74705SXin Li 103*67e74705SXin Li /// \brief Appends the next token to \p State and updates information 104*67e74705SXin Li /// necessary for indentation. 105*67e74705SXin Li /// 106*67e74705SXin Li /// Puts the token on the current line. 107*67e74705SXin Li /// 108*67e74705SXin Li /// If \p DryRun is \c false, also creates and stores the required 109*67e74705SXin Li /// \c Replacement. 110*67e74705SXin Li void addTokenOnCurrentLine(LineState &State, bool DryRun, 111*67e74705SXin Li unsigned ExtraSpaces); 112*67e74705SXin Li 113*67e74705SXin Li /// \brief Appends the next token to \p State and updates information 114*67e74705SXin Li /// necessary for indentation. 115*67e74705SXin Li /// 116*67e74705SXin Li /// Adds a line break and necessary indentation. 117*67e74705SXin Li /// 118*67e74705SXin Li /// If \p DryRun is \c false, also creates and stores the required 119*67e74705SXin Li /// \c Replacement. 120*67e74705SXin Li unsigned addTokenOnNewLine(LineState &State, bool DryRun); 121*67e74705SXin Li 122*67e74705SXin Li /// \brief Calculate the new column for a line wrap before the next token. 123*67e74705SXin Li unsigned getNewLineColumn(const LineState &State); 124*67e74705SXin Li 125*67e74705SXin Li /// \brief Adds a multiline token to the \p State. 126*67e74705SXin Li /// 127*67e74705SXin Li /// \returns Extra penalty for the first line of the literal: last line is 128*67e74705SXin Li /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't 129*67e74705SXin Li /// matter, as we don't change them. 130*67e74705SXin Li unsigned addMultilineToken(const FormatToken &Current, LineState &State); 131*67e74705SXin Li 132*67e74705SXin Li /// \brief Returns \c true if the next token starts a multiline string 133*67e74705SXin Li /// literal. 134*67e74705SXin Li /// 135*67e74705SXin Li /// This includes implicitly concatenated strings, strings that will be broken 136*67e74705SXin Li /// by clang-format and string literals with escaped newlines. 137*67e74705SXin Li bool nextIsMultilineString(const LineState &State); 138*67e74705SXin Li 139*67e74705SXin Li FormatStyle Style; 140*67e74705SXin Li const AdditionalKeywords &Keywords; 141*67e74705SXin Li const SourceManager &SourceMgr; 142*67e74705SXin Li WhitespaceManager &Whitespaces; 143*67e74705SXin Li encoding::Encoding Encoding; 144*67e74705SXin Li bool BinPackInconclusiveFunctions; 145*67e74705SXin Li llvm::Regex CommentPragmasRegex; 146*67e74705SXin Li }; 147*67e74705SXin Li 148*67e74705SXin Li struct ParenState { ParenStateParenState149*67e74705SXin Li ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace, 150*67e74705SXin Li bool AvoidBinPacking, bool NoLineBreak) 151*67e74705SXin Li : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace), 152*67e74705SXin Li NestedBlockIndent(Indent), BreakBeforeClosingBrace(false), 153*67e74705SXin Li AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 154*67e74705SXin Li NoLineBreak(NoLineBreak), LastOperatorWrapped(true), 155*67e74705SXin Li ContainsLineBreak(false), ContainsUnwrappedBuilder(false), 156*67e74705SXin Li AlignColons(true), ObjCSelectorNameFound(false), 157*67e74705SXin Li HasMultipleNestedBlocks(false), NestedBlockInlined(false) {} 158*67e74705SXin Li 159*67e74705SXin Li /// \brief The position to which a specific parenthesis level needs to be 160*67e74705SXin Li /// indented. 161*67e74705SXin Li unsigned Indent; 162*67e74705SXin Li 163*67e74705SXin Li /// \brief The number of indentation levels of the block. 164*67e74705SXin Li unsigned IndentLevel; 165*67e74705SXin Li 166*67e74705SXin Li /// \brief The position of the last space on each level. 167*67e74705SXin Li /// 168*67e74705SXin Li /// Used e.g. to break like: 169*67e74705SXin Li /// functionCall(Parameter, otherCall( 170*67e74705SXin Li /// OtherParameter)); 171*67e74705SXin Li unsigned LastSpace; 172*67e74705SXin Li 173*67e74705SXin Li /// \brief If a block relative to this parenthesis level gets wrapped, indent 174*67e74705SXin Li /// it this much. 175*67e74705SXin Li unsigned NestedBlockIndent; 176*67e74705SXin Li 177*67e74705SXin Li /// \brief The position the first "<<" operator encountered on each level. 178*67e74705SXin Li /// 179*67e74705SXin Li /// Used to align "<<" operators. 0 if no such operator has been encountered 180*67e74705SXin Li /// on a level. 181*67e74705SXin Li unsigned FirstLessLess = 0; 182*67e74705SXin Li 183*67e74705SXin Li /// \brief The column of a \c ? in a conditional expression; 184*67e74705SXin Li unsigned QuestionColumn = 0; 185*67e74705SXin Li 186*67e74705SXin Li /// \brief The position of the colon in an ObjC method declaration/call. 187*67e74705SXin Li unsigned ColonPos = 0; 188*67e74705SXin Li 189*67e74705SXin Li /// \brief The start of the most recent function in a builder-type call. 190*67e74705SXin Li unsigned StartOfFunctionCall = 0; 191*67e74705SXin Li 192*67e74705SXin Li /// \brief Contains the start of array subscript expressions, so that they 193*67e74705SXin Li /// can be aligned. 194*67e74705SXin Li unsigned StartOfArraySubscripts = 0; 195*67e74705SXin Li 196*67e74705SXin Li /// \brief If a nested name specifier was broken over multiple lines, this 197*67e74705SXin Li /// contains the start column of the second line. Otherwise 0. 198*67e74705SXin Li unsigned NestedNameSpecifierContinuation = 0; 199*67e74705SXin Li 200*67e74705SXin Li /// \brief If a call expression was broken over multiple lines, this 201*67e74705SXin Li /// contains the start column of the second line. Otherwise 0. 202*67e74705SXin Li unsigned CallContinuation = 0; 203*67e74705SXin Li 204*67e74705SXin Li /// \brief The column of the first variable name in a variable declaration. 205*67e74705SXin Li /// 206*67e74705SXin Li /// Used to align further variables if necessary. 207*67e74705SXin Li unsigned VariablePos = 0; 208*67e74705SXin Li 209*67e74705SXin Li /// \brief Whether a newline needs to be inserted before the block's closing 210*67e74705SXin Li /// brace. 211*67e74705SXin Li /// 212*67e74705SXin Li /// We only want to insert a newline before the closing brace if there also 213*67e74705SXin Li /// was a newline after the beginning left brace. 214*67e74705SXin Li bool BreakBeforeClosingBrace : 1; 215*67e74705SXin Li 216*67e74705SXin Li /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple 217*67e74705SXin Li /// lines, in this context. 218*67e74705SXin Li bool AvoidBinPacking : 1; 219*67e74705SXin Li 220*67e74705SXin Li /// \brief Break after the next comma (or all the commas in this context if 221*67e74705SXin Li /// \c AvoidBinPacking is \c true). 222*67e74705SXin Li bool BreakBeforeParameter : 1; 223*67e74705SXin Li 224*67e74705SXin Li /// \brief Line breaking in this context would break a formatting rule. 225*67e74705SXin Li bool NoLineBreak : 1; 226*67e74705SXin Li 227*67e74705SXin Li /// \brief True if the last binary operator on this level was wrapped to the 228*67e74705SXin Li /// next line. 229*67e74705SXin Li bool LastOperatorWrapped : 1; 230*67e74705SXin Li 231*67e74705SXin Li /// \brief \c true if this \c ParenState already contains a line-break. 232*67e74705SXin Li /// 233*67e74705SXin Li /// The first line break in a certain \c ParenState causes extra penalty so 234*67e74705SXin Li /// that clang-format prefers similar breaks, i.e. breaks in the same 235*67e74705SXin Li /// parenthesis. 236*67e74705SXin Li bool ContainsLineBreak : 1; 237*67e74705SXin Li 238*67e74705SXin Li /// \brief \c true if this \c ParenState contains multiple segments of a 239*67e74705SXin Li /// builder-type call on one line. 240*67e74705SXin Li bool ContainsUnwrappedBuilder : 1; 241*67e74705SXin Li 242*67e74705SXin Li /// \brief \c true if the colons of the curren ObjC method expression should 243*67e74705SXin Li /// be aligned. 244*67e74705SXin Li /// 245*67e74705SXin Li /// Not considered for memoization as it will always have the same value at 246*67e74705SXin Li /// the same token. 247*67e74705SXin Li bool AlignColons : 1; 248*67e74705SXin Li 249*67e74705SXin Li /// \brief \c true if at least one selector name was found in the current 250*67e74705SXin Li /// ObjC method expression. 251*67e74705SXin Li /// 252*67e74705SXin Li /// Not considered for memoization as it will always have the same value at 253*67e74705SXin Li /// the same token. 254*67e74705SXin Li bool ObjCSelectorNameFound : 1; 255*67e74705SXin Li 256*67e74705SXin Li /// \brief \c true if there are multiple nested blocks inside these parens. 257*67e74705SXin Li /// 258*67e74705SXin Li /// Not considered for memoization as it will always have the same value at 259*67e74705SXin Li /// the same token. 260*67e74705SXin Li bool HasMultipleNestedBlocks : 1; 261*67e74705SXin Li 262*67e74705SXin Li // \brief The start of a nested block (e.g. lambda introducer in C++ or 263*67e74705SXin Li // "function" in JavaScript) is not wrapped to a new line. 264*67e74705SXin Li bool NestedBlockInlined : 1; 265*67e74705SXin Li 266*67e74705SXin Li bool operator<(const ParenState &Other) const { 267*67e74705SXin Li if (Indent != Other.Indent) 268*67e74705SXin Li return Indent < Other.Indent; 269*67e74705SXin Li if (LastSpace != Other.LastSpace) 270*67e74705SXin Li return LastSpace < Other.LastSpace; 271*67e74705SXin Li if (NestedBlockIndent != Other.NestedBlockIndent) 272*67e74705SXin Li return NestedBlockIndent < Other.NestedBlockIndent; 273*67e74705SXin Li if (FirstLessLess != Other.FirstLessLess) 274*67e74705SXin Li return FirstLessLess < Other.FirstLessLess; 275*67e74705SXin Li if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 276*67e74705SXin Li return BreakBeforeClosingBrace; 277*67e74705SXin Li if (QuestionColumn != Other.QuestionColumn) 278*67e74705SXin Li return QuestionColumn < Other.QuestionColumn; 279*67e74705SXin Li if (AvoidBinPacking != Other.AvoidBinPacking) 280*67e74705SXin Li return AvoidBinPacking; 281*67e74705SXin Li if (BreakBeforeParameter != Other.BreakBeforeParameter) 282*67e74705SXin Li return BreakBeforeParameter; 283*67e74705SXin Li if (NoLineBreak != Other.NoLineBreak) 284*67e74705SXin Li return NoLineBreak; 285*67e74705SXin Li if (LastOperatorWrapped != Other.LastOperatorWrapped) 286*67e74705SXin Li return LastOperatorWrapped; 287*67e74705SXin Li if (ColonPos != Other.ColonPos) 288*67e74705SXin Li return ColonPos < Other.ColonPos; 289*67e74705SXin Li if (StartOfFunctionCall != Other.StartOfFunctionCall) 290*67e74705SXin Li return StartOfFunctionCall < Other.StartOfFunctionCall; 291*67e74705SXin Li if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 292*67e74705SXin Li return StartOfArraySubscripts < Other.StartOfArraySubscripts; 293*67e74705SXin Li if (CallContinuation != Other.CallContinuation) 294*67e74705SXin Li return CallContinuation < Other.CallContinuation; 295*67e74705SXin Li if (VariablePos != Other.VariablePos) 296*67e74705SXin Li return VariablePos < Other.VariablePos; 297*67e74705SXin Li if (ContainsLineBreak != Other.ContainsLineBreak) 298*67e74705SXin Li return ContainsLineBreak; 299*67e74705SXin Li if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) 300*67e74705SXin Li return ContainsUnwrappedBuilder; 301*67e74705SXin Li if (NestedBlockInlined != Other.NestedBlockInlined) 302*67e74705SXin Li return NestedBlockInlined; 303*67e74705SXin Li return false; 304*67e74705SXin Li } 305*67e74705SXin Li }; 306*67e74705SXin Li 307*67e74705SXin Li /// \brief The current state when indenting a unwrapped line. 308*67e74705SXin Li /// 309*67e74705SXin Li /// As the indenting tries different combinations this is copied by value. 310*67e74705SXin Li struct LineState { 311*67e74705SXin Li /// \brief The number of used columns in the current line. 312*67e74705SXin Li unsigned Column; 313*67e74705SXin Li 314*67e74705SXin Li /// \brief The token that needs to be next formatted. 315*67e74705SXin Li FormatToken *NextToken; 316*67e74705SXin Li 317*67e74705SXin Li /// \brief \c true if this line contains a continued for-loop section. 318*67e74705SXin Li bool LineContainsContinuedForLoopSection; 319*67e74705SXin Li 320*67e74705SXin Li /// \brief The \c NestingLevel at the start of this line. 321*67e74705SXin Li unsigned StartOfLineLevel; 322*67e74705SXin Li 323*67e74705SXin Li /// \brief The lowest \c NestingLevel on the current line. 324*67e74705SXin Li unsigned LowestLevelOnLine; 325*67e74705SXin Li 326*67e74705SXin Li /// \brief The start column of the string literal, if we're in a string 327*67e74705SXin Li /// literal sequence, 0 otherwise. 328*67e74705SXin Li unsigned StartOfStringLiteral; 329*67e74705SXin Li 330*67e74705SXin Li /// \brief A stack keeping track of properties applying to parenthesis 331*67e74705SXin Li /// levels. 332*67e74705SXin Li std::vector<ParenState> Stack; 333*67e74705SXin Li 334*67e74705SXin Li /// \brief Ignore the stack of \c ParenStates for state comparison. 335*67e74705SXin Li /// 336*67e74705SXin Li /// In long and deeply nested unwrapped lines, the current algorithm can 337*67e74705SXin Li /// be insufficient for finding the best formatting with a reasonable amount 338*67e74705SXin Li /// of time and memory. Setting this flag will effectively lead to the 339*67e74705SXin Li /// algorithm not analyzing some combinations. However, these combinations 340*67e74705SXin Li /// rarely contain the optimal solution: In short, accepting a higher 341*67e74705SXin Li /// penalty early would need to lead to different values in the \c 342*67e74705SXin Li /// ParenState stack (in an otherwise identical state) and these different 343*67e74705SXin Li /// values would need to lead to a significant amount of avoided penalty 344*67e74705SXin Li /// later. 345*67e74705SXin Li /// 346*67e74705SXin Li /// FIXME: Come up with a better algorithm instead. 347*67e74705SXin Li bool IgnoreStackForComparison; 348*67e74705SXin Li 349*67e74705SXin Li /// \brief The indent of the first token. 350*67e74705SXin Li unsigned FirstIndent; 351*67e74705SXin Li 352*67e74705SXin Li /// \brief The line that is being formatted. 353*67e74705SXin Li /// 354*67e74705SXin Li /// Does not need to be considered for memoization because it doesn't change. 355*67e74705SXin Li const AnnotatedLine *Line; 356*67e74705SXin Li 357*67e74705SXin Li /// \brief Comparison operator to be able to used \c LineState in \c map. 358*67e74705SXin Li bool operator<(const LineState &Other) const { 359*67e74705SXin Li if (NextToken != Other.NextToken) 360*67e74705SXin Li return NextToken < Other.NextToken; 361*67e74705SXin Li if (Column != Other.Column) 362*67e74705SXin Li return Column < Other.Column; 363*67e74705SXin Li if (LineContainsContinuedForLoopSection != 364*67e74705SXin Li Other.LineContainsContinuedForLoopSection) 365*67e74705SXin Li return LineContainsContinuedForLoopSection; 366*67e74705SXin Li if (StartOfLineLevel != Other.StartOfLineLevel) 367*67e74705SXin Li return StartOfLineLevel < Other.StartOfLineLevel; 368*67e74705SXin Li if (LowestLevelOnLine != Other.LowestLevelOnLine) 369*67e74705SXin Li return LowestLevelOnLine < Other.LowestLevelOnLine; 370*67e74705SXin Li if (StartOfStringLiteral != Other.StartOfStringLiteral) 371*67e74705SXin Li return StartOfStringLiteral < Other.StartOfStringLiteral; 372*67e74705SXin Li if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 373*67e74705SXin Li return false; 374*67e74705SXin Li return Stack < Other.Stack; 375*67e74705SXin Li } 376*67e74705SXin Li }; 377*67e74705SXin Li 378*67e74705SXin Li } // end namespace format 379*67e74705SXin Li } // end namespace clang 380*67e74705SXin Li 381*67e74705SXin Li #endif 382