xref: /aosp_15_r20/external/clang/lib/Format/WhitespaceManager.h (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li //===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
2*67e74705SXin Li //
3*67e74705SXin Li //                     The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li ///
10*67e74705SXin Li /// \file
11*67e74705SXin Li /// \brief WhitespaceManager class manages whitespace around tokens and their
12*67e74705SXin Li /// replacements.
13*67e74705SXin Li ///
14*67e74705SXin Li //===----------------------------------------------------------------------===//
15*67e74705SXin Li 
16*67e74705SXin Li #ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
17*67e74705SXin Li #define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
18*67e74705SXin Li 
19*67e74705SXin Li #include "TokenAnnotator.h"
20*67e74705SXin Li #include "clang/Basic/SourceManager.h"
21*67e74705SXin Li #include "clang/Format/Format.h"
22*67e74705SXin Li #include <string>
23*67e74705SXin Li 
24*67e74705SXin Li namespace clang {
25*67e74705SXin Li namespace format {
26*67e74705SXin Li 
27*67e74705SXin Li /// \brief Manages the whitespaces around tokens and their replacements.
28*67e74705SXin Li ///
29*67e74705SXin Li /// This includes special handling for certain constructs, e.g. the alignment of
30*67e74705SXin Li /// trailing line comments.
31*67e74705SXin Li ///
32*67e74705SXin Li /// To guarantee correctness of alignment operations, the \c WhitespaceManager
33*67e74705SXin Li /// must be informed about every token in the source file; for each token, there
34*67e74705SXin Li /// must be exactly one call to either \c replaceWhitespace or
35*67e74705SXin Li /// \c addUntouchableToken.
36*67e74705SXin Li ///
37*67e74705SXin Li /// There may be multiple calls to \c breakToken for a given token.
38*67e74705SXin Li class WhitespaceManager {
39*67e74705SXin Li public:
WhitespaceManager(const SourceManager & SourceMgr,const FormatStyle & Style,bool UseCRLF)40*67e74705SXin Li   WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
41*67e74705SXin Li                     bool UseCRLF)
42*67e74705SXin Li       : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
43*67e74705SXin Li 
44*67e74705SXin Li   /// \brief Prepares the \c WhitespaceManager for another run.
45*67e74705SXin Li   void reset();
46*67e74705SXin Li 
47*67e74705SXin Li   /// \brief Replaces the whitespace in front of \p Tok. Only call once for
48*67e74705SXin Li   /// each \c AnnotatedToken.
49*67e74705SXin Li   void replaceWhitespace(FormatToken &Tok, unsigned Newlines,
50*67e74705SXin Li                          unsigned IndentLevel, unsigned Spaces,
51*67e74705SXin Li                          unsigned StartOfTokenColumn,
52*67e74705SXin Li                          bool InPPDirective = false);
53*67e74705SXin Li 
54*67e74705SXin Li   /// \brief Adds information about an unchangeable token's whitespace.
55*67e74705SXin Li   ///
56*67e74705SXin Li   /// Needs to be called for every token for which \c replaceWhitespace
57*67e74705SXin Li   /// was not called.
58*67e74705SXin Li   void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
59*67e74705SXin Li 
60*67e74705SXin Li   /// \brief Inserts or replaces whitespace in the middle of a token.
61*67e74705SXin Li   ///
62*67e74705SXin Li   /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
63*67e74705SXin Li   /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
64*67e74705SXin Li   /// characters.
65*67e74705SXin Li   ///
66*67e74705SXin Li   /// Note: \p Spaces can be negative to retain information about initial
67*67e74705SXin Li   /// relative column offset between a line of a block comment and the start of
68*67e74705SXin Li   /// the comment. This negative offset may be compensated by trailing comment
69*67e74705SXin Li   /// alignment here. In all other cases negative \p Spaces will be truncated to
70*67e74705SXin Li   /// 0.
71*67e74705SXin Li   ///
72*67e74705SXin Li   /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
73*67e74705SXin Li   /// used to align backslashes correctly.
74*67e74705SXin Li   void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
75*67e74705SXin Li                                 unsigned ReplaceChars,
76*67e74705SXin Li                                 StringRef PreviousPostfix,
77*67e74705SXin Li                                 StringRef CurrentPrefix, bool InPPDirective,
78*67e74705SXin Li                                 unsigned Newlines, unsigned IndentLevel,
79*67e74705SXin Li                                 int Spaces);
80*67e74705SXin Li 
81*67e74705SXin Li   /// \brief Returns all the \c Replacements created during formatting.
82*67e74705SXin Li   const tooling::Replacements &generateReplacements();
83*67e74705SXin Li 
84*67e74705SXin Li   /// \brief Represents a change before a token, a break inside a token,
85*67e74705SXin Li   /// or the layout of an unchanged token (or whitespace within).
86*67e74705SXin Li   struct Change {
87*67e74705SXin Li     /// \brief Functor to sort changes in original source order.
88*67e74705SXin Li     class IsBeforeInFile {
89*67e74705SXin Li     public:
IsBeforeInFileChange90*67e74705SXin Li       IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
91*67e74705SXin Li       bool operator()(const Change &C1, const Change &C2) const;
92*67e74705SXin Li 
93*67e74705SXin Li     private:
94*67e74705SXin Li       const SourceManager &SourceMgr;
95*67e74705SXin Li     };
96*67e74705SXin Li 
ChangeChange97*67e74705SXin Li     Change() {}
98*67e74705SXin Li 
99*67e74705SXin Li     /// \brief Creates a \c Change.
100*67e74705SXin Li     ///
101*67e74705SXin Li     /// The generated \c Change will replace the characters at
102*67e74705SXin Li     /// \p OriginalWhitespaceRange with a concatenation of
103*67e74705SXin Li     /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
104*67e74705SXin Li     /// and \p CurrentLinePrefix.
105*67e74705SXin Li     ///
106*67e74705SXin Li     /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out
107*67e74705SXin Li     /// trailing comments and escaped newlines.
108*67e74705SXin Li     Change(bool CreateReplacement, SourceRange OriginalWhitespaceRange,
109*67e74705SXin Li            unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn,
110*67e74705SXin Li            unsigned NewlinesBefore, StringRef PreviousLinePostfix,
111*67e74705SXin Li            StringRef CurrentLinePrefix, tok::TokenKind Kind,
112*67e74705SXin Li            bool ContinuesPPDirective, bool IsStartOfDeclName,
113*67e74705SXin Li            bool IsInsideToken);
114*67e74705SXin Li 
115*67e74705SXin Li     bool CreateReplacement;
116*67e74705SXin Li     // Changes might be in the middle of a token, so we cannot just keep the
117*67e74705SXin Li     // FormatToken around to query its information.
118*67e74705SXin Li     SourceRange OriginalWhitespaceRange;
119*67e74705SXin Li     unsigned StartOfTokenColumn;
120*67e74705SXin Li     unsigned NewlinesBefore;
121*67e74705SXin Li     std::string PreviousLinePostfix;
122*67e74705SXin Li     std::string CurrentLinePrefix;
123*67e74705SXin Li     // The kind of the token whose whitespace this change replaces, or in which
124*67e74705SXin Li     // this change inserts whitespace.
125*67e74705SXin Li     // FIXME: Currently this is not set correctly for breaks inside comments, as
126*67e74705SXin Li     // the \c BreakableToken is still doing its own alignment.
127*67e74705SXin Li     tok::TokenKind Kind;
128*67e74705SXin Li     bool ContinuesPPDirective;
129*67e74705SXin Li     bool IsStartOfDeclName;
130*67e74705SXin Li 
131*67e74705SXin Li     // The number of nested blocks the token is in. This is used to add tabs
132*67e74705SXin Li     // only for the indentation, and not for alignment, when
133*67e74705SXin Li     // UseTab = US_ForIndentation.
134*67e74705SXin Li     unsigned IndentLevel;
135*67e74705SXin Li 
136*67e74705SXin Li     // The number of spaces in front of the token or broken part of the token.
137*67e74705SXin Li     // This will be adapted when aligning tokens.
138*67e74705SXin Li     // Can be negative to retain information about the initial relative offset
139*67e74705SXin Li     // of the lines in a block comment. This is used when aligning trailing
140*67e74705SXin Li     // comments. Uncompensated negative offset is truncated to 0.
141*67e74705SXin Li     int Spaces;
142*67e74705SXin Li 
143*67e74705SXin Li     // If this change is inside of a token but not at the start of the token or
144*67e74705SXin Li     // directly after a newline.
145*67e74705SXin Li     bool IsInsideToken;
146*67e74705SXin Li 
147*67e74705SXin Li     // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
148*67e74705SXin Li     // \c EscapedNewlineColumn will be calculated in
149*67e74705SXin Li     // \c calculateLineBreakInformation.
150*67e74705SXin Li     bool IsTrailingComment;
151*67e74705SXin Li     unsigned TokenLength;
152*67e74705SXin Li     unsigned PreviousEndOfTokenColumn;
153*67e74705SXin Li     unsigned EscapedNewlineColumn;
154*67e74705SXin Li 
155*67e74705SXin Li     // These fields are used to retain correct relative line indentation in a
156*67e74705SXin Li     // block comment when aligning trailing comments.
157*67e74705SXin Li     //
158*67e74705SXin Li     // If this Change represents a continuation of a block comment,
159*67e74705SXin Li     // \c StartOfBlockComment is pointer to the first Change in the block
160*67e74705SXin Li     // comment. \c IndentationOffset is a relative column offset to this
161*67e74705SXin Li     // change, so that the correct column can be reconstructed at the end of
162*67e74705SXin Li     // the alignment process.
163*67e74705SXin Li     const Change *StartOfBlockComment;
164*67e74705SXin Li     int IndentationOffset;
165*67e74705SXin Li   };
166*67e74705SXin Li 
167*67e74705SXin Li private:
168*67e74705SXin Li   /// \brief Calculate \c IsTrailingComment, \c TokenLength for the last tokens
169*67e74705SXin Li   /// or token parts in a line and \c PreviousEndOfTokenColumn and
170*67e74705SXin Li   /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
171*67e74705SXin Li   void calculateLineBreakInformation();
172*67e74705SXin Li 
173*67e74705SXin Li   /// \brief Align consecutive assignments over all \c Changes.
174*67e74705SXin Li   void alignConsecutiveAssignments();
175*67e74705SXin Li 
176*67e74705SXin Li   /// \brief Align consecutive declarations over all \c Changes.
177*67e74705SXin Li   void alignConsecutiveDeclarations();
178*67e74705SXin Li 
179*67e74705SXin Li   /// \brief Align trailing comments over all \c Changes.
180*67e74705SXin Li   void alignTrailingComments();
181*67e74705SXin Li 
182*67e74705SXin Li   /// \brief Align trailing comments from change \p Start to change \p End at
183*67e74705SXin Li   /// the specified \p Column.
184*67e74705SXin Li   void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);
185*67e74705SXin Li 
186*67e74705SXin Li   /// \brief Align escaped newlines over all \c Changes.
187*67e74705SXin Li   void alignEscapedNewlines();
188*67e74705SXin Li 
189*67e74705SXin Li   /// \brief Align escaped newlines from change \p Start to change \p End at
190*67e74705SXin Li   /// the specified \p Column.
191*67e74705SXin Li   void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
192*67e74705SXin Li 
193*67e74705SXin Li   /// \brief Fill \c Replaces with the replacements for all effective changes.
194*67e74705SXin Li   void generateChanges();
195*67e74705SXin Li 
196*67e74705SXin Li   /// \brief Stores \p Text as the replacement for the whitespace in \p Range.
197*67e74705SXin Li   void storeReplacement(SourceRange Range, StringRef Text);
198*67e74705SXin Li   void appendNewlineText(std::string &Text, unsigned Newlines);
199*67e74705SXin Li   void appendNewlineText(std::string &Text, unsigned Newlines,
200*67e74705SXin Li                          unsigned PreviousEndOfTokenColumn,
201*67e74705SXin Li                          unsigned EscapedNewlineColumn);
202*67e74705SXin Li   void appendIndentText(std::string &Text, unsigned IndentLevel,
203*67e74705SXin Li                         unsigned Spaces, unsigned WhitespaceStartColumn);
204*67e74705SXin Li 
205*67e74705SXin Li   SmallVector<Change, 16> Changes;
206*67e74705SXin Li   const SourceManager &SourceMgr;
207*67e74705SXin Li   tooling::Replacements Replaces;
208*67e74705SXin Li   const FormatStyle &Style;
209*67e74705SXin Li   bool UseCRLF;
210*67e74705SXin Li };
211*67e74705SXin Li 
212*67e74705SXin Li } // namespace format
213*67e74705SXin Li } // namespace clang
214*67e74705SXin Li 
215*67e74705SXin Li #endif
216