xref: /aosp_15_r20/external/clang/lib/Rewrite/HTMLRewrite.cpp (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li //== HTMLRewrite.cpp - Translate source code into prettified HTML --*- C++ -*-//
2*67e74705SXin Li //
3*67e74705SXin Li //                     The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li //
10*67e74705SXin Li //  This file defines the HTMLRewriter class, which is used to translate the
11*67e74705SXin Li //  text of a source file into prettified HTML.
12*67e74705SXin Li //
13*67e74705SXin Li //===----------------------------------------------------------------------===//
14*67e74705SXin Li 
15*67e74705SXin Li #include "clang/Rewrite/Core/HTMLRewrite.h"
16*67e74705SXin Li #include "clang/Basic/SourceManager.h"
17*67e74705SXin Li #include "clang/Lex/Preprocessor.h"
18*67e74705SXin Li #include "clang/Lex/TokenConcatenation.h"
19*67e74705SXin Li #include "clang/Rewrite/Core/Rewriter.h"
20*67e74705SXin Li #include "llvm/ADT/SmallString.h"
21*67e74705SXin Li #include "llvm/Support/ErrorHandling.h"
22*67e74705SXin Li #include "llvm/Support/MemoryBuffer.h"
23*67e74705SXin Li #include "llvm/Support/raw_ostream.h"
24*67e74705SXin Li #include <memory>
25*67e74705SXin Li using namespace clang;
26*67e74705SXin Li 
27*67e74705SXin Li 
28*67e74705SXin Li /// HighlightRange - Highlight a range in the source code with the specified
29*67e74705SXin Li /// start/end tags.  B/E must be in the same file.  This ensures that
30*67e74705SXin Li /// start/end tags are placed at the start/end of each line if the range is
31*67e74705SXin Li /// multiline.
HighlightRange(Rewriter & R,SourceLocation B,SourceLocation E,const char * StartTag,const char * EndTag)32*67e74705SXin Li void html::HighlightRange(Rewriter &R, SourceLocation B, SourceLocation E,
33*67e74705SXin Li                           const char *StartTag, const char *EndTag) {
34*67e74705SXin Li   SourceManager &SM = R.getSourceMgr();
35*67e74705SXin Li   B = SM.getExpansionLoc(B);
36*67e74705SXin Li   E = SM.getExpansionLoc(E);
37*67e74705SXin Li   FileID FID = SM.getFileID(B);
38*67e74705SXin Li   assert(SM.getFileID(E) == FID && "B/E not in the same file!");
39*67e74705SXin Li 
40*67e74705SXin Li   unsigned BOffset = SM.getFileOffset(B);
41*67e74705SXin Li   unsigned EOffset = SM.getFileOffset(E);
42*67e74705SXin Li 
43*67e74705SXin Li   // Include the whole end token in the range.
44*67e74705SXin Li   EOffset += Lexer::MeasureTokenLength(E, R.getSourceMgr(), R.getLangOpts());
45*67e74705SXin Li 
46*67e74705SXin Li   bool Invalid = false;
47*67e74705SXin Li   const char *BufferStart = SM.getBufferData(FID, &Invalid).data();
48*67e74705SXin Li   if (Invalid)
49*67e74705SXin Li     return;
50*67e74705SXin Li 
51*67e74705SXin Li   HighlightRange(R.getEditBuffer(FID), BOffset, EOffset,
52*67e74705SXin Li                  BufferStart, StartTag, EndTag);
53*67e74705SXin Li }
54*67e74705SXin Li 
55*67e74705SXin Li /// HighlightRange - This is the same as the above method, but takes
56*67e74705SXin Li /// decomposed file locations.
HighlightRange(RewriteBuffer & RB,unsigned B,unsigned E,const char * BufferStart,const char * StartTag,const char * EndTag)57*67e74705SXin Li void html::HighlightRange(RewriteBuffer &RB, unsigned B, unsigned E,
58*67e74705SXin Li                           const char *BufferStart,
59*67e74705SXin Li                           const char *StartTag, const char *EndTag) {
60*67e74705SXin Li   // Insert the tag at the absolute start/end of the range.
61*67e74705SXin Li   RB.InsertTextAfter(B, StartTag);
62*67e74705SXin Li   RB.InsertTextBefore(E, EndTag);
63*67e74705SXin Li 
64*67e74705SXin Li   // Scan the range to see if there is a \r or \n.  If so, and if the line is
65*67e74705SXin Li   // not blank, insert tags on that line as well.
66*67e74705SXin Li   bool HadOpenTag = true;
67*67e74705SXin Li 
68*67e74705SXin Li   unsigned LastNonWhiteSpace = B;
69*67e74705SXin Li   for (unsigned i = B; i != E; ++i) {
70*67e74705SXin Li     switch (BufferStart[i]) {
71*67e74705SXin Li     case '\r':
72*67e74705SXin Li     case '\n':
73*67e74705SXin Li       // Okay, we found a newline in the range.  If we have an open tag, we need
74*67e74705SXin Li       // to insert a close tag at the first non-whitespace before the newline.
75*67e74705SXin Li       if (HadOpenTag)
76*67e74705SXin Li         RB.InsertTextBefore(LastNonWhiteSpace+1, EndTag);
77*67e74705SXin Li 
78*67e74705SXin Li       // Instead of inserting an open tag immediately after the newline, we
79*67e74705SXin Li       // wait until we see a non-whitespace character.  This prevents us from
80*67e74705SXin Li       // inserting tags around blank lines, and also allows the open tag to
81*67e74705SXin Li       // be put *after* whitespace on a non-blank line.
82*67e74705SXin Li       HadOpenTag = false;
83*67e74705SXin Li       break;
84*67e74705SXin Li     case '\0':
85*67e74705SXin Li     case ' ':
86*67e74705SXin Li     case '\t':
87*67e74705SXin Li     case '\f':
88*67e74705SXin Li     case '\v':
89*67e74705SXin Li       // Ignore whitespace.
90*67e74705SXin Li       break;
91*67e74705SXin Li 
92*67e74705SXin Li     default:
93*67e74705SXin Li       // If there is no tag open, do it now.
94*67e74705SXin Li       if (!HadOpenTag) {
95*67e74705SXin Li         RB.InsertTextAfter(i, StartTag);
96*67e74705SXin Li         HadOpenTag = true;
97*67e74705SXin Li       }
98*67e74705SXin Li 
99*67e74705SXin Li       // Remember this character.
100*67e74705SXin Li       LastNonWhiteSpace = i;
101*67e74705SXin Li       break;
102*67e74705SXin Li     }
103*67e74705SXin Li   }
104*67e74705SXin Li }
105*67e74705SXin Li 
EscapeText(Rewriter & R,FileID FID,bool EscapeSpaces,bool ReplaceTabs)106*67e74705SXin Li void html::EscapeText(Rewriter &R, FileID FID,
107*67e74705SXin Li                       bool EscapeSpaces, bool ReplaceTabs) {
108*67e74705SXin Li 
109*67e74705SXin Li   const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID);
110*67e74705SXin Li   const char* C = Buf->getBufferStart();
111*67e74705SXin Li   const char* FileEnd = Buf->getBufferEnd();
112*67e74705SXin Li 
113*67e74705SXin Li   assert (C <= FileEnd);
114*67e74705SXin Li 
115*67e74705SXin Li   RewriteBuffer &RB = R.getEditBuffer(FID);
116*67e74705SXin Li 
117*67e74705SXin Li   unsigned ColNo = 0;
118*67e74705SXin Li   for (unsigned FilePos = 0; C != FileEnd ; ++C, ++FilePos) {
119*67e74705SXin Li     switch (*C) {
120*67e74705SXin Li     default: ++ColNo; break;
121*67e74705SXin Li     case '\n':
122*67e74705SXin Li     case '\r':
123*67e74705SXin Li       ColNo = 0;
124*67e74705SXin Li       break;
125*67e74705SXin Li 
126*67e74705SXin Li     case ' ':
127*67e74705SXin Li       if (EscapeSpaces)
128*67e74705SXin Li         RB.ReplaceText(FilePos, 1, "&nbsp;");
129*67e74705SXin Li       ++ColNo;
130*67e74705SXin Li       break;
131*67e74705SXin Li     case '\f':
132*67e74705SXin Li       RB.ReplaceText(FilePos, 1, "<hr>");
133*67e74705SXin Li       ColNo = 0;
134*67e74705SXin Li       break;
135*67e74705SXin Li 
136*67e74705SXin Li     case '\t': {
137*67e74705SXin Li       if (!ReplaceTabs)
138*67e74705SXin Li         break;
139*67e74705SXin Li       unsigned NumSpaces = 8-(ColNo&7);
140*67e74705SXin Li       if (EscapeSpaces)
141*67e74705SXin Li         RB.ReplaceText(FilePos, 1,
142*67e74705SXin Li                        StringRef("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"
143*67e74705SXin Li                                        "&nbsp;&nbsp;&nbsp;", 6*NumSpaces));
144*67e74705SXin Li       else
145*67e74705SXin Li         RB.ReplaceText(FilePos, 1, StringRef("        ", NumSpaces));
146*67e74705SXin Li       ColNo += NumSpaces;
147*67e74705SXin Li       break;
148*67e74705SXin Li     }
149*67e74705SXin Li     case '<':
150*67e74705SXin Li       RB.ReplaceText(FilePos, 1, "&lt;");
151*67e74705SXin Li       ++ColNo;
152*67e74705SXin Li       break;
153*67e74705SXin Li 
154*67e74705SXin Li     case '>':
155*67e74705SXin Li       RB.ReplaceText(FilePos, 1, "&gt;");
156*67e74705SXin Li       ++ColNo;
157*67e74705SXin Li       break;
158*67e74705SXin Li 
159*67e74705SXin Li     case '&':
160*67e74705SXin Li       RB.ReplaceText(FilePos, 1, "&amp;");
161*67e74705SXin Li       ++ColNo;
162*67e74705SXin Li       break;
163*67e74705SXin Li     }
164*67e74705SXin Li   }
165*67e74705SXin Li }
166*67e74705SXin Li 
EscapeText(StringRef s,bool EscapeSpaces,bool ReplaceTabs)167*67e74705SXin Li std::string html::EscapeText(StringRef s, bool EscapeSpaces, bool ReplaceTabs) {
168*67e74705SXin Li 
169*67e74705SXin Li   unsigned len = s.size();
170*67e74705SXin Li   std::string Str;
171*67e74705SXin Li   llvm::raw_string_ostream os(Str);
172*67e74705SXin Li 
173*67e74705SXin Li   for (unsigned i = 0 ; i < len; ++i) {
174*67e74705SXin Li 
175*67e74705SXin Li     char c = s[i];
176*67e74705SXin Li     switch (c) {
177*67e74705SXin Li     default:
178*67e74705SXin Li       os << c; break;
179*67e74705SXin Li 
180*67e74705SXin Li     case ' ':
181*67e74705SXin Li       if (EscapeSpaces) os << "&nbsp;";
182*67e74705SXin Li       else os << ' ';
183*67e74705SXin Li       break;
184*67e74705SXin Li 
185*67e74705SXin Li     case '\t':
186*67e74705SXin Li       if (ReplaceTabs) {
187*67e74705SXin Li         if (EscapeSpaces)
188*67e74705SXin Li           for (unsigned i = 0; i < 4; ++i)
189*67e74705SXin Li             os << "&nbsp;";
190*67e74705SXin Li         else
191*67e74705SXin Li           for (unsigned i = 0; i < 4; ++i)
192*67e74705SXin Li             os << " ";
193*67e74705SXin Li       }
194*67e74705SXin Li       else
195*67e74705SXin Li         os << c;
196*67e74705SXin Li 
197*67e74705SXin Li       break;
198*67e74705SXin Li 
199*67e74705SXin Li     case '<': os << "&lt;"; break;
200*67e74705SXin Li     case '>': os << "&gt;"; break;
201*67e74705SXin Li     case '&': os << "&amp;"; break;
202*67e74705SXin Li     }
203*67e74705SXin Li   }
204*67e74705SXin Li 
205*67e74705SXin Li   return os.str();
206*67e74705SXin Li }
207*67e74705SXin Li 
AddLineNumber(RewriteBuffer & RB,unsigned LineNo,unsigned B,unsigned E)208*67e74705SXin Li static void AddLineNumber(RewriteBuffer &RB, unsigned LineNo,
209*67e74705SXin Li                           unsigned B, unsigned E) {
210*67e74705SXin Li   SmallString<256> Str;
211*67e74705SXin Li   llvm::raw_svector_ostream OS(Str);
212*67e74705SXin Li 
213*67e74705SXin Li   OS << "<tr><td class=\"num\" id=\"LN"
214*67e74705SXin Li      << LineNo << "\">"
215*67e74705SXin Li      << LineNo << "</td><td class=\"line\">";
216*67e74705SXin Li 
217*67e74705SXin Li   if (B == E) { // Handle empty lines.
218*67e74705SXin Li     OS << " </td></tr>";
219*67e74705SXin Li     RB.InsertTextBefore(B, OS.str());
220*67e74705SXin Li   } else {
221*67e74705SXin Li     RB.InsertTextBefore(B, OS.str());
222*67e74705SXin Li     RB.InsertTextBefore(E, "</td></tr>");
223*67e74705SXin Li   }
224*67e74705SXin Li }
225*67e74705SXin Li 
AddLineNumbers(Rewriter & R,FileID FID)226*67e74705SXin Li void html::AddLineNumbers(Rewriter& R, FileID FID) {
227*67e74705SXin Li 
228*67e74705SXin Li   const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID);
229*67e74705SXin Li   const char* FileBeg = Buf->getBufferStart();
230*67e74705SXin Li   const char* FileEnd = Buf->getBufferEnd();
231*67e74705SXin Li   const char* C = FileBeg;
232*67e74705SXin Li   RewriteBuffer &RB = R.getEditBuffer(FID);
233*67e74705SXin Li 
234*67e74705SXin Li   assert (C <= FileEnd);
235*67e74705SXin Li 
236*67e74705SXin Li   unsigned LineNo = 0;
237*67e74705SXin Li   unsigned FilePos = 0;
238*67e74705SXin Li 
239*67e74705SXin Li   while (C != FileEnd) {
240*67e74705SXin Li 
241*67e74705SXin Li     ++LineNo;
242*67e74705SXin Li     unsigned LineStartPos = FilePos;
243*67e74705SXin Li     unsigned LineEndPos = FileEnd - FileBeg;
244*67e74705SXin Li 
245*67e74705SXin Li     assert (FilePos <= LineEndPos);
246*67e74705SXin Li     assert (C < FileEnd);
247*67e74705SXin Li 
248*67e74705SXin Li     // Scan until the newline (or end-of-file).
249*67e74705SXin Li 
250*67e74705SXin Li     while (C != FileEnd) {
251*67e74705SXin Li       char c = *C;
252*67e74705SXin Li       ++C;
253*67e74705SXin Li 
254*67e74705SXin Li       if (c == '\n') {
255*67e74705SXin Li         LineEndPos = FilePos++;
256*67e74705SXin Li         break;
257*67e74705SXin Li       }
258*67e74705SXin Li 
259*67e74705SXin Li       ++FilePos;
260*67e74705SXin Li     }
261*67e74705SXin Li 
262*67e74705SXin Li     AddLineNumber(RB, LineNo, LineStartPos, LineEndPos);
263*67e74705SXin Li   }
264*67e74705SXin Li 
265*67e74705SXin Li   // Add one big table tag that surrounds all of the code.
266*67e74705SXin Li   RB.InsertTextBefore(0, "<table class=\"code\">\n");
267*67e74705SXin Li   RB.InsertTextAfter(FileEnd - FileBeg, "</table>");
268*67e74705SXin Li }
269*67e74705SXin Li 
AddHeaderFooterInternalBuiltinCSS(Rewriter & R,FileID FID,const char * title)270*67e74705SXin Li void html::AddHeaderFooterInternalBuiltinCSS(Rewriter& R, FileID FID,
271*67e74705SXin Li                                              const char *title) {
272*67e74705SXin Li 
273*67e74705SXin Li   const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID);
274*67e74705SXin Li   const char* FileStart = Buf->getBufferStart();
275*67e74705SXin Li   const char* FileEnd = Buf->getBufferEnd();
276*67e74705SXin Li 
277*67e74705SXin Li   SourceLocation StartLoc = R.getSourceMgr().getLocForStartOfFile(FID);
278*67e74705SXin Li   SourceLocation EndLoc = StartLoc.getLocWithOffset(FileEnd-FileStart);
279*67e74705SXin Li 
280*67e74705SXin Li   std::string s;
281*67e74705SXin Li   llvm::raw_string_ostream os(s);
282*67e74705SXin Li   os << "<!doctype html>\n" // Use HTML 5 doctype
283*67e74705SXin Li         "<html>\n<head>\n";
284*67e74705SXin Li 
285*67e74705SXin Li   if (title)
286*67e74705SXin Li     os << "<title>" << html::EscapeText(title) << "</title>\n";
287*67e74705SXin Li 
288*67e74705SXin Li   os << "<style type=\"text/css\">\n"
289*67e74705SXin Li       " body { color:#000000; background-color:#ffffff }\n"
290*67e74705SXin Li       " body { font-family:Helvetica, sans-serif; font-size:10pt }\n"
291*67e74705SXin Li       " h1 { font-size:14pt }\n"
292*67e74705SXin Li       " .code { border-collapse:collapse; width:100%; }\n"
293*67e74705SXin Li       " .code { font-family: \"Monospace\", monospace; font-size:10pt }\n"
294*67e74705SXin Li       " .code { line-height: 1.2em }\n"
295*67e74705SXin Li       " .comment { color: green; font-style: oblique }\n"
296*67e74705SXin Li       " .keyword { color: blue }\n"
297*67e74705SXin Li       " .string_literal { color: red }\n"
298*67e74705SXin Li       " .directive { color: darkmagenta }\n"
299*67e74705SXin Li       // Macro expansions.
300*67e74705SXin Li       " .expansion { display: none; }\n"
301*67e74705SXin Li       " .macro:hover .expansion { display: block; border: 2px solid #FF0000; "
302*67e74705SXin Li           "padding: 2px; background-color:#FFF0F0; font-weight: normal; "
303*67e74705SXin Li           "  -webkit-border-radius:5px;  -webkit-box-shadow:1px 1px 7px #000; "
304*67e74705SXin Li           "position: absolute; top: -1em; left:10em; z-index: 1 } \n"
305*67e74705SXin Li       " .macro { color: darkmagenta; background-color:LemonChiffon;"
306*67e74705SXin Li              // Macros are position: relative to provide base for expansions.
307*67e74705SXin Li              " position: relative }\n"
308*67e74705SXin Li       " .num { width:2.5em; padding-right:2ex; background-color:#eeeeee }\n"
309*67e74705SXin Li       " .num { text-align:right; font-size:8pt }\n"
310*67e74705SXin Li       " .num { color:#444444 }\n"
311*67e74705SXin Li       " .line { padding-left: 1ex; border-left: 3px solid #ccc }\n"
312*67e74705SXin Li       " .line { white-space: pre }\n"
313*67e74705SXin Li       " .msg { -webkit-box-shadow:1px 1px 7px #000 }\n"
314*67e74705SXin Li       " .msg { -webkit-border-radius:5px }\n"
315*67e74705SXin Li       " .msg { font-family:Helvetica, sans-serif; font-size:8pt }\n"
316*67e74705SXin Li       " .msg { float:left }\n"
317*67e74705SXin Li       " .msg { padding:0.25em 1ex 0.25em 1ex }\n"
318*67e74705SXin Li       " .msg { margin-top:10px; margin-bottom:10px }\n"
319*67e74705SXin Li       " .msg { font-weight:bold }\n"
320*67e74705SXin Li       " .msg { max-width:60em; word-wrap: break-word; white-space: pre-wrap }\n"
321*67e74705SXin Li       " .msgT { padding:0x; spacing:0x }\n"
322*67e74705SXin Li       " .msgEvent { background-color:#fff8b4; color:#000000 }\n"
323*67e74705SXin Li       " .msgControl { background-color:#bbbbbb; color:#000000 }\n"
324*67e74705SXin Li       " .mrange { background-color:#dfddf3 }\n"
325*67e74705SXin Li       " .mrange { border-bottom:1px solid #6F9DBE }\n"
326*67e74705SXin Li       " .PathIndex { font-weight: bold; padding:0px 5px; "
327*67e74705SXin Li         "margin-right:5px; }\n"
328*67e74705SXin Li       " .PathIndex { -webkit-border-radius:8px }\n"
329*67e74705SXin Li       " .PathIndexEvent { background-color:#bfba87 }\n"
330*67e74705SXin Li       " .PathIndexControl { background-color:#8c8c8c }\n"
331*67e74705SXin Li       " .PathNav a { text-decoration:none; font-size: larger }\n"
332*67e74705SXin Li       " .CodeInsertionHint { font-weight: bold; background-color: #10dd10 }\n"
333*67e74705SXin Li       " .CodeRemovalHint { background-color:#de1010 }\n"
334*67e74705SXin Li       " .CodeRemovalHint { border-bottom:1px solid #6F9DBE }\n"
335*67e74705SXin Li       " table.simpletable {\n"
336*67e74705SXin Li       "   padding: 5px;\n"
337*67e74705SXin Li       "   font-size:12pt;\n"
338*67e74705SXin Li       "   margin:20px;\n"
339*67e74705SXin Li       "   border-collapse: collapse; border-spacing: 0px;\n"
340*67e74705SXin Li       " }\n"
341*67e74705SXin Li       " td.rowname {\n"
342*67e74705SXin Li       "   text-align:right; font-weight:bold; color:#444444;\n"
343*67e74705SXin Li       "   padding-right:2ex; }\n"
344*67e74705SXin Li       "</style>\n</head>\n<body>";
345*67e74705SXin Li 
346*67e74705SXin Li   // Generate header
347*67e74705SXin Li   R.InsertTextBefore(StartLoc, os.str());
348*67e74705SXin Li   // Generate footer
349*67e74705SXin Li 
350*67e74705SXin Li   R.InsertTextAfter(EndLoc, "</body></html>\n");
351*67e74705SXin Li }
352*67e74705SXin Li 
353*67e74705SXin Li /// SyntaxHighlight - Relex the specified FileID and annotate the HTML with
354*67e74705SXin Li /// information about keywords, macro expansions etc.  This uses the macro
355*67e74705SXin Li /// table state from the end of the file, so it won't be perfectly perfect,
356*67e74705SXin Li /// but it will be reasonably close.
SyntaxHighlight(Rewriter & R,FileID FID,const Preprocessor & PP)357*67e74705SXin Li void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
358*67e74705SXin Li   RewriteBuffer &RB = R.getEditBuffer(FID);
359*67e74705SXin Li 
360*67e74705SXin Li   const SourceManager &SM = PP.getSourceManager();
361*67e74705SXin Li   const llvm::MemoryBuffer *FromFile = SM.getBuffer(FID);
362*67e74705SXin Li   Lexer L(FID, FromFile, SM, PP.getLangOpts());
363*67e74705SXin Li   const char *BufferStart = L.getBuffer().data();
364*67e74705SXin Li 
365*67e74705SXin Li   // Inform the preprocessor that we want to retain comments as tokens, so we
366*67e74705SXin Li   // can highlight them.
367*67e74705SXin Li   L.SetCommentRetentionState(true);
368*67e74705SXin Li 
369*67e74705SXin Li   // Lex all the tokens in raw mode, to avoid entering #includes or expanding
370*67e74705SXin Li   // macros.
371*67e74705SXin Li   Token Tok;
372*67e74705SXin Li   L.LexFromRawLexer(Tok);
373*67e74705SXin Li 
374*67e74705SXin Li   while (Tok.isNot(tok::eof)) {
375*67e74705SXin Li     // Since we are lexing unexpanded tokens, all tokens are from the main
376*67e74705SXin Li     // FileID.
377*67e74705SXin Li     unsigned TokOffs = SM.getFileOffset(Tok.getLocation());
378*67e74705SXin Li     unsigned TokLen = Tok.getLength();
379*67e74705SXin Li     switch (Tok.getKind()) {
380*67e74705SXin Li     default: break;
381*67e74705SXin Li     case tok::identifier:
382*67e74705SXin Li       llvm_unreachable("tok::identifier in raw lexing mode!");
383*67e74705SXin Li     case tok::raw_identifier: {
384*67e74705SXin Li       // Fill in Result.IdentifierInfo and update the token kind,
385*67e74705SXin Li       // looking up the identifier in the identifier table.
386*67e74705SXin Li       PP.LookUpIdentifierInfo(Tok);
387*67e74705SXin Li 
388*67e74705SXin Li       // If this is a pp-identifier, for a keyword, highlight it as such.
389*67e74705SXin Li       if (Tok.isNot(tok::identifier))
390*67e74705SXin Li         HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
391*67e74705SXin Li                        "<span class='keyword'>", "</span>");
392*67e74705SXin Li       break;
393*67e74705SXin Li     }
394*67e74705SXin Li     case tok::comment:
395*67e74705SXin Li       HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
396*67e74705SXin Li                      "<span class='comment'>", "</span>");
397*67e74705SXin Li       break;
398*67e74705SXin Li     case tok::utf8_string_literal:
399*67e74705SXin Li       // Chop off the u part of u8 prefix
400*67e74705SXin Li       ++TokOffs;
401*67e74705SXin Li       --TokLen;
402*67e74705SXin Li       // FALL THROUGH to chop the 8
403*67e74705SXin Li     case tok::wide_string_literal:
404*67e74705SXin Li     case tok::utf16_string_literal:
405*67e74705SXin Li     case tok::utf32_string_literal:
406*67e74705SXin Li       // Chop off the L, u, U or 8 prefix
407*67e74705SXin Li       ++TokOffs;
408*67e74705SXin Li       --TokLen;
409*67e74705SXin Li       // FALL THROUGH.
410*67e74705SXin Li     case tok::string_literal:
411*67e74705SXin Li       // FIXME: Exclude the optional ud-suffix from the highlighted range.
412*67e74705SXin Li       HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
413*67e74705SXin Li                      "<span class='string_literal'>", "</span>");
414*67e74705SXin Li       break;
415*67e74705SXin Li     case tok::hash: {
416*67e74705SXin Li       // If this is a preprocessor directive, all tokens to end of line are too.
417*67e74705SXin Li       if (!Tok.isAtStartOfLine())
418*67e74705SXin Li         break;
419*67e74705SXin Li 
420*67e74705SXin Li       // Eat all of the tokens until we get to the next one at the start of
421*67e74705SXin Li       // line.
422*67e74705SXin Li       unsigned TokEnd = TokOffs+TokLen;
423*67e74705SXin Li       L.LexFromRawLexer(Tok);
424*67e74705SXin Li       while (!Tok.isAtStartOfLine() && Tok.isNot(tok::eof)) {
425*67e74705SXin Li         TokEnd = SM.getFileOffset(Tok.getLocation())+Tok.getLength();
426*67e74705SXin Li         L.LexFromRawLexer(Tok);
427*67e74705SXin Li       }
428*67e74705SXin Li 
429*67e74705SXin Li       // Find end of line.  This is a hack.
430*67e74705SXin Li       HighlightRange(RB, TokOffs, TokEnd, BufferStart,
431*67e74705SXin Li                      "<span class='directive'>", "</span>");
432*67e74705SXin Li 
433*67e74705SXin Li       // Don't skip the next token.
434*67e74705SXin Li       continue;
435*67e74705SXin Li     }
436*67e74705SXin Li     }
437*67e74705SXin Li 
438*67e74705SXin Li     L.LexFromRawLexer(Tok);
439*67e74705SXin Li   }
440*67e74705SXin Li }
441*67e74705SXin Li 
442*67e74705SXin Li /// HighlightMacros - This uses the macro table state from the end of the
443*67e74705SXin Li /// file, to re-expand macros and insert (into the HTML) information about the
444*67e74705SXin Li /// macro expansions.  This won't be perfectly perfect, but it will be
445*67e74705SXin Li /// reasonably close.
HighlightMacros(Rewriter & R,FileID FID,const Preprocessor & PP)446*67e74705SXin Li void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) {
447*67e74705SXin Li   // Re-lex the raw token stream into a token buffer.
448*67e74705SXin Li   const SourceManager &SM = PP.getSourceManager();
449*67e74705SXin Li   std::vector<Token> TokenStream;
450*67e74705SXin Li 
451*67e74705SXin Li   const llvm::MemoryBuffer *FromFile = SM.getBuffer(FID);
452*67e74705SXin Li   Lexer L(FID, FromFile, SM, PP.getLangOpts());
453*67e74705SXin Li 
454*67e74705SXin Li   // Lex all the tokens in raw mode, to avoid entering #includes or expanding
455*67e74705SXin Li   // macros.
456*67e74705SXin Li   while (1) {
457*67e74705SXin Li     Token Tok;
458*67e74705SXin Li     L.LexFromRawLexer(Tok);
459*67e74705SXin Li 
460*67e74705SXin Li     // If this is a # at the start of a line, discard it from the token stream.
461*67e74705SXin Li     // We don't want the re-preprocess step to see #defines, #includes or other
462*67e74705SXin Li     // preprocessor directives.
463*67e74705SXin Li     if (Tok.is(tok::hash) && Tok.isAtStartOfLine())
464*67e74705SXin Li       continue;
465*67e74705SXin Li 
466*67e74705SXin Li     // If this is a ## token, change its kind to unknown so that repreprocessing
467*67e74705SXin Li     // it will not produce an error.
468*67e74705SXin Li     if (Tok.is(tok::hashhash))
469*67e74705SXin Li       Tok.setKind(tok::unknown);
470*67e74705SXin Li 
471*67e74705SXin Li     // If this raw token is an identifier, the raw lexer won't have looked up
472*67e74705SXin Li     // the corresponding identifier info for it.  Do this now so that it will be
473*67e74705SXin Li     // macro expanded when we re-preprocess it.
474*67e74705SXin Li     if (Tok.is(tok::raw_identifier))
475*67e74705SXin Li       PP.LookUpIdentifierInfo(Tok);
476*67e74705SXin Li 
477*67e74705SXin Li     TokenStream.push_back(Tok);
478*67e74705SXin Li 
479*67e74705SXin Li     if (Tok.is(tok::eof)) break;
480*67e74705SXin Li   }
481*67e74705SXin Li 
482*67e74705SXin Li   // Temporarily change the diagnostics object so that we ignore any generated
483*67e74705SXin Li   // diagnostics from this pass.
484*67e74705SXin Li   DiagnosticsEngine TmpDiags(PP.getDiagnostics().getDiagnosticIDs(),
485*67e74705SXin Li                              &PP.getDiagnostics().getDiagnosticOptions(),
486*67e74705SXin Li                       new IgnoringDiagConsumer);
487*67e74705SXin Li 
488*67e74705SXin Li   // FIXME: This is a huge hack; we reuse the input preprocessor because we want
489*67e74705SXin Li   // its state, but we aren't actually changing it (we hope). This should really
490*67e74705SXin Li   // construct a copy of the preprocessor.
491*67e74705SXin Li   Preprocessor &TmpPP = const_cast<Preprocessor&>(PP);
492*67e74705SXin Li   DiagnosticsEngine *OldDiags = &TmpPP.getDiagnostics();
493*67e74705SXin Li   TmpPP.setDiagnostics(TmpDiags);
494*67e74705SXin Li 
495*67e74705SXin Li   // Inform the preprocessor that we don't want comments.
496*67e74705SXin Li   TmpPP.SetCommentRetentionState(false, false);
497*67e74705SXin Li 
498*67e74705SXin Li   // We don't want pragmas either. Although we filtered out #pragma, removing
499*67e74705SXin Li   // _Pragma and __pragma is much harder.
500*67e74705SXin Li   bool PragmasPreviouslyEnabled = TmpPP.getPragmasEnabled();
501*67e74705SXin Li   TmpPP.setPragmasEnabled(false);
502*67e74705SXin Li 
503*67e74705SXin Li   // Enter the tokens we just lexed.  This will cause them to be macro expanded
504*67e74705SXin Li   // but won't enter sub-files (because we removed #'s).
505*67e74705SXin Li   TmpPP.EnterTokenStream(TokenStream, false);
506*67e74705SXin Li 
507*67e74705SXin Li   TokenConcatenation ConcatInfo(TmpPP);
508*67e74705SXin Li 
509*67e74705SXin Li   // Lex all the tokens.
510*67e74705SXin Li   Token Tok;
511*67e74705SXin Li   TmpPP.Lex(Tok);
512*67e74705SXin Li   while (Tok.isNot(tok::eof)) {
513*67e74705SXin Li     // Ignore non-macro tokens.
514*67e74705SXin Li     if (!Tok.getLocation().isMacroID()) {
515*67e74705SXin Li       TmpPP.Lex(Tok);
516*67e74705SXin Li       continue;
517*67e74705SXin Li     }
518*67e74705SXin Li 
519*67e74705SXin Li     // Okay, we have the first token of a macro expansion: highlight the
520*67e74705SXin Li     // expansion by inserting a start tag before the macro expansion and
521*67e74705SXin Li     // end tag after it.
522*67e74705SXin Li     std::pair<SourceLocation, SourceLocation> LLoc =
523*67e74705SXin Li       SM.getExpansionRange(Tok.getLocation());
524*67e74705SXin Li 
525*67e74705SXin Li     // Ignore tokens whose instantiation location was not the main file.
526*67e74705SXin Li     if (SM.getFileID(LLoc.first) != FID) {
527*67e74705SXin Li       TmpPP.Lex(Tok);
528*67e74705SXin Li       continue;
529*67e74705SXin Li     }
530*67e74705SXin Li 
531*67e74705SXin Li     assert(SM.getFileID(LLoc.second) == FID &&
532*67e74705SXin Li            "Start and end of expansion must be in the same ultimate file!");
533*67e74705SXin Li 
534*67e74705SXin Li     std::string Expansion = EscapeText(TmpPP.getSpelling(Tok));
535*67e74705SXin Li     unsigned LineLen = Expansion.size();
536*67e74705SXin Li 
537*67e74705SXin Li     Token PrevPrevTok;
538*67e74705SXin Li     Token PrevTok = Tok;
539*67e74705SXin Li     // Okay, eat this token, getting the next one.
540*67e74705SXin Li     TmpPP.Lex(Tok);
541*67e74705SXin Li 
542*67e74705SXin Li     // Skip all the rest of the tokens that are part of this macro
543*67e74705SXin Li     // instantiation.  It would be really nice to pop up a window with all the
544*67e74705SXin Li     // spelling of the tokens or something.
545*67e74705SXin Li     while (!Tok.is(tok::eof) &&
546*67e74705SXin Li            SM.getExpansionLoc(Tok.getLocation()) == LLoc.first) {
547*67e74705SXin Li       // Insert a newline if the macro expansion is getting large.
548*67e74705SXin Li       if (LineLen > 60) {
549*67e74705SXin Li         Expansion += "<br>";
550*67e74705SXin Li         LineLen = 0;
551*67e74705SXin Li       }
552*67e74705SXin Li 
553*67e74705SXin Li       LineLen -= Expansion.size();
554*67e74705SXin Li 
555*67e74705SXin Li       // If the tokens were already space separated, or if they must be to avoid
556*67e74705SXin Li       // them being implicitly pasted, add a space between them.
557*67e74705SXin Li       if (Tok.hasLeadingSpace() ||
558*67e74705SXin Li           ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok))
559*67e74705SXin Li         Expansion += ' ';
560*67e74705SXin Li 
561*67e74705SXin Li       // Escape any special characters in the token text.
562*67e74705SXin Li       Expansion += EscapeText(TmpPP.getSpelling(Tok));
563*67e74705SXin Li       LineLen += Expansion.size();
564*67e74705SXin Li 
565*67e74705SXin Li       PrevPrevTok = PrevTok;
566*67e74705SXin Li       PrevTok = Tok;
567*67e74705SXin Li       TmpPP.Lex(Tok);
568*67e74705SXin Li     }
569*67e74705SXin Li 
570*67e74705SXin Li 
571*67e74705SXin Li     // Insert the expansion as the end tag, so that multi-line macros all get
572*67e74705SXin Li     // highlighted.
573*67e74705SXin Li     Expansion = "<span class='expansion'>" + Expansion + "</span></span>";
574*67e74705SXin Li 
575*67e74705SXin Li     HighlightRange(R, LLoc.first, LLoc.second,
576*67e74705SXin Li                    "<span class='macro'>", Expansion.c_str());
577*67e74705SXin Li   }
578*67e74705SXin Li 
579*67e74705SXin Li   // Restore the preprocessor's old state.
580*67e74705SXin Li   TmpPP.setDiagnostics(*OldDiags);
581*67e74705SXin Li   TmpPP.setPragmasEnabled(PragmasPreviouslyEnabled);
582*67e74705SXin Li }
583