1*67e74705SXin Li //== HTMLRewrite.cpp - Translate source code into prettified HTML --*- C++ -*-//
2*67e74705SXin Li //
3*67e74705SXin Li // The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li //
10*67e74705SXin Li // This file defines the HTMLRewriter class, which is used to translate the
11*67e74705SXin Li // text of a source file into prettified HTML.
12*67e74705SXin Li //
13*67e74705SXin Li //===----------------------------------------------------------------------===//
14*67e74705SXin Li
15*67e74705SXin Li #include "clang/Rewrite/Core/HTMLRewrite.h"
16*67e74705SXin Li #include "clang/Basic/SourceManager.h"
17*67e74705SXin Li #include "clang/Lex/Preprocessor.h"
18*67e74705SXin Li #include "clang/Lex/TokenConcatenation.h"
19*67e74705SXin Li #include "clang/Rewrite/Core/Rewriter.h"
20*67e74705SXin Li #include "llvm/ADT/SmallString.h"
21*67e74705SXin Li #include "llvm/Support/ErrorHandling.h"
22*67e74705SXin Li #include "llvm/Support/MemoryBuffer.h"
23*67e74705SXin Li #include "llvm/Support/raw_ostream.h"
24*67e74705SXin Li #include <memory>
25*67e74705SXin Li using namespace clang;
26*67e74705SXin Li
27*67e74705SXin Li
28*67e74705SXin Li /// HighlightRange - Highlight a range in the source code with the specified
29*67e74705SXin Li /// start/end tags. B/E must be in the same file. This ensures that
30*67e74705SXin Li /// start/end tags are placed at the start/end of each line if the range is
31*67e74705SXin Li /// multiline.
HighlightRange(Rewriter & R,SourceLocation B,SourceLocation E,const char * StartTag,const char * EndTag)32*67e74705SXin Li void html::HighlightRange(Rewriter &R, SourceLocation B, SourceLocation E,
33*67e74705SXin Li const char *StartTag, const char *EndTag) {
34*67e74705SXin Li SourceManager &SM = R.getSourceMgr();
35*67e74705SXin Li B = SM.getExpansionLoc(B);
36*67e74705SXin Li E = SM.getExpansionLoc(E);
37*67e74705SXin Li FileID FID = SM.getFileID(B);
38*67e74705SXin Li assert(SM.getFileID(E) == FID && "B/E not in the same file!");
39*67e74705SXin Li
40*67e74705SXin Li unsigned BOffset = SM.getFileOffset(B);
41*67e74705SXin Li unsigned EOffset = SM.getFileOffset(E);
42*67e74705SXin Li
43*67e74705SXin Li // Include the whole end token in the range.
44*67e74705SXin Li EOffset += Lexer::MeasureTokenLength(E, R.getSourceMgr(), R.getLangOpts());
45*67e74705SXin Li
46*67e74705SXin Li bool Invalid = false;
47*67e74705SXin Li const char *BufferStart = SM.getBufferData(FID, &Invalid).data();
48*67e74705SXin Li if (Invalid)
49*67e74705SXin Li return;
50*67e74705SXin Li
51*67e74705SXin Li HighlightRange(R.getEditBuffer(FID), BOffset, EOffset,
52*67e74705SXin Li BufferStart, StartTag, EndTag);
53*67e74705SXin Li }
54*67e74705SXin Li
55*67e74705SXin Li /// HighlightRange - This is the same as the above method, but takes
56*67e74705SXin Li /// decomposed file locations.
HighlightRange(RewriteBuffer & RB,unsigned B,unsigned E,const char * BufferStart,const char * StartTag,const char * EndTag)57*67e74705SXin Li void html::HighlightRange(RewriteBuffer &RB, unsigned B, unsigned E,
58*67e74705SXin Li const char *BufferStart,
59*67e74705SXin Li const char *StartTag, const char *EndTag) {
60*67e74705SXin Li // Insert the tag at the absolute start/end of the range.
61*67e74705SXin Li RB.InsertTextAfter(B, StartTag);
62*67e74705SXin Li RB.InsertTextBefore(E, EndTag);
63*67e74705SXin Li
64*67e74705SXin Li // Scan the range to see if there is a \r or \n. If so, and if the line is
65*67e74705SXin Li // not blank, insert tags on that line as well.
66*67e74705SXin Li bool HadOpenTag = true;
67*67e74705SXin Li
68*67e74705SXin Li unsigned LastNonWhiteSpace = B;
69*67e74705SXin Li for (unsigned i = B; i != E; ++i) {
70*67e74705SXin Li switch (BufferStart[i]) {
71*67e74705SXin Li case '\r':
72*67e74705SXin Li case '\n':
73*67e74705SXin Li // Okay, we found a newline in the range. If we have an open tag, we need
74*67e74705SXin Li // to insert a close tag at the first non-whitespace before the newline.
75*67e74705SXin Li if (HadOpenTag)
76*67e74705SXin Li RB.InsertTextBefore(LastNonWhiteSpace+1, EndTag);
77*67e74705SXin Li
78*67e74705SXin Li // Instead of inserting an open tag immediately after the newline, we
79*67e74705SXin Li // wait until we see a non-whitespace character. This prevents us from
80*67e74705SXin Li // inserting tags around blank lines, and also allows the open tag to
81*67e74705SXin Li // be put *after* whitespace on a non-blank line.
82*67e74705SXin Li HadOpenTag = false;
83*67e74705SXin Li break;
84*67e74705SXin Li case '\0':
85*67e74705SXin Li case ' ':
86*67e74705SXin Li case '\t':
87*67e74705SXin Li case '\f':
88*67e74705SXin Li case '\v':
89*67e74705SXin Li // Ignore whitespace.
90*67e74705SXin Li break;
91*67e74705SXin Li
92*67e74705SXin Li default:
93*67e74705SXin Li // If there is no tag open, do it now.
94*67e74705SXin Li if (!HadOpenTag) {
95*67e74705SXin Li RB.InsertTextAfter(i, StartTag);
96*67e74705SXin Li HadOpenTag = true;
97*67e74705SXin Li }
98*67e74705SXin Li
99*67e74705SXin Li // Remember this character.
100*67e74705SXin Li LastNonWhiteSpace = i;
101*67e74705SXin Li break;
102*67e74705SXin Li }
103*67e74705SXin Li }
104*67e74705SXin Li }
105*67e74705SXin Li
EscapeText(Rewriter & R,FileID FID,bool EscapeSpaces,bool ReplaceTabs)106*67e74705SXin Li void html::EscapeText(Rewriter &R, FileID FID,
107*67e74705SXin Li bool EscapeSpaces, bool ReplaceTabs) {
108*67e74705SXin Li
109*67e74705SXin Li const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID);
110*67e74705SXin Li const char* C = Buf->getBufferStart();
111*67e74705SXin Li const char* FileEnd = Buf->getBufferEnd();
112*67e74705SXin Li
113*67e74705SXin Li assert (C <= FileEnd);
114*67e74705SXin Li
115*67e74705SXin Li RewriteBuffer &RB = R.getEditBuffer(FID);
116*67e74705SXin Li
117*67e74705SXin Li unsigned ColNo = 0;
118*67e74705SXin Li for (unsigned FilePos = 0; C != FileEnd ; ++C, ++FilePos) {
119*67e74705SXin Li switch (*C) {
120*67e74705SXin Li default: ++ColNo; break;
121*67e74705SXin Li case '\n':
122*67e74705SXin Li case '\r':
123*67e74705SXin Li ColNo = 0;
124*67e74705SXin Li break;
125*67e74705SXin Li
126*67e74705SXin Li case ' ':
127*67e74705SXin Li if (EscapeSpaces)
128*67e74705SXin Li RB.ReplaceText(FilePos, 1, " ");
129*67e74705SXin Li ++ColNo;
130*67e74705SXin Li break;
131*67e74705SXin Li case '\f':
132*67e74705SXin Li RB.ReplaceText(FilePos, 1, "<hr>");
133*67e74705SXin Li ColNo = 0;
134*67e74705SXin Li break;
135*67e74705SXin Li
136*67e74705SXin Li case '\t': {
137*67e74705SXin Li if (!ReplaceTabs)
138*67e74705SXin Li break;
139*67e74705SXin Li unsigned NumSpaces = 8-(ColNo&7);
140*67e74705SXin Li if (EscapeSpaces)
141*67e74705SXin Li RB.ReplaceText(FilePos, 1,
142*67e74705SXin Li StringRef(" "
143*67e74705SXin Li " ", 6*NumSpaces));
144*67e74705SXin Li else
145*67e74705SXin Li RB.ReplaceText(FilePos, 1, StringRef(" ", NumSpaces));
146*67e74705SXin Li ColNo += NumSpaces;
147*67e74705SXin Li break;
148*67e74705SXin Li }
149*67e74705SXin Li case '<':
150*67e74705SXin Li RB.ReplaceText(FilePos, 1, "<");
151*67e74705SXin Li ++ColNo;
152*67e74705SXin Li break;
153*67e74705SXin Li
154*67e74705SXin Li case '>':
155*67e74705SXin Li RB.ReplaceText(FilePos, 1, ">");
156*67e74705SXin Li ++ColNo;
157*67e74705SXin Li break;
158*67e74705SXin Li
159*67e74705SXin Li case '&':
160*67e74705SXin Li RB.ReplaceText(FilePos, 1, "&");
161*67e74705SXin Li ++ColNo;
162*67e74705SXin Li break;
163*67e74705SXin Li }
164*67e74705SXin Li }
165*67e74705SXin Li }
166*67e74705SXin Li
EscapeText(StringRef s,bool EscapeSpaces,bool ReplaceTabs)167*67e74705SXin Li std::string html::EscapeText(StringRef s, bool EscapeSpaces, bool ReplaceTabs) {
168*67e74705SXin Li
169*67e74705SXin Li unsigned len = s.size();
170*67e74705SXin Li std::string Str;
171*67e74705SXin Li llvm::raw_string_ostream os(Str);
172*67e74705SXin Li
173*67e74705SXin Li for (unsigned i = 0 ; i < len; ++i) {
174*67e74705SXin Li
175*67e74705SXin Li char c = s[i];
176*67e74705SXin Li switch (c) {
177*67e74705SXin Li default:
178*67e74705SXin Li os << c; break;
179*67e74705SXin Li
180*67e74705SXin Li case ' ':
181*67e74705SXin Li if (EscapeSpaces) os << " ";
182*67e74705SXin Li else os << ' ';
183*67e74705SXin Li break;
184*67e74705SXin Li
185*67e74705SXin Li case '\t':
186*67e74705SXin Li if (ReplaceTabs) {
187*67e74705SXin Li if (EscapeSpaces)
188*67e74705SXin Li for (unsigned i = 0; i < 4; ++i)
189*67e74705SXin Li os << " ";
190*67e74705SXin Li else
191*67e74705SXin Li for (unsigned i = 0; i < 4; ++i)
192*67e74705SXin Li os << " ";
193*67e74705SXin Li }
194*67e74705SXin Li else
195*67e74705SXin Li os << c;
196*67e74705SXin Li
197*67e74705SXin Li break;
198*67e74705SXin Li
199*67e74705SXin Li case '<': os << "<"; break;
200*67e74705SXin Li case '>': os << ">"; break;
201*67e74705SXin Li case '&': os << "&"; break;
202*67e74705SXin Li }
203*67e74705SXin Li }
204*67e74705SXin Li
205*67e74705SXin Li return os.str();
206*67e74705SXin Li }
207*67e74705SXin Li
AddLineNumber(RewriteBuffer & RB,unsigned LineNo,unsigned B,unsigned E)208*67e74705SXin Li static void AddLineNumber(RewriteBuffer &RB, unsigned LineNo,
209*67e74705SXin Li unsigned B, unsigned E) {
210*67e74705SXin Li SmallString<256> Str;
211*67e74705SXin Li llvm::raw_svector_ostream OS(Str);
212*67e74705SXin Li
213*67e74705SXin Li OS << "<tr><td class=\"num\" id=\"LN"
214*67e74705SXin Li << LineNo << "\">"
215*67e74705SXin Li << LineNo << "</td><td class=\"line\">";
216*67e74705SXin Li
217*67e74705SXin Li if (B == E) { // Handle empty lines.
218*67e74705SXin Li OS << " </td></tr>";
219*67e74705SXin Li RB.InsertTextBefore(B, OS.str());
220*67e74705SXin Li } else {
221*67e74705SXin Li RB.InsertTextBefore(B, OS.str());
222*67e74705SXin Li RB.InsertTextBefore(E, "</td></tr>");
223*67e74705SXin Li }
224*67e74705SXin Li }
225*67e74705SXin Li
AddLineNumbers(Rewriter & R,FileID FID)226*67e74705SXin Li void html::AddLineNumbers(Rewriter& R, FileID FID) {
227*67e74705SXin Li
228*67e74705SXin Li const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID);
229*67e74705SXin Li const char* FileBeg = Buf->getBufferStart();
230*67e74705SXin Li const char* FileEnd = Buf->getBufferEnd();
231*67e74705SXin Li const char* C = FileBeg;
232*67e74705SXin Li RewriteBuffer &RB = R.getEditBuffer(FID);
233*67e74705SXin Li
234*67e74705SXin Li assert (C <= FileEnd);
235*67e74705SXin Li
236*67e74705SXin Li unsigned LineNo = 0;
237*67e74705SXin Li unsigned FilePos = 0;
238*67e74705SXin Li
239*67e74705SXin Li while (C != FileEnd) {
240*67e74705SXin Li
241*67e74705SXin Li ++LineNo;
242*67e74705SXin Li unsigned LineStartPos = FilePos;
243*67e74705SXin Li unsigned LineEndPos = FileEnd - FileBeg;
244*67e74705SXin Li
245*67e74705SXin Li assert (FilePos <= LineEndPos);
246*67e74705SXin Li assert (C < FileEnd);
247*67e74705SXin Li
248*67e74705SXin Li // Scan until the newline (or end-of-file).
249*67e74705SXin Li
250*67e74705SXin Li while (C != FileEnd) {
251*67e74705SXin Li char c = *C;
252*67e74705SXin Li ++C;
253*67e74705SXin Li
254*67e74705SXin Li if (c == '\n') {
255*67e74705SXin Li LineEndPos = FilePos++;
256*67e74705SXin Li break;
257*67e74705SXin Li }
258*67e74705SXin Li
259*67e74705SXin Li ++FilePos;
260*67e74705SXin Li }
261*67e74705SXin Li
262*67e74705SXin Li AddLineNumber(RB, LineNo, LineStartPos, LineEndPos);
263*67e74705SXin Li }
264*67e74705SXin Li
265*67e74705SXin Li // Add one big table tag that surrounds all of the code.
266*67e74705SXin Li RB.InsertTextBefore(0, "<table class=\"code\">\n");
267*67e74705SXin Li RB.InsertTextAfter(FileEnd - FileBeg, "</table>");
268*67e74705SXin Li }
269*67e74705SXin Li
AddHeaderFooterInternalBuiltinCSS(Rewriter & R,FileID FID,const char * title)270*67e74705SXin Li void html::AddHeaderFooterInternalBuiltinCSS(Rewriter& R, FileID FID,
271*67e74705SXin Li const char *title) {
272*67e74705SXin Li
273*67e74705SXin Li const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID);
274*67e74705SXin Li const char* FileStart = Buf->getBufferStart();
275*67e74705SXin Li const char* FileEnd = Buf->getBufferEnd();
276*67e74705SXin Li
277*67e74705SXin Li SourceLocation StartLoc = R.getSourceMgr().getLocForStartOfFile(FID);
278*67e74705SXin Li SourceLocation EndLoc = StartLoc.getLocWithOffset(FileEnd-FileStart);
279*67e74705SXin Li
280*67e74705SXin Li std::string s;
281*67e74705SXin Li llvm::raw_string_ostream os(s);
282*67e74705SXin Li os << "<!doctype html>\n" // Use HTML 5 doctype
283*67e74705SXin Li "<html>\n<head>\n";
284*67e74705SXin Li
285*67e74705SXin Li if (title)
286*67e74705SXin Li os << "<title>" << html::EscapeText(title) << "</title>\n";
287*67e74705SXin Li
288*67e74705SXin Li os << "<style type=\"text/css\">\n"
289*67e74705SXin Li " body { color:#000000; background-color:#ffffff }\n"
290*67e74705SXin Li " body { font-family:Helvetica, sans-serif; font-size:10pt }\n"
291*67e74705SXin Li " h1 { font-size:14pt }\n"
292*67e74705SXin Li " .code { border-collapse:collapse; width:100%; }\n"
293*67e74705SXin Li " .code { font-family: \"Monospace\", monospace; font-size:10pt }\n"
294*67e74705SXin Li " .code { line-height: 1.2em }\n"
295*67e74705SXin Li " .comment { color: green; font-style: oblique }\n"
296*67e74705SXin Li " .keyword { color: blue }\n"
297*67e74705SXin Li " .string_literal { color: red }\n"
298*67e74705SXin Li " .directive { color: darkmagenta }\n"
299*67e74705SXin Li // Macro expansions.
300*67e74705SXin Li " .expansion { display: none; }\n"
301*67e74705SXin Li " .macro:hover .expansion { display: block; border: 2px solid #FF0000; "
302*67e74705SXin Li "padding: 2px; background-color:#FFF0F0; font-weight: normal; "
303*67e74705SXin Li " -webkit-border-radius:5px; -webkit-box-shadow:1px 1px 7px #000; "
304*67e74705SXin Li "position: absolute; top: -1em; left:10em; z-index: 1 } \n"
305*67e74705SXin Li " .macro { color: darkmagenta; background-color:LemonChiffon;"
306*67e74705SXin Li // Macros are position: relative to provide base for expansions.
307*67e74705SXin Li " position: relative }\n"
308*67e74705SXin Li " .num { width:2.5em; padding-right:2ex; background-color:#eeeeee }\n"
309*67e74705SXin Li " .num { text-align:right; font-size:8pt }\n"
310*67e74705SXin Li " .num { color:#444444 }\n"
311*67e74705SXin Li " .line { padding-left: 1ex; border-left: 3px solid #ccc }\n"
312*67e74705SXin Li " .line { white-space: pre }\n"
313*67e74705SXin Li " .msg { -webkit-box-shadow:1px 1px 7px #000 }\n"
314*67e74705SXin Li " .msg { -webkit-border-radius:5px }\n"
315*67e74705SXin Li " .msg { font-family:Helvetica, sans-serif; font-size:8pt }\n"
316*67e74705SXin Li " .msg { float:left }\n"
317*67e74705SXin Li " .msg { padding:0.25em 1ex 0.25em 1ex }\n"
318*67e74705SXin Li " .msg { margin-top:10px; margin-bottom:10px }\n"
319*67e74705SXin Li " .msg { font-weight:bold }\n"
320*67e74705SXin Li " .msg { max-width:60em; word-wrap: break-word; white-space: pre-wrap }\n"
321*67e74705SXin Li " .msgT { padding:0x; spacing:0x }\n"
322*67e74705SXin Li " .msgEvent { background-color:#fff8b4; color:#000000 }\n"
323*67e74705SXin Li " .msgControl { background-color:#bbbbbb; color:#000000 }\n"
324*67e74705SXin Li " .mrange { background-color:#dfddf3 }\n"
325*67e74705SXin Li " .mrange { border-bottom:1px solid #6F9DBE }\n"
326*67e74705SXin Li " .PathIndex { font-weight: bold; padding:0px 5px; "
327*67e74705SXin Li "margin-right:5px; }\n"
328*67e74705SXin Li " .PathIndex { -webkit-border-radius:8px }\n"
329*67e74705SXin Li " .PathIndexEvent { background-color:#bfba87 }\n"
330*67e74705SXin Li " .PathIndexControl { background-color:#8c8c8c }\n"
331*67e74705SXin Li " .PathNav a { text-decoration:none; font-size: larger }\n"
332*67e74705SXin Li " .CodeInsertionHint { font-weight: bold; background-color: #10dd10 }\n"
333*67e74705SXin Li " .CodeRemovalHint { background-color:#de1010 }\n"
334*67e74705SXin Li " .CodeRemovalHint { border-bottom:1px solid #6F9DBE }\n"
335*67e74705SXin Li " table.simpletable {\n"
336*67e74705SXin Li " padding: 5px;\n"
337*67e74705SXin Li " font-size:12pt;\n"
338*67e74705SXin Li " margin:20px;\n"
339*67e74705SXin Li " border-collapse: collapse; border-spacing: 0px;\n"
340*67e74705SXin Li " }\n"
341*67e74705SXin Li " td.rowname {\n"
342*67e74705SXin Li " text-align:right; font-weight:bold; color:#444444;\n"
343*67e74705SXin Li " padding-right:2ex; }\n"
344*67e74705SXin Li "</style>\n</head>\n<body>";
345*67e74705SXin Li
346*67e74705SXin Li // Generate header
347*67e74705SXin Li R.InsertTextBefore(StartLoc, os.str());
348*67e74705SXin Li // Generate footer
349*67e74705SXin Li
350*67e74705SXin Li R.InsertTextAfter(EndLoc, "</body></html>\n");
351*67e74705SXin Li }
352*67e74705SXin Li
353*67e74705SXin Li /// SyntaxHighlight - Relex the specified FileID and annotate the HTML with
354*67e74705SXin Li /// information about keywords, macro expansions etc. This uses the macro
355*67e74705SXin Li /// table state from the end of the file, so it won't be perfectly perfect,
356*67e74705SXin Li /// but it will be reasonably close.
SyntaxHighlight(Rewriter & R,FileID FID,const Preprocessor & PP)357*67e74705SXin Li void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
358*67e74705SXin Li RewriteBuffer &RB = R.getEditBuffer(FID);
359*67e74705SXin Li
360*67e74705SXin Li const SourceManager &SM = PP.getSourceManager();
361*67e74705SXin Li const llvm::MemoryBuffer *FromFile = SM.getBuffer(FID);
362*67e74705SXin Li Lexer L(FID, FromFile, SM, PP.getLangOpts());
363*67e74705SXin Li const char *BufferStart = L.getBuffer().data();
364*67e74705SXin Li
365*67e74705SXin Li // Inform the preprocessor that we want to retain comments as tokens, so we
366*67e74705SXin Li // can highlight them.
367*67e74705SXin Li L.SetCommentRetentionState(true);
368*67e74705SXin Li
369*67e74705SXin Li // Lex all the tokens in raw mode, to avoid entering #includes or expanding
370*67e74705SXin Li // macros.
371*67e74705SXin Li Token Tok;
372*67e74705SXin Li L.LexFromRawLexer(Tok);
373*67e74705SXin Li
374*67e74705SXin Li while (Tok.isNot(tok::eof)) {
375*67e74705SXin Li // Since we are lexing unexpanded tokens, all tokens are from the main
376*67e74705SXin Li // FileID.
377*67e74705SXin Li unsigned TokOffs = SM.getFileOffset(Tok.getLocation());
378*67e74705SXin Li unsigned TokLen = Tok.getLength();
379*67e74705SXin Li switch (Tok.getKind()) {
380*67e74705SXin Li default: break;
381*67e74705SXin Li case tok::identifier:
382*67e74705SXin Li llvm_unreachable("tok::identifier in raw lexing mode!");
383*67e74705SXin Li case tok::raw_identifier: {
384*67e74705SXin Li // Fill in Result.IdentifierInfo and update the token kind,
385*67e74705SXin Li // looking up the identifier in the identifier table.
386*67e74705SXin Li PP.LookUpIdentifierInfo(Tok);
387*67e74705SXin Li
388*67e74705SXin Li // If this is a pp-identifier, for a keyword, highlight it as such.
389*67e74705SXin Li if (Tok.isNot(tok::identifier))
390*67e74705SXin Li HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
391*67e74705SXin Li "<span class='keyword'>", "</span>");
392*67e74705SXin Li break;
393*67e74705SXin Li }
394*67e74705SXin Li case tok::comment:
395*67e74705SXin Li HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
396*67e74705SXin Li "<span class='comment'>", "</span>");
397*67e74705SXin Li break;
398*67e74705SXin Li case tok::utf8_string_literal:
399*67e74705SXin Li // Chop off the u part of u8 prefix
400*67e74705SXin Li ++TokOffs;
401*67e74705SXin Li --TokLen;
402*67e74705SXin Li // FALL THROUGH to chop the 8
403*67e74705SXin Li case tok::wide_string_literal:
404*67e74705SXin Li case tok::utf16_string_literal:
405*67e74705SXin Li case tok::utf32_string_literal:
406*67e74705SXin Li // Chop off the L, u, U or 8 prefix
407*67e74705SXin Li ++TokOffs;
408*67e74705SXin Li --TokLen;
409*67e74705SXin Li // FALL THROUGH.
410*67e74705SXin Li case tok::string_literal:
411*67e74705SXin Li // FIXME: Exclude the optional ud-suffix from the highlighted range.
412*67e74705SXin Li HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
413*67e74705SXin Li "<span class='string_literal'>", "</span>");
414*67e74705SXin Li break;
415*67e74705SXin Li case tok::hash: {
416*67e74705SXin Li // If this is a preprocessor directive, all tokens to end of line are too.
417*67e74705SXin Li if (!Tok.isAtStartOfLine())
418*67e74705SXin Li break;
419*67e74705SXin Li
420*67e74705SXin Li // Eat all of the tokens until we get to the next one at the start of
421*67e74705SXin Li // line.
422*67e74705SXin Li unsigned TokEnd = TokOffs+TokLen;
423*67e74705SXin Li L.LexFromRawLexer(Tok);
424*67e74705SXin Li while (!Tok.isAtStartOfLine() && Tok.isNot(tok::eof)) {
425*67e74705SXin Li TokEnd = SM.getFileOffset(Tok.getLocation())+Tok.getLength();
426*67e74705SXin Li L.LexFromRawLexer(Tok);
427*67e74705SXin Li }
428*67e74705SXin Li
429*67e74705SXin Li // Find end of line. This is a hack.
430*67e74705SXin Li HighlightRange(RB, TokOffs, TokEnd, BufferStart,
431*67e74705SXin Li "<span class='directive'>", "</span>");
432*67e74705SXin Li
433*67e74705SXin Li // Don't skip the next token.
434*67e74705SXin Li continue;
435*67e74705SXin Li }
436*67e74705SXin Li }
437*67e74705SXin Li
438*67e74705SXin Li L.LexFromRawLexer(Tok);
439*67e74705SXin Li }
440*67e74705SXin Li }
441*67e74705SXin Li
442*67e74705SXin Li /// HighlightMacros - This uses the macro table state from the end of the
443*67e74705SXin Li /// file, to re-expand macros and insert (into the HTML) information about the
444*67e74705SXin Li /// macro expansions. This won't be perfectly perfect, but it will be
445*67e74705SXin Li /// reasonably close.
HighlightMacros(Rewriter & R,FileID FID,const Preprocessor & PP)446*67e74705SXin Li void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) {
447*67e74705SXin Li // Re-lex the raw token stream into a token buffer.
448*67e74705SXin Li const SourceManager &SM = PP.getSourceManager();
449*67e74705SXin Li std::vector<Token> TokenStream;
450*67e74705SXin Li
451*67e74705SXin Li const llvm::MemoryBuffer *FromFile = SM.getBuffer(FID);
452*67e74705SXin Li Lexer L(FID, FromFile, SM, PP.getLangOpts());
453*67e74705SXin Li
454*67e74705SXin Li // Lex all the tokens in raw mode, to avoid entering #includes or expanding
455*67e74705SXin Li // macros.
456*67e74705SXin Li while (1) {
457*67e74705SXin Li Token Tok;
458*67e74705SXin Li L.LexFromRawLexer(Tok);
459*67e74705SXin Li
460*67e74705SXin Li // If this is a # at the start of a line, discard it from the token stream.
461*67e74705SXin Li // We don't want the re-preprocess step to see #defines, #includes or other
462*67e74705SXin Li // preprocessor directives.
463*67e74705SXin Li if (Tok.is(tok::hash) && Tok.isAtStartOfLine())
464*67e74705SXin Li continue;
465*67e74705SXin Li
466*67e74705SXin Li // If this is a ## token, change its kind to unknown so that repreprocessing
467*67e74705SXin Li // it will not produce an error.
468*67e74705SXin Li if (Tok.is(tok::hashhash))
469*67e74705SXin Li Tok.setKind(tok::unknown);
470*67e74705SXin Li
471*67e74705SXin Li // If this raw token is an identifier, the raw lexer won't have looked up
472*67e74705SXin Li // the corresponding identifier info for it. Do this now so that it will be
473*67e74705SXin Li // macro expanded when we re-preprocess it.
474*67e74705SXin Li if (Tok.is(tok::raw_identifier))
475*67e74705SXin Li PP.LookUpIdentifierInfo(Tok);
476*67e74705SXin Li
477*67e74705SXin Li TokenStream.push_back(Tok);
478*67e74705SXin Li
479*67e74705SXin Li if (Tok.is(tok::eof)) break;
480*67e74705SXin Li }
481*67e74705SXin Li
482*67e74705SXin Li // Temporarily change the diagnostics object so that we ignore any generated
483*67e74705SXin Li // diagnostics from this pass.
484*67e74705SXin Li DiagnosticsEngine TmpDiags(PP.getDiagnostics().getDiagnosticIDs(),
485*67e74705SXin Li &PP.getDiagnostics().getDiagnosticOptions(),
486*67e74705SXin Li new IgnoringDiagConsumer);
487*67e74705SXin Li
488*67e74705SXin Li // FIXME: This is a huge hack; we reuse the input preprocessor because we want
489*67e74705SXin Li // its state, but we aren't actually changing it (we hope). This should really
490*67e74705SXin Li // construct a copy of the preprocessor.
491*67e74705SXin Li Preprocessor &TmpPP = const_cast<Preprocessor&>(PP);
492*67e74705SXin Li DiagnosticsEngine *OldDiags = &TmpPP.getDiagnostics();
493*67e74705SXin Li TmpPP.setDiagnostics(TmpDiags);
494*67e74705SXin Li
495*67e74705SXin Li // Inform the preprocessor that we don't want comments.
496*67e74705SXin Li TmpPP.SetCommentRetentionState(false, false);
497*67e74705SXin Li
498*67e74705SXin Li // We don't want pragmas either. Although we filtered out #pragma, removing
499*67e74705SXin Li // _Pragma and __pragma is much harder.
500*67e74705SXin Li bool PragmasPreviouslyEnabled = TmpPP.getPragmasEnabled();
501*67e74705SXin Li TmpPP.setPragmasEnabled(false);
502*67e74705SXin Li
503*67e74705SXin Li // Enter the tokens we just lexed. This will cause them to be macro expanded
504*67e74705SXin Li // but won't enter sub-files (because we removed #'s).
505*67e74705SXin Li TmpPP.EnterTokenStream(TokenStream, false);
506*67e74705SXin Li
507*67e74705SXin Li TokenConcatenation ConcatInfo(TmpPP);
508*67e74705SXin Li
509*67e74705SXin Li // Lex all the tokens.
510*67e74705SXin Li Token Tok;
511*67e74705SXin Li TmpPP.Lex(Tok);
512*67e74705SXin Li while (Tok.isNot(tok::eof)) {
513*67e74705SXin Li // Ignore non-macro tokens.
514*67e74705SXin Li if (!Tok.getLocation().isMacroID()) {
515*67e74705SXin Li TmpPP.Lex(Tok);
516*67e74705SXin Li continue;
517*67e74705SXin Li }
518*67e74705SXin Li
519*67e74705SXin Li // Okay, we have the first token of a macro expansion: highlight the
520*67e74705SXin Li // expansion by inserting a start tag before the macro expansion and
521*67e74705SXin Li // end tag after it.
522*67e74705SXin Li std::pair<SourceLocation, SourceLocation> LLoc =
523*67e74705SXin Li SM.getExpansionRange(Tok.getLocation());
524*67e74705SXin Li
525*67e74705SXin Li // Ignore tokens whose instantiation location was not the main file.
526*67e74705SXin Li if (SM.getFileID(LLoc.first) != FID) {
527*67e74705SXin Li TmpPP.Lex(Tok);
528*67e74705SXin Li continue;
529*67e74705SXin Li }
530*67e74705SXin Li
531*67e74705SXin Li assert(SM.getFileID(LLoc.second) == FID &&
532*67e74705SXin Li "Start and end of expansion must be in the same ultimate file!");
533*67e74705SXin Li
534*67e74705SXin Li std::string Expansion = EscapeText(TmpPP.getSpelling(Tok));
535*67e74705SXin Li unsigned LineLen = Expansion.size();
536*67e74705SXin Li
537*67e74705SXin Li Token PrevPrevTok;
538*67e74705SXin Li Token PrevTok = Tok;
539*67e74705SXin Li // Okay, eat this token, getting the next one.
540*67e74705SXin Li TmpPP.Lex(Tok);
541*67e74705SXin Li
542*67e74705SXin Li // Skip all the rest of the tokens that are part of this macro
543*67e74705SXin Li // instantiation. It would be really nice to pop up a window with all the
544*67e74705SXin Li // spelling of the tokens or something.
545*67e74705SXin Li while (!Tok.is(tok::eof) &&
546*67e74705SXin Li SM.getExpansionLoc(Tok.getLocation()) == LLoc.first) {
547*67e74705SXin Li // Insert a newline if the macro expansion is getting large.
548*67e74705SXin Li if (LineLen > 60) {
549*67e74705SXin Li Expansion += "<br>";
550*67e74705SXin Li LineLen = 0;
551*67e74705SXin Li }
552*67e74705SXin Li
553*67e74705SXin Li LineLen -= Expansion.size();
554*67e74705SXin Li
555*67e74705SXin Li // If the tokens were already space separated, or if they must be to avoid
556*67e74705SXin Li // them being implicitly pasted, add a space between them.
557*67e74705SXin Li if (Tok.hasLeadingSpace() ||
558*67e74705SXin Li ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok))
559*67e74705SXin Li Expansion += ' ';
560*67e74705SXin Li
561*67e74705SXin Li // Escape any special characters in the token text.
562*67e74705SXin Li Expansion += EscapeText(TmpPP.getSpelling(Tok));
563*67e74705SXin Li LineLen += Expansion.size();
564*67e74705SXin Li
565*67e74705SXin Li PrevPrevTok = PrevTok;
566*67e74705SXin Li PrevTok = Tok;
567*67e74705SXin Li TmpPP.Lex(Tok);
568*67e74705SXin Li }
569*67e74705SXin Li
570*67e74705SXin Li
571*67e74705SXin Li // Insert the expansion as the end tag, so that multi-line macros all get
572*67e74705SXin Li // highlighted.
573*67e74705SXin Li Expansion = "<span class='expansion'>" + Expansion + "</span></span>";
574*67e74705SXin Li
575*67e74705SXin Li HighlightRange(R, LLoc.first, LLoc.second,
576*67e74705SXin Li "<span class='macro'>", Expansion.c_str());
577*67e74705SXin Li }
578*67e74705SXin Li
579*67e74705SXin Li // Restore the preprocessor's old state.
580*67e74705SXin Li TmpPP.setDiagnostics(*OldDiags);
581*67e74705SXin Li TmpPP.setPragmasEnabled(PragmasPreviouslyEnabled);
582*67e74705SXin Li }
583