xref: /aosp_15_r20/external/clang/lib/Lex/PTHLexer.cpp (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li //===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2*67e74705SXin Li //
3*67e74705SXin Li //                     The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li //
10*67e74705SXin Li // This file implements the PTHLexer interface.
11*67e74705SXin Li //
12*67e74705SXin Li //===----------------------------------------------------------------------===//
13*67e74705SXin Li 
14*67e74705SXin Li #include "clang/Lex/PTHLexer.h"
15*67e74705SXin Li #include "clang/Basic/FileManager.h"
16*67e74705SXin Li #include "clang/Basic/FileSystemStatCache.h"
17*67e74705SXin Li #include "clang/Basic/IdentifierTable.h"
18*67e74705SXin Li #include "clang/Basic/TokenKinds.h"
19*67e74705SXin Li #include "clang/Lex/LexDiagnostic.h"
20*67e74705SXin Li #include "clang/Lex/PTHManager.h"
21*67e74705SXin Li #include "clang/Lex/Preprocessor.h"
22*67e74705SXin Li #include "clang/Lex/Token.h"
23*67e74705SXin Li #include "llvm/ADT/StringExtras.h"
24*67e74705SXin Li #include "llvm/ADT/StringMap.h"
25*67e74705SXin Li #include "llvm/Support/EndianStream.h"
26*67e74705SXin Li #include "llvm/Support/MemoryBuffer.h"
27*67e74705SXin Li #include <memory>
28*67e74705SXin Li #include <system_error>
29*67e74705SXin Li using namespace clang;
30*67e74705SXin Li 
// Size in bytes of one serialized token record in the PTH token stream:
// kind (1 byte), flags (1 byte), length (2 bytes), identifier/spelling id
// (4 bytes) and file offset (4 bytes).
static const unsigned StoredTokenSize =
    sizeof(uint8_t) + sizeof(uint8_t) + sizeof(uint16_t) +
    sizeof(uint32_t) + sizeof(uint32_t);
32*67e74705SXin Li 
33*67e74705SXin Li //===----------------------------------------------------------------------===//
34*67e74705SXin Li // PTHLexer methods.
35*67e74705SXin Li //===----------------------------------------------------------------------===//
36*67e74705SXin Li 
PTHLexer(Preprocessor & PP,FileID FID,const unsigned char * D,const unsigned char * ppcond,PTHManager & PM)37*67e74705SXin Li PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
38*67e74705SXin Li                    const unsigned char *ppcond, PTHManager &PM)
39*67e74705SXin Li   : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(nullptr),
40*67e74705SXin Li     PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
41*67e74705SXin Li 
42*67e74705SXin Li   FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
43*67e74705SXin Li }
44*67e74705SXin Li 
Lex(Token & Tok)45*67e74705SXin Li bool PTHLexer::Lex(Token& Tok) {
46*67e74705SXin Li   //===--------------------------------------==//
47*67e74705SXin Li   // Read the raw token data.
48*67e74705SXin Li   //===--------------------------------------==//
49*67e74705SXin Li   using namespace llvm::support;
50*67e74705SXin Li 
51*67e74705SXin Li   // Shadow CurPtr into an automatic variable.
52*67e74705SXin Li   const unsigned char *CurPtrShadow = CurPtr;
53*67e74705SXin Li 
54*67e74705SXin Li   // Read in the data for the token.
55*67e74705SXin Li   unsigned Word0 = endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
56*67e74705SXin Li   uint32_t IdentifierID =
57*67e74705SXin Li       endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
58*67e74705SXin Li   uint32_t FileOffset =
59*67e74705SXin Li       endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
60*67e74705SXin Li 
61*67e74705SXin Li   tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
62*67e74705SXin Li   Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
63*67e74705SXin Li   uint32_t Len = Word0 >> 16;
64*67e74705SXin Li 
65*67e74705SXin Li   CurPtr = CurPtrShadow;
66*67e74705SXin Li 
67*67e74705SXin Li   //===--------------------------------------==//
68*67e74705SXin Li   // Construct the token itself.
69*67e74705SXin Li   //===--------------------------------------==//
70*67e74705SXin Li 
71*67e74705SXin Li   Tok.startToken();
72*67e74705SXin Li   Tok.setKind(TKind);
73*67e74705SXin Li   Tok.setFlag(TFlags);
74*67e74705SXin Li   assert(!LexingRawMode);
75*67e74705SXin Li   Tok.setLocation(FileStartLoc.getLocWithOffset(FileOffset));
76*67e74705SXin Li   Tok.setLength(Len);
77*67e74705SXin Li 
78*67e74705SXin Li   // Handle identifiers.
79*67e74705SXin Li   if (Tok.isLiteral()) {
80*67e74705SXin Li     Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
81*67e74705SXin Li   }
82*67e74705SXin Li   else if (IdentifierID) {
83*67e74705SXin Li     MIOpt.ReadToken();
84*67e74705SXin Li     IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
85*67e74705SXin Li 
86*67e74705SXin Li     Tok.setIdentifierInfo(II);
87*67e74705SXin Li 
88*67e74705SXin Li     // Change the kind of this identifier to the appropriate token kind, e.g.
89*67e74705SXin Li     // turning "for" into a keyword.
90*67e74705SXin Li     Tok.setKind(II->getTokenID());
91*67e74705SXin Li 
92*67e74705SXin Li     if (II->isHandleIdentifierCase())
93*67e74705SXin Li       return PP->HandleIdentifier(Tok);
94*67e74705SXin Li 
95*67e74705SXin Li     return true;
96*67e74705SXin Li   }
97*67e74705SXin Li 
98*67e74705SXin Li   //===--------------------------------------==//
99*67e74705SXin Li   // Process the token.
100*67e74705SXin Li   //===--------------------------------------==//
101*67e74705SXin Li   if (TKind == tok::eof) {
102*67e74705SXin Li     // Save the end-of-file token.
103*67e74705SXin Li     EofToken = Tok;
104*67e74705SXin Li 
105*67e74705SXin Li     assert(!ParsingPreprocessorDirective);
106*67e74705SXin Li     assert(!LexingRawMode);
107*67e74705SXin Li 
108*67e74705SXin Li     return LexEndOfFile(Tok);
109*67e74705SXin Li   }
110*67e74705SXin Li 
111*67e74705SXin Li   if (TKind == tok::hash && Tok.isAtStartOfLine()) {
112*67e74705SXin Li     LastHashTokPtr = CurPtr - StoredTokenSize;
113*67e74705SXin Li     assert(!LexingRawMode);
114*67e74705SXin Li     PP->HandleDirective(Tok);
115*67e74705SXin Li 
116*67e74705SXin Li     return false;
117*67e74705SXin Li   }
118*67e74705SXin Li 
119*67e74705SXin Li   if (TKind == tok::eod) {
120*67e74705SXin Li     assert(ParsingPreprocessorDirective);
121*67e74705SXin Li     ParsingPreprocessorDirective = false;
122*67e74705SXin Li     return true;
123*67e74705SXin Li   }
124*67e74705SXin Li 
125*67e74705SXin Li   MIOpt.ReadToken();
126*67e74705SXin Li   return true;
127*67e74705SXin Li }
128*67e74705SXin Li 
LexEndOfFile(Token & Result)129*67e74705SXin Li bool PTHLexer::LexEndOfFile(Token &Result) {
130*67e74705SXin Li   // If we hit the end of the file while parsing a preprocessor directive,
131*67e74705SXin Li   // end the preprocessor directive first.  The next token returned will
132*67e74705SXin Li   // then be the end of file.
133*67e74705SXin Li   if (ParsingPreprocessorDirective) {
134*67e74705SXin Li     ParsingPreprocessorDirective = false; // Done parsing the "line".
135*67e74705SXin Li     return true;  // Have a token.
136*67e74705SXin Li   }
137*67e74705SXin Li 
138*67e74705SXin Li   assert(!LexingRawMode);
139*67e74705SXin Li 
140*67e74705SXin Li   // If we are in a #if directive, emit an error.
141*67e74705SXin Li   while (!ConditionalStack.empty()) {
142*67e74705SXin Li     if (PP->getCodeCompletionFileLoc() != FileStartLoc)
143*67e74705SXin Li       PP->Diag(ConditionalStack.back().IfLoc,
144*67e74705SXin Li                diag::err_pp_unterminated_conditional);
145*67e74705SXin Li     ConditionalStack.pop_back();
146*67e74705SXin Li   }
147*67e74705SXin Li 
148*67e74705SXin Li   // Finally, let the preprocessor handle this.
149*67e74705SXin Li   return PP->HandleEndOfFile(Result);
150*67e74705SXin Li }
151*67e74705SXin Li 
152*67e74705SXin Li // FIXME: We can just grab the last token instead of storing a copy
153*67e74705SXin Li // into EofToken.
getEOF(Token & Tok)154*67e74705SXin Li void PTHLexer::getEOF(Token& Tok) {
155*67e74705SXin Li   assert(EofToken.is(tok::eof));
156*67e74705SXin Li   Tok = EofToken;
157*67e74705SXin Li }
158*67e74705SXin Li 
DiscardToEndOfLine()159*67e74705SXin Li void PTHLexer::DiscardToEndOfLine() {
160*67e74705SXin Li   assert(ParsingPreprocessorDirective && ParsingFilename == false &&
161*67e74705SXin Li          "Must be in a preprocessing directive!");
162*67e74705SXin Li 
163*67e74705SXin Li   // We assume that if the preprocessor wishes to discard to the end of
164*67e74705SXin Li   // the line that it also means to end the current preprocessor directive.
165*67e74705SXin Li   ParsingPreprocessorDirective = false;
166*67e74705SXin Li 
167*67e74705SXin Li   // Skip tokens by only peeking at their token kind and the flags.
168*67e74705SXin Li   // We don't need to actually reconstruct full tokens from the token buffer.
169*67e74705SXin Li   // This saves some copies and it also reduces IdentifierInfo* lookup.
170*67e74705SXin Li   const unsigned char* p = CurPtr;
171*67e74705SXin Li   while (1) {
172*67e74705SXin Li     // Read the token kind.  Are we at the end of the file?
173*67e74705SXin Li     tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
174*67e74705SXin Li     if (x == tok::eof) break;
175*67e74705SXin Li 
176*67e74705SXin Li     // Read the token flags.  Are we at the start of the next line?
177*67e74705SXin Li     Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
178*67e74705SXin Li     if (y & Token::StartOfLine) break;
179*67e74705SXin Li 
180*67e74705SXin Li     // Skip to the next token.
181*67e74705SXin Li     p += StoredTokenSize;
182*67e74705SXin Li   }
183*67e74705SXin Li 
184*67e74705SXin Li   CurPtr = p;
185*67e74705SXin Li }
186*67e74705SXin Li 
187*67e74705SXin Li /// SkipBlock - Used by Preprocessor to skip the current conditional block.
SkipBlock()188*67e74705SXin Li bool PTHLexer::SkipBlock() {
189*67e74705SXin Li   using namespace llvm::support;
190*67e74705SXin Li   assert(CurPPCondPtr && "No cached PP conditional information.");
191*67e74705SXin Li   assert(LastHashTokPtr && "No known '#' token.");
192*67e74705SXin Li 
193*67e74705SXin Li   const unsigned char *HashEntryI = nullptr;
194*67e74705SXin Li   uint32_t TableIdx;
195*67e74705SXin Li 
196*67e74705SXin Li   do {
197*67e74705SXin Li     // Read the token offset from the side-table.
198*67e74705SXin Li     uint32_t Offset = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);
199*67e74705SXin Li 
200*67e74705SXin Li     // Read the target table index from the side-table.
201*67e74705SXin Li     TableIdx = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);
202*67e74705SXin Li 
203*67e74705SXin Li     // Compute the actual memory address of the '#' token data for this entry.
204*67e74705SXin Li     HashEntryI = TokBuf + Offset;
205*67e74705SXin Li 
206*67e74705SXin Li     // Optmization: "Sibling jumping".  #if...#else...#endif blocks can
207*67e74705SXin Li     //  contain nested blocks.  In the side-table we can jump over these
208*67e74705SXin Li     //  nested blocks instead of doing a linear search if the next "sibling"
209*67e74705SXin Li     //  entry is not at a location greater than LastHashTokPtr.
210*67e74705SXin Li     if (HashEntryI < LastHashTokPtr && TableIdx) {
211*67e74705SXin Li       // In the side-table we are still at an entry for a '#' token that
212*67e74705SXin Li       // is earlier than the last one we saw.  Check if the location we would
213*67e74705SXin Li       // stride gets us closer.
214*67e74705SXin Li       const unsigned char* NextPPCondPtr =
215*67e74705SXin Li         PPCond + TableIdx*(sizeof(uint32_t)*2);
216*67e74705SXin Li       assert(NextPPCondPtr >= CurPPCondPtr);
217*67e74705SXin Li       // Read where we should jump to.
218*67e74705SXin Li       const unsigned char *HashEntryJ =
219*67e74705SXin Li           TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
220*67e74705SXin Li 
221*67e74705SXin Li       if (HashEntryJ <= LastHashTokPtr) {
222*67e74705SXin Li         // Jump directly to the next entry in the side table.
223*67e74705SXin Li         HashEntryI = HashEntryJ;
224*67e74705SXin Li         TableIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
225*67e74705SXin Li         CurPPCondPtr = NextPPCondPtr;
226*67e74705SXin Li       }
227*67e74705SXin Li     }
228*67e74705SXin Li   }
229*67e74705SXin Li   while (HashEntryI < LastHashTokPtr);
230*67e74705SXin Li   assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
231*67e74705SXin Li   assert(TableIdx && "No jumping from #endifs.");
232*67e74705SXin Li 
233*67e74705SXin Li   // Update our side-table iterator.
234*67e74705SXin Li   const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
235*67e74705SXin Li   assert(NextPPCondPtr >= CurPPCondPtr);
236*67e74705SXin Li   CurPPCondPtr = NextPPCondPtr;
237*67e74705SXin Li 
238*67e74705SXin Li   // Read where we should jump to.
239*67e74705SXin Li   HashEntryI =
240*67e74705SXin Li       TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
241*67e74705SXin Li   uint32_t NextIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
242*67e74705SXin Li 
243*67e74705SXin Li   // By construction NextIdx will be zero if this is a #endif.  This is useful
244*67e74705SXin Li   // to know to obviate lexing another token.
245*67e74705SXin Li   bool isEndif = NextIdx == 0;
246*67e74705SXin Li 
247*67e74705SXin Li   // This case can occur when we see something like this:
248*67e74705SXin Li   //
249*67e74705SXin Li   //  #if ...
250*67e74705SXin Li   //   /* a comment or nothing */
251*67e74705SXin Li   //  #elif
252*67e74705SXin Li   //
253*67e74705SXin Li   // If we are skipping the first #if block it will be the case that CurPtr
254*67e74705SXin Li   // already points 'elif'.  Just return.
255*67e74705SXin Li 
256*67e74705SXin Li   if (CurPtr > HashEntryI) {
257*67e74705SXin Li     assert(CurPtr == HashEntryI + StoredTokenSize);
258*67e74705SXin Li     // Did we reach a #endif?  If so, go ahead and consume that token as well.
259*67e74705SXin Li     if (isEndif)
260*67e74705SXin Li       CurPtr += StoredTokenSize * 2;
261*67e74705SXin Li     else
262*67e74705SXin Li       LastHashTokPtr = HashEntryI;
263*67e74705SXin Li 
264*67e74705SXin Li     return isEndif;
265*67e74705SXin Li   }
266*67e74705SXin Li 
267*67e74705SXin Li   // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
268*67e74705SXin Li   CurPtr = HashEntryI;
269*67e74705SXin Li 
270*67e74705SXin Li   // Update the location of the last observed '#'.  This is useful if we
271*67e74705SXin Li   // are skipping multiple blocks.
272*67e74705SXin Li   LastHashTokPtr = CurPtr;
273*67e74705SXin Li 
274*67e74705SXin Li   // Skip the '#' token.
275*67e74705SXin Li   assert(((tok::TokenKind)*CurPtr) == tok::hash);
276*67e74705SXin Li   CurPtr += StoredTokenSize;
277*67e74705SXin Li 
278*67e74705SXin Li   // Did we reach a #endif?  If so, go ahead and consume that token as well.
279*67e74705SXin Li   if (isEndif) {
280*67e74705SXin Li     CurPtr += StoredTokenSize * 2;
281*67e74705SXin Li   }
282*67e74705SXin Li 
283*67e74705SXin Li   return isEndif;
284*67e74705SXin Li }
285*67e74705SXin Li 
getSourceLocation()286*67e74705SXin Li SourceLocation PTHLexer::getSourceLocation() {
287*67e74705SXin Li   // getSourceLocation is not on the hot path.  It is used to get the location
288*67e74705SXin Li   // of the next token when transitioning back to this lexer when done
289*67e74705SXin Li   // handling a #included file.  Just read the necessary data from the token
290*67e74705SXin Li   // data buffer to construct the SourceLocation object.
291*67e74705SXin Li   // NOTE: This is a virtual function; hence it is defined out-of-line.
292*67e74705SXin Li   using namespace llvm::support;
293*67e74705SXin Li 
294*67e74705SXin Li   const unsigned char *OffsetPtr = CurPtr + (StoredTokenSize - 4);
295*67e74705SXin Li   uint32_t Offset = endian::readNext<uint32_t, little, aligned>(OffsetPtr);
296*67e74705SXin Li   return FileStartLoc.getLocWithOffset(Offset);
297*67e74705SXin Li }
298*67e74705SXin Li 
299*67e74705SXin Li //===----------------------------------------------------------------------===//
300*67e74705SXin Li // PTH file lookup: map from strings to file data.
301*67e74705SXin Li //===----------------------------------------------------------------------===//
302*67e74705SXin Li 
303*67e74705SXin Li /// PTHFileLookup - This internal data structure is used by the PTHManager
304*67e74705SXin Li ///  to map from FileEntry objects managed by FileManager to offsets within
305*67e74705SXin Li ///  the PTH file.
306*67e74705SXin Li namespace {
/// Immutable pair of offsets describing one cached file: where its token
/// records start and where its preprocessor-conditional table starts.
class PTHFileData {
public:
  PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
      : TokenOff(tokenOff), PPCondOff(ppCondOff) {}

  /// Offset of the file's token data.
  uint32_t getTokenOffset() const { return TokenOff; }

  /// Offset of the file's preprocessor-conditional table.
  uint32_t getPPCondOffset() const { return PPCondOff; }

private:
  const uint32_t TokenOff;
  const uint32_t PPCondOff;
};
317*67e74705SXin Li 
318*67e74705SXin Li 
319*67e74705SXin Li class PTHFileLookupCommonTrait {
320*67e74705SXin Li public:
321*67e74705SXin Li   typedef std::pair<unsigned char, const char*> internal_key_type;
322*67e74705SXin Li   typedef unsigned hash_value_type;
323*67e74705SXin Li   typedef unsigned offset_type;
324*67e74705SXin Li 
ComputeHash(internal_key_type x)325*67e74705SXin Li   static hash_value_type ComputeHash(internal_key_type x) {
326*67e74705SXin Li     return llvm::HashString(x.second);
327*67e74705SXin Li   }
328*67e74705SXin Li 
329*67e74705SXin Li   static std::pair<unsigned, unsigned>
ReadKeyDataLength(const unsigned char * & d)330*67e74705SXin Li   ReadKeyDataLength(const unsigned char*& d) {
331*67e74705SXin Li     using namespace llvm::support;
332*67e74705SXin Li     unsigned keyLen =
333*67e74705SXin Li         (unsigned)endian::readNext<uint16_t, little, unaligned>(d);
334*67e74705SXin Li     unsigned dataLen = (unsigned) *(d++);
335*67e74705SXin Li     return std::make_pair(keyLen, dataLen);
336*67e74705SXin Li   }
337*67e74705SXin Li 
ReadKey(const unsigned char * d,unsigned)338*67e74705SXin Li   static internal_key_type ReadKey(const unsigned char* d, unsigned) {
339*67e74705SXin Li     unsigned char k = *(d++); // Read the entry kind.
340*67e74705SXin Li     return std::make_pair(k, (const char*) d);
341*67e74705SXin Li   }
342*67e74705SXin Li };
343*67e74705SXin Li 
344*67e74705SXin Li } // end anonymous namespace
345*67e74705SXin Li 
346*67e74705SXin Li class PTHManager::PTHFileLookupTrait : public PTHFileLookupCommonTrait {
347*67e74705SXin Li public:
348*67e74705SXin Li   typedef const FileEntry* external_key_type;
349*67e74705SXin Li   typedef PTHFileData      data_type;
350*67e74705SXin Li 
GetInternalKey(const FileEntry * FE)351*67e74705SXin Li   static internal_key_type GetInternalKey(const FileEntry* FE) {
352*67e74705SXin Li     return std::make_pair((unsigned char) 0x1, FE->getName());
353*67e74705SXin Li   }
354*67e74705SXin Li 
EqualKey(internal_key_type a,internal_key_type b)355*67e74705SXin Li   static bool EqualKey(internal_key_type a, internal_key_type b) {
356*67e74705SXin Li     return a.first == b.first && strcmp(a.second, b.second) == 0;
357*67e74705SXin Li   }
358*67e74705SXin Li 
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)359*67e74705SXin Li   static PTHFileData ReadData(const internal_key_type& k,
360*67e74705SXin Li                               const unsigned char* d, unsigned) {
361*67e74705SXin Li     assert(k.first == 0x1 && "Only file lookups can match!");
362*67e74705SXin Li     using namespace llvm::support;
363*67e74705SXin Li     uint32_t x = endian::readNext<uint32_t, little, unaligned>(d);
364*67e74705SXin Li     uint32_t y = endian::readNext<uint32_t, little, unaligned>(d);
365*67e74705SXin Li     return PTHFileData(x, y);
366*67e74705SXin Li   }
367*67e74705SXin Li };
368*67e74705SXin Li 
369*67e74705SXin Li class PTHManager::PTHStringLookupTrait {
370*67e74705SXin Li public:
371*67e74705SXin Li   typedef uint32_t data_type;
372*67e74705SXin Li   typedef const std::pair<const char*, unsigned> external_key_type;
373*67e74705SXin Li   typedef external_key_type internal_key_type;
374*67e74705SXin Li   typedef uint32_t hash_value_type;
375*67e74705SXin Li   typedef unsigned offset_type;
376*67e74705SXin Li 
EqualKey(const internal_key_type & a,const internal_key_type & b)377*67e74705SXin Li   static bool EqualKey(const internal_key_type& a,
378*67e74705SXin Li                        const internal_key_type& b) {
379*67e74705SXin Li     return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
380*67e74705SXin Li                                   : false;
381*67e74705SXin Li   }
382*67e74705SXin Li 
ComputeHash(const internal_key_type & a)383*67e74705SXin Li   static hash_value_type ComputeHash(const internal_key_type& a) {
384*67e74705SXin Li     return llvm::HashString(StringRef(a.first, a.second));
385*67e74705SXin Li   }
386*67e74705SXin Li 
387*67e74705SXin Li   // This hopefully will just get inlined and removed by the optimizer.
388*67e74705SXin Li   static const internal_key_type&
GetInternalKey(const external_key_type & x)389*67e74705SXin Li   GetInternalKey(const external_key_type& x) { return x; }
390*67e74705SXin Li 
391*67e74705SXin Li   static std::pair<unsigned, unsigned>
ReadKeyDataLength(const unsigned char * & d)392*67e74705SXin Li   ReadKeyDataLength(const unsigned char*& d) {
393*67e74705SXin Li     using namespace llvm::support;
394*67e74705SXin Li     return std::make_pair(
395*67e74705SXin Li         (unsigned)endian::readNext<uint16_t, little, unaligned>(d),
396*67e74705SXin Li         sizeof(uint32_t));
397*67e74705SXin Li   }
398*67e74705SXin Li 
399*67e74705SXin Li   static std::pair<const char*, unsigned>
ReadKey(const unsigned char * d,unsigned n)400*67e74705SXin Li   ReadKey(const unsigned char* d, unsigned n) {
401*67e74705SXin Li       assert(n >= 2 && d[n-1] == '\0');
402*67e74705SXin Li       return std::make_pair((const char*) d, n-1);
403*67e74705SXin Li     }
404*67e74705SXin Li 
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)405*67e74705SXin Li   static uint32_t ReadData(const internal_key_type& k, const unsigned char* d,
406*67e74705SXin Li                            unsigned) {
407*67e74705SXin Li     using namespace llvm::support;
408*67e74705SXin Li     return endian::readNext<uint32_t, little, unaligned>(d);
409*67e74705SXin Li   }
410*67e74705SXin Li };
411*67e74705SXin Li 
412*67e74705SXin Li //===----------------------------------------------------------------------===//
413*67e74705SXin Li // PTHManager methods.
414*67e74705SXin Li //===----------------------------------------------------------------------===//
415*67e74705SXin Li 
PTHManager(std::unique_ptr<const llvm::MemoryBuffer> buf,std::unique_ptr<PTHFileLookup> fileLookup,const unsigned char * idDataTable,std::unique_ptr<IdentifierInfo * [],llvm::FreeDeleter> perIDCache,std::unique_ptr<PTHStringIdLookup> stringIdLookup,unsigned numIds,const unsigned char * spellingBase,const char * originalSourceFile)416*67e74705SXin Li PTHManager::PTHManager(
417*67e74705SXin Li     std::unique_ptr<const llvm::MemoryBuffer> buf,
418*67e74705SXin Li     std::unique_ptr<PTHFileLookup> fileLookup, const unsigned char *idDataTable,
419*67e74705SXin Li     std::unique_ptr<IdentifierInfo *[], llvm::FreeDeleter> perIDCache,
420*67e74705SXin Li     std::unique_ptr<PTHStringIdLookup> stringIdLookup, unsigned numIds,
421*67e74705SXin Li     const unsigned char *spellingBase, const char *originalSourceFile)
422*67e74705SXin Li     : Buf(std::move(buf)), PerIDCache(std::move(perIDCache)),
423*67e74705SXin Li       FileLookup(std::move(fileLookup)), IdDataTable(idDataTable),
424*67e74705SXin Li       StringIdLookup(std::move(stringIdLookup)), NumIds(numIds), PP(nullptr),
425*67e74705SXin Li       SpellingBase(spellingBase), OriginalSourceFile(originalSourceFile) {}
426*67e74705SXin Li 
~PTHManager()427*67e74705SXin Li PTHManager::~PTHManager() {
428*67e74705SXin Li }
429*67e74705SXin Li 
InvalidPTH(DiagnosticsEngine & Diags,const char * Msg)430*67e74705SXin Li static void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) {
431*67e74705SXin Li   Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0")) << Msg;
432*67e74705SXin Li }
433*67e74705SXin Li 
Create(StringRef file,DiagnosticsEngine & Diags)434*67e74705SXin Li PTHManager *PTHManager::Create(StringRef file, DiagnosticsEngine &Diags) {
435*67e74705SXin Li   // Memory map the PTH file.
436*67e74705SXin Li   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
437*67e74705SXin Li       llvm::MemoryBuffer::getFile(file);
438*67e74705SXin Li 
439*67e74705SXin Li   if (!FileOrErr) {
440*67e74705SXin Li     // FIXME: Add ec.message() to this diag.
441*67e74705SXin Li     Diags.Report(diag::err_invalid_pth_file) << file;
442*67e74705SXin Li     return nullptr;
443*67e74705SXin Li   }
444*67e74705SXin Li   std::unique_ptr<llvm::MemoryBuffer> File = std::move(FileOrErr.get());
445*67e74705SXin Li 
446*67e74705SXin Li   using namespace llvm::support;
447*67e74705SXin Li 
448*67e74705SXin Li   // Get the buffer ranges and check if there are at least three 32-bit
449*67e74705SXin Li   // words at the end of the file.
450*67e74705SXin Li   const unsigned char *BufBeg = (const unsigned char*)File->getBufferStart();
451*67e74705SXin Li   const unsigned char *BufEnd = (const unsigned char*)File->getBufferEnd();
452*67e74705SXin Li 
453*67e74705SXin Li   // Check the prologue of the file.
454*67e74705SXin Li   if ((BufEnd - BufBeg) < (signed)(sizeof("cfe-pth") + 4 + 4) ||
455*67e74705SXin Li       memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth")) != 0) {
456*67e74705SXin Li     Diags.Report(diag::err_invalid_pth_file) << file;
457*67e74705SXin Li     return nullptr;
458*67e74705SXin Li   }
459*67e74705SXin Li 
460*67e74705SXin Li   // Read the PTH version.
461*67e74705SXin Li   const unsigned char *p = BufBeg + (sizeof("cfe-pth"));
462*67e74705SXin Li   unsigned Version = endian::readNext<uint32_t, little, aligned>(p);
463*67e74705SXin Li 
464*67e74705SXin Li   if (Version < PTHManager::Version) {
465*67e74705SXin Li     InvalidPTH(Diags,
466*67e74705SXin Li         Version < PTHManager::Version
467*67e74705SXin Li         ? "PTH file uses an older PTH format that is no longer supported"
468*67e74705SXin Li         : "PTH file uses a newer PTH format that cannot be read");
469*67e74705SXin Li     return nullptr;
470*67e74705SXin Li   }
471*67e74705SXin Li 
472*67e74705SXin Li   // Compute the address of the index table at the end of the PTH file.
473*67e74705SXin Li   const unsigned char *PrologueOffset = p;
474*67e74705SXin Li 
475*67e74705SXin Li   if (PrologueOffset >= BufEnd) {
476*67e74705SXin Li     Diags.Report(diag::err_invalid_pth_file) << file;
477*67e74705SXin Li     return nullptr;
478*67e74705SXin Li   }
479*67e74705SXin Li 
480*67e74705SXin Li   // Construct the file lookup table.  This will be used for mapping from
481*67e74705SXin Li   // FileEntry*'s to cached tokens.
482*67e74705SXin Li   const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2;
483*67e74705SXin Li   const unsigned char *FileTable =
484*67e74705SXin Li       BufBeg + endian::readNext<uint32_t, little, aligned>(FileTableOffset);
485*67e74705SXin Li 
486*67e74705SXin Li   if (!(FileTable > BufBeg && FileTable < BufEnd)) {
487*67e74705SXin Li     Diags.Report(diag::err_invalid_pth_file) << file;
488*67e74705SXin Li     return nullptr; // FIXME: Proper error diagnostic?
489*67e74705SXin Li   }
490*67e74705SXin Li 
491*67e74705SXin Li   std::unique_ptr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
492*67e74705SXin Li 
493*67e74705SXin Li   // Warn if the PTH file is empty.  We still want to create a PTHManager
494*67e74705SXin Li   // as the PTH could be used with -include-pth.
495*67e74705SXin Li   if (FL->isEmpty())
496*67e74705SXin Li     InvalidPTH(Diags, "PTH file contains no cached source data");
497*67e74705SXin Li 
498*67e74705SXin Li   // Get the location of the table mapping from persistent ids to the
499*67e74705SXin Li   // data needed to reconstruct identifiers.
500*67e74705SXin Li   const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0;
501*67e74705SXin Li   const unsigned char *IData =
502*67e74705SXin Li       BufBeg + endian::readNext<uint32_t, little, aligned>(IDTableOffset);
503*67e74705SXin Li 
504*67e74705SXin Li   if (!(IData >= BufBeg && IData < BufEnd)) {
505*67e74705SXin Li     Diags.Report(diag::err_invalid_pth_file) << file;
506*67e74705SXin Li     return nullptr;
507*67e74705SXin Li   }
508*67e74705SXin Li 
509*67e74705SXin Li   // Get the location of the hashtable mapping between strings and
510*67e74705SXin Li   // persistent IDs.
511*67e74705SXin Li   const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1;
512*67e74705SXin Li   const unsigned char *StringIdTable =
513*67e74705SXin Li       BufBeg + endian::readNext<uint32_t, little, aligned>(StringIdTableOffset);
514*67e74705SXin Li   if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) {
515*67e74705SXin Li     Diags.Report(diag::err_invalid_pth_file) << file;
516*67e74705SXin Li     return nullptr;
517*67e74705SXin Li   }
518*67e74705SXin Li 
519*67e74705SXin Li   std::unique_ptr<PTHStringIdLookup> SL(
520*67e74705SXin Li       PTHStringIdLookup::Create(StringIdTable, BufBeg));
521*67e74705SXin Li 
522*67e74705SXin Li   // Get the location of the spelling cache.
523*67e74705SXin Li   const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3;
524*67e74705SXin Li   const unsigned char *spellingBase =
525*67e74705SXin Li       BufBeg + endian::readNext<uint32_t, little, aligned>(spellingBaseOffset);
526*67e74705SXin Li   if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
527*67e74705SXin Li     Diags.Report(diag::err_invalid_pth_file) << file;
528*67e74705SXin Li     return nullptr;
529*67e74705SXin Li   }
530*67e74705SXin Li 
531*67e74705SXin Li   // Get the number of IdentifierInfos and pre-allocate the identifier cache.
532*67e74705SXin Li   uint32_t NumIds = endian::readNext<uint32_t, little, aligned>(IData);
533*67e74705SXin Li 
534*67e74705SXin Li   // Pre-allocate the persistent ID -> IdentifierInfo* cache.  We use calloc()
535*67e74705SXin Li   // so that we in the best case only zero out memory once when the OS returns
536*67e74705SXin Li   // us new pages.
537*67e74705SXin Li   std::unique_ptr<IdentifierInfo *[], llvm::FreeDeleter> PerIDCache;
538*67e74705SXin Li 
539*67e74705SXin Li   if (NumIds) {
540*67e74705SXin Li     PerIDCache.reset((IdentifierInfo **)calloc(NumIds, sizeof(PerIDCache[0])));
541*67e74705SXin Li     if (!PerIDCache) {
542*67e74705SXin Li       InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
543*67e74705SXin Li       return nullptr;
544*67e74705SXin Li     }
545*67e74705SXin Li   }
546*67e74705SXin Li 
547*67e74705SXin Li   // Compute the address of the original source file.
548*67e74705SXin Li   const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4;
549*67e74705SXin Li   unsigned len =
550*67e74705SXin Li       endian::readNext<uint16_t, little, unaligned>(originalSourceBase);
551*67e74705SXin Li   if (!len) originalSourceBase = nullptr;
552*67e74705SXin Li 
553*67e74705SXin Li   // Create the new PTHManager.
554*67e74705SXin Li   return new PTHManager(std::move(File), std::move(FL), IData,
555*67e74705SXin Li                         std::move(PerIDCache), std::move(SL), NumIds,
556*67e74705SXin Li                         spellingBase, (const char *)originalSourceBase);
557*67e74705SXin Li }
558*67e74705SXin Li 
LazilyCreateIdentifierInfo(unsigned PersistentID)559*67e74705SXin Li IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
560*67e74705SXin Li   using namespace llvm::support;
561*67e74705SXin Li   // Look in the PTH file for the string data for the IdentifierInfo object.
562*67e74705SXin Li   const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
563*67e74705SXin Li   const unsigned char *IDData =
564*67e74705SXin Li       (const unsigned char *)Buf->getBufferStart() +
565*67e74705SXin Li       endian::readNext<uint32_t, little, aligned>(TableEntry);
566*67e74705SXin Li   assert(IDData < (const unsigned char*)Buf->getBufferEnd());
567*67e74705SXin Li 
568*67e74705SXin Li   // Allocate the object.
569*67e74705SXin Li   std::pair<IdentifierInfo,const unsigned char*> *Mem =
570*67e74705SXin Li     Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
571*67e74705SXin Li 
572*67e74705SXin Li   Mem->second = IDData;
573*67e74705SXin Li   assert(IDData[0] != '\0');
574*67e74705SXin Li   IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
575*67e74705SXin Li 
576*67e74705SXin Li   // Store the new IdentifierInfo in the cache.
577*67e74705SXin Li   PerIDCache[PersistentID] = II;
578*67e74705SXin Li   assert(II->getNameStart() && II->getNameStart()[0] != '\0');
579*67e74705SXin Li   return II;
580*67e74705SXin Li }
581*67e74705SXin Li 
get(StringRef Name)582*67e74705SXin Li IdentifierInfo* PTHManager::get(StringRef Name) {
583*67e74705SXin Li   // Double check our assumption that the last character isn't '\0'.
584*67e74705SXin Li   assert(Name.empty() || Name.back() != '\0');
585*67e74705SXin Li   PTHStringIdLookup::iterator I =
586*67e74705SXin Li       StringIdLookup->find(std::make_pair(Name.data(), Name.size()));
587*67e74705SXin Li   if (I == StringIdLookup->end()) // No identifier found?
588*67e74705SXin Li     return nullptr;
589*67e74705SXin Li 
590*67e74705SXin Li   // Match found.  Return the identifier!
591*67e74705SXin Li   assert(*I > 0);
592*67e74705SXin Li   return GetIdentifierInfo(*I-1);
593*67e74705SXin Li }
594*67e74705SXin Li 
CreateLexer(FileID FID)595*67e74705SXin Li PTHLexer *PTHManager::CreateLexer(FileID FID) {
596*67e74705SXin Li   const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
597*67e74705SXin Li   if (!FE)
598*67e74705SXin Li     return nullptr;
599*67e74705SXin Li 
600*67e74705SXin Li   using namespace llvm::support;
601*67e74705SXin Li 
602*67e74705SXin Li   // Lookup the FileEntry object in our file lookup data structure.  It will
603*67e74705SXin Li   // return a variant that indicates whether or not there is an offset within
604*67e74705SXin Li   // the PTH file that contains cached tokens.
605*67e74705SXin Li   PTHFileLookup::iterator I = FileLookup->find(FE);
606*67e74705SXin Li 
607*67e74705SXin Li   if (I == FileLookup->end()) // No tokens available?
608*67e74705SXin Li     return nullptr;
609*67e74705SXin Li 
610*67e74705SXin Li   const PTHFileData& FileData = *I;
611*67e74705SXin Li 
612*67e74705SXin Li   const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
613*67e74705SXin Li   // Compute the offset of the token data within the buffer.
614*67e74705SXin Li   const unsigned char* data = BufStart + FileData.getTokenOffset();
615*67e74705SXin Li 
616*67e74705SXin Li   // Get the location of pp-conditional table.
617*67e74705SXin Li   const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
618*67e74705SXin Li   uint32_t Len = endian::readNext<uint32_t, little, aligned>(ppcond);
619*67e74705SXin Li   if (Len == 0) ppcond = nullptr;
620*67e74705SXin Li 
621*67e74705SXin Li   assert(PP && "No preprocessor set yet!");
622*67e74705SXin Li   return new PTHLexer(*PP, FID, data, ppcond, *this);
623*67e74705SXin Li }
624*67e74705SXin Li 
625*67e74705SXin Li //===----------------------------------------------------------------------===//
626*67e74705SXin Li // 'stat' caching.
627*67e74705SXin Li //===----------------------------------------------------------------------===//
628*67e74705SXin Li 
629*67e74705SXin Li namespace {
630*67e74705SXin Li class PTHStatData {
631*67e74705SXin Li public:
632*67e74705SXin Li   const bool HasData;
633*67e74705SXin Li   uint64_t Size;
634*67e74705SXin Li   time_t ModTime;
635*67e74705SXin Li   llvm::sys::fs::UniqueID UniqueID;
636*67e74705SXin Li   bool IsDirectory;
637*67e74705SXin Li 
PTHStatData(uint64_t Size,time_t ModTime,llvm::sys::fs::UniqueID UniqueID,bool IsDirectory)638*67e74705SXin Li   PTHStatData(uint64_t Size, time_t ModTime, llvm::sys::fs::UniqueID UniqueID,
639*67e74705SXin Li               bool IsDirectory)
640*67e74705SXin Li       : HasData(true), Size(Size), ModTime(ModTime), UniqueID(UniqueID),
641*67e74705SXin Li         IsDirectory(IsDirectory) {}
642*67e74705SXin Li 
PTHStatData()643*67e74705SXin Li   PTHStatData() : HasData(false) {}
644*67e74705SXin Li };
645*67e74705SXin Li 
646*67e74705SXin Li class PTHStatLookupTrait : public PTHFileLookupCommonTrait {
647*67e74705SXin Li public:
648*67e74705SXin Li   typedef const char* external_key_type;  // const char*
649*67e74705SXin Li   typedef PTHStatData data_type;
650*67e74705SXin Li 
GetInternalKey(const char * path)651*67e74705SXin Li   static internal_key_type GetInternalKey(const char *path) {
652*67e74705SXin Li     // The key 'kind' doesn't matter here because it is ignored in EqualKey.
653*67e74705SXin Li     return std::make_pair((unsigned char) 0x0, path);
654*67e74705SXin Li   }
655*67e74705SXin Li 
EqualKey(internal_key_type a,internal_key_type b)656*67e74705SXin Li   static bool EqualKey(internal_key_type a, internal_key_type b) {
657*67e74705SXin Li     // When doing 'stat' lookups we don't care about the kind of 'a' and 'b',
658*67e74705SXin Li     // just the paths.
659*67e74705SXin Li     return strcmp(a.second, b.second) == 0;
660*67e74705SXin Li   }
661*67e74705SXin Li 
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)662*67e74705SXin Li   static data_type ReadData(const internal_key_type& k, const unsigned char* d,
663*67e74705SXin Li                             unsigned) {
664*67e74705SXin Li 
665*67e74705SXin Li     if (k.first /* File or Directory */) {
666*67e74705SXin Li       bool IsDirectory = true;
667*67e74705SXin Li       if (k.first == 0x1 /* File */) {
668*67e74705SXin Li         IsDirectory = false;
669*67e74705SXin Li         d += 4 * 2; // Skip the first 2 words.
670*67e74705SXin Li       }
671*67e74705SXin Li 
672*67e74705SXin Li       using namespace llvm::support;
673*67e74705SXin Li 
674*67e74705SXin Li       uint64_t File = endian::readNext<uint64_t, little, unaligned>(d);
675*67e74705SXin Li       uint64_t Device = endian::readNext<uint64_t, little, unaligned>(d);
676*67e74705SXin Li       llvm::sys::fs::UniqueID UniqueID(Device, File);
677*67e74705SXin Li       time_t ModTime = endian::readNext<uint64_t, little, unaligned>(d);
678*67e74705SXin Li       uint64_t Size = endian::readNext<uint64_t, little, unaligned>(d);
679*67e74705SXin Li       return data_type(Size, ModTime, UniqueID, IsDirectory);
680*67e74705SXin Li     }
681*67e74705SXin Li 
682*67e74705SXin Li     // Negative stat.  Don't read anything.
683*67e74705SXin Li     return data_type();
684*67e74705SXin Li   }
685*67e74705SXin Li };
686*67e74705SXin Li } // end anonymous namespace
687*67e74705SXin Li 
688*67e74705SXin Li namespace clang {
689*67e74705SXin Li class PTHStatCache : public FileSystemStatCache {
690*67e74705SXin Li   typedef llvm::OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy;
691*67e74705SXin Li   CacheTy Cache;
692*67e74705SXin Li 
693*67e74705SXin Li public:
PTHStatCache(PTHManager::PTHFileLookup & FL)694*67e74705SXin Li   PTHStatCache(PTHManager::PTHFileLookup &FL)
695*67e74705SXin Li       : Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(),
696*67e74705SXin Li               FL.getBase()) {}
697*67e74705SXin Li 
getStat(const char * Path,FileData & Data,bool isFile,std::unique_ptr<vfs::File> * F,vfs::FileSystem & FS)698*67e74705SXin Li   LookupResult getStat(const char *Path, FileData &Data, bool isFile,
699*67e74705SXin Li                        std::unique_ptr<vfs::File> *F,
700*67e74705SXin Li                        vfs::FileSystem &FS) override {
701*67e74705SXin Li     // Do the lookup for the file's data in the PTH file.
702*67e74705SXin Li     CacheTy::iterator I = Cache.find(Path);
703*67e74705SXin Li 
704*67e74705SXin Li     // If we don't get a hit in the PTH file just forward to 'stat'.
705*67e74705SXin Li     if (I == Cache.end())
706*67e74705SXin Li       return statChained(Path, Data, isFile, F, FS);
707*67e74705SXin Li 
708*67e74705SXin Li     const PTHStatData &D = *I;
709*67e74705SXin Li 
710*67e74705SXin Li     if (!D.HasData)
711*67e74705SXin Li       return CacheMissing;
712*67e74705SXin Li 
713*67e74705SXin Li     Data.Name = Path;
714*67e74705SXin Li     Data.Size = D.Size;
715*67e74705SXin Li     Data.ModTime = D.ModTime;
716*67e74705SXin Li     Data.UniqueID = D.UniqueID;
717*67e74705SXin Li     Data.IsDirectory = D.IsDirectory;
718*67e74705SXin Li     Data.IsNamedPipe = false;
719*67e74705SXin Li     Data.InPCH = true;
720*67e74705SXin Li 
721*67e74705SXin Li     return CacheExists;
722*67e74705SXin Li   }
723*67e74705SXin Li };
724*67e74705SXin Li }
725*67e74705SXin Li 
createStatCache()726*67e74705SXin Li std::unique_ptr<FileSystemStatCache> PTHManager::createStatCache() {
727*67e74705SXin Li   return llvm::make_unique<PTHStatCache>(*FileLookup);
728*67e74705SXin Li }
729