1*67e74705SXin Li //===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2*67e74705SXin Li //
3*67e74705SXin Li // The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li //
10*67e74705SXin Li // This file implements the PTHLexer interface.
11*67e74705SXin Li //
12*67e74705SXin Li //===----------------------------------------------------------------------===//
13*67e74705SXin Li
14*67e74705SXin Li #include "clang/Lex/PTHLexer.h"
15*67e74705SXin Li #include "clang/Basic/FileManager.h"
16*67e74705SXin Li #include "clang/Basic/FileSystemStatCache.h"
17*67e74705SXin Li #include "clang/Basic/IdentifierTable.h"
18*67e74705SXin Li #include "clang/Basic/TokenKinds.h"
19*67e74705SXin Li #include "clang/Lex/LexDiagnostic.h"
20*67e74705SXin Li #include "clang/Lex/PTHManager.h"
21*67e74705SXin Li #include "clang/Lex/Preprocessor.h"
22*67e74705SXin Li #include "clang/Lex/Token.h"
23*67e74705SXin Li #include "llvm/ADT/StringExtras.h"
24*67e74705SXin Li #include "llvm/ADT/StringMap.h"
25*67e74705SXin Li #include "llvm/Support/EndianStream.h"
26*67e74705SXin Li #include "llvm/Support/MemoryBuffer.h"
27*67e74705SXin Li #include <memory>
28*67e74705SXin Li #include <system_error>
29*67e74705SXin Li using namespace clang;
30*67e74705SXin Li
/// Size in bytes of one serialized token record in a PTH token buffer. A
/// record is three 32-bit little-endian words: word 0 packs the token kind
/// (bits 0-7), the token flags (bits 8-15) and the token length (bits 16-31);
/// word 1 is the identifier/spelling ID; word 2 is the offset of the token
/// from the start of its file.
static const unsigned StoredTokenSize = 3 * sizeof(uint32_t);
32*67e74705SXin Li
33*67e74705SXin Li //===----------------------------------------------------------------------===//
34*67e74705SXin Li // PTHLexer methods.
35*67e74705SXin Li //===----------------------------------------------------------------------===//
36*67e74705SXin Li
PTHLexer(Preprocessor & PP,FileID FID,const unsigned char * D,const unsigned char * ppcond,PTHManager & PM)37*67e74705SXin Li PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
38*67e74705SXin Li const unsigned char *ppcond, PTHManager &PM)
39*67e74705SXin Li : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(nullptr),
40*67e74705SXin Li PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
41*67e74705SXin Li
42*67e74705SXin Li FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
43*67e74705SXin Li }
44*67e74705SXin Li
Lex(Token & Tok)45*67e74705SXin Li bool PTHLexer::Lex(Token& Tok) {
46*67e74705SXin Li //===--------------------------------------==//
47*67e74705SXin Li // Read the raw token data.
48*67e74705SXin Li //===--------------------------------------==//
49*67e74705SXin Li using namespace llvm::support;
50*67e74705SXin Li
51*67e74705SXin Li // Shadow CurPtr into an automatic variable.
52*67e74705SXin Li const unsigned char *CurPtrShadow = CurPtr;
53*67e74705SXin Li
54*67e74705SXin Li // Read in the data for the token.
55*67e74705SXin Li unsigned Word0 = endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
56*67e74705SXin Li uint32_t IdentifierID =
57*67e74705SXin Li endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
58*67e74705SXin Li uint32_t FileOffset =
59*67e74705SXin Li endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
60*67e74705SXin Li
61*67e74705SXin Li tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
62*67e74705SXin Li Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
63*67e74705SXin Li uint32_t Len = Word0 >> 16;
64*67e74705SXin Li
65*67e74705SXin Li CurPtr = CurPtrShadow;
66*67e74705SXin Li
67*67e74705SXin Li //===--------------------------------------==//
68*67e74705SXin Li // Construct the token itself.
69*67e74705SXin Li //===--------------------------------------==//
70*67e74705SXin Li
71*67e74705SXin Li Tok.startToken();
72*67e74705SXin Li Tok.setKind(TKind);
73*67e74705SXin Li Tok.setFlag(TFlags);
74*67e74705SXin Li assert(!LexingRawMode);
75*67e74705SXin Li Tok.setLocation(FileStartLoc.getLocWithOffset(FileOffset));
76*67e74705SXin Li Tok.setLength(Len);
77*67e74705SXin Li
78*67e74705SXin Li // Handle identifiers.
79*67e74705SXin Li if (Tok.isLiteral()) {
80*67e74705SXin Li Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
81*67e74705SXin Li }
82*67e74705SXin Li else if (IdentifierID) {
83*67e74705SXin Li MIOpt.ReadToken();
84*67e74705SXin Li IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
85*67e74705SXin Li
86*67e74705SXin Li Tok.setIdentifierInfo(II);
87*67e74705SXin Li
88*67e74705SXin Li // Change the kind of this identifier to the appropriate token kind, e.g.
89*67e74705SXin Li // turning "for" into a keyword.
90*67e74705SXin Li Tok.setKind(II->getTokenID());
91*67e74705SXin Li
92*67e74705SXin Li if (II->isHandleIdentifierCase())
93*67e74705SXin Li return PP->HandleIdentifier(Tok);
94*67e74705SXin Li
95*67e74705SXin Li return true;
96*67e74705SXin Li }
97*67e74705SXin Li
98*67e74705SXin Li //===--------------------------------------==//
99*67e74705SXin Li // Process the token.
100*67e74705SXin Li //===--------------------------------------==//
101*67e74705SXin Li if (TKind == tok::eof) {
102*67e74705SXin Li // Save the end-of-file token.
103*67e74705SXin Li EofToken = Tok;
104*67e74705SXin Li
105*67e74705SXin Li assert(!ParsingPreprocessorDirective);
106*67e74705SXin Li assert(!LexingRawMode);
107*67e74705SXin Li
108*67e74705SXin Li return LexEndOfFile(Tok);
109*67e74705SXin Li }
110*67e74705SXin Li
111*67e74705SXin Li if (TKind == tok::hash && Tok.isAtStartOfLine()) {
112*67e74705SXin Li LastHashTokPtr = CurPtr - StoredTokenSize;
113*67e74705SXin Li assert(!LexingRawMode);
114*67e74705SXin Li PP->HandleDirective(Tok);
115*67e74705SXin Li
116*67e74705SXin Li return false;
117*67e74705SXin Li }
118*67e74705SXin Li
119*67e74705SXin Li if (TKind == tok::eod) {
120*67e74705SXin Li assert(ParsingPreprocessorDirective);
121*67e74705SXin Li ParsingPreprocessorDirective = false;
122*67e74705SXin Li return true;
123*67e74705SXin Li }
124*67e74705SXin Li
125*67e74705SXin Li MIOpt.ReadToken();
126*67e74705SXin Li return true;
127*67e74705SXin Li }
128*67e74705SXin Li
LexEndOfFile(Token & Result)129*67e74705SXin Li bool PTHLexer::LexEndOfFile(Token &Result) {
130*67e74705SXin Li // If we hit the end of the file while parsing a preprocessor directive,
131*67e74705SXin Li // end the preprocessor directive first. The next token returned will
132*67e74705SXin Li // then be the end of file.
133*67e74705SXin Li if (ParsingPreprocessorDirective) {
134*67e74705SXin Li ParsingPreprocessorDirective = false; // Done parsing the "line".
135*67e74705SXin Li return true; // Have a token.
136*67e74705SXin Li }
137*67e74705SXin Li
138*67e74705SXin Li assert(!LexingRawMode);
139*67e74705SXin Li
140*67e74705SXin Li // If we are in a #if directive, emit an error.
141*67e74705SXin Li while (!ConditionalStack.empty()) {
142*67e74705SXin Li if (PP->getCodeCompletionFileLoc() != FileStartLoc)
143*67e74705SXin Li PP->Diag(ConditionalStack.back().IfLoc,
144*67e74705SXin Li diag::err_pp_unterminated_conditional);
145*67e74705SXin Li ConditionalStack.pop_back();
146*67e74705SXin Li }
147*67e74705SXin Li
148*67e74705SXin Li // Finally, let the preprocessor handle this.
149*67e74705SXin Li return PP->HandleEndOfFile(Result);
150*67e74705SXin Li }
151*67e74705SXin Li
152*67e74705SXin Li // FIXME: We can just grab the last token instead of storing a copy
153*67e74705SXin Li // into EofToken.
getEOF(Token & Tok)154*67e74705SXin Li void PTHLexer::getEOF(Token& Tok) {
155*67e74705SXin Li assert(EofToken.is(tok::eof));
156*67e74705SXin Li Tok = EofToken;
157*67e74705SXin Li }
158*67e74705SXin Li
DiscardToEndOfLine()159*67e74705SXin Li void PTHLexer::DiscardToEndOfLine() {
160*67e74705SXin Li assert(ParsingPreprocessorDirective && ParsingFilename == false &&
161*67e74705SXin Li "Must be in a preprocessing directive!");
162*67e74705SXin Li
163*67e74705SXin Li // We assume that if the preprocessor wishes to discard to the end of
164*67e74705SXin Li // the line that it also means to end the current preprocessor directive.
165*67e74705SXin Li ParsingPreprocessorDirective = false;
166*67e74705SXin Li
167*67e74705SXin Li // Skip tokens by only peeking at their token kind and the flags.
168*67e74705SXin Li // We don't need to actually reconstruct full tokens from the token buffer.
169*67e74705SXin Li // This saves some copies and it also reduces IdentifierInfo* lookup.
170*67e74705SXin Li const unsigned char* p = CurPtr;
171*67e74705SXin Li while (1) {
172*67e74705SXin Li // Read the token kind. Are we at the end of the file?
173*67e74705SXin Li tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
174*67e74705SXin Li if (x == tok::eof) break;
175*67e74705SXin Li
176*67e74705SXin Li // Read the token flags. Are we at the start of the next line?
177*67e74705SXin Li Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
178*67e74705SXin Li if (y & Token::StartOfLine) break;
179*67e74705SXin Li
180*67e74705SXin Li // Skip to the next token.
181*67e74705SXin Li p += StoredTokenSize;
182*67e74705SXin Li }
183*67e74705SXin Li
184*67e74705SXin Li CurPtr = p;
185*67e74705SXin Li }
186*67e74705SXin Li
187*67e74705SXin Li /// SkipBlock - Used by Preprocessor to skip the current conditional block.
SkipBlock()188*67e74705SXin Li bool PTHLexer::SkipBlock() {
189*67e74705SXin Li using namespace llvm::support;
190*67e74705SXin Li assert(CurPPCondPtr && "No cached PP conditional information.");
191*67e74705SXin Li assert(LastHashTokPtr && "No known '#' token.");
192*67e74705SXin Li
193*67e74705SXin Li const unsigned char *HashEntryI = nullptr;
194*67e74705SXin Li uint32_t TableIdx;
195*67e74705SXin Li
196*67e74705SXin Li do {
197*67e74705SXin Li // Read the token offset from the side-table.
198*67e74705SXin Li uint32_t Offset = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);
199*67e74705SXin Li
200*67e74705SXin Li // Read the target table index from the side-table.
201*67e74705SXin Li TableIdx = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);
202*67e74705SXin Li
203*67e74705SXin Li // Compute the actual memory address of the '#' token data for this entry.
204*67e74705SXin Li HashEntryI = TokBuf + Offset;
205*67e74705SXin Li
206*67e74705SXin Li // Optmization: "Sibling jumping". #if...#else...#endif blocks can
207*67e74705SXin Li // contain nested blocks. In the side-table we can jump over these
208*67e74705SXin Li // nested blocks instead of doing a linear search if the next "sibling"
209*67e74705SXin Li // entry is not at a location greater than LastHashTokPtr.
210*67e74705SXin Li if (HashEntryI < LastHashTokPtr && TableIdx) {
211*67e74705SXin Li // In the side-table we are still at an entry for a '#' token that
212*67e74705SXin Li // is earlier than the last one we saw. Check if the location we would
213*67e74705SXin Li // stride gets us closer.
214*67e74705SXin Li const unsigned char* NextPPCondPtr =
215*67e74705SXin Li PPCond + TableIdx*(sizeof(uint32_t)*2);
216*67e74705SXin Li assert(NextPPCondPtr >= CurPPCondPtr);
217*67e74705SXin Li // Read where we should jump to.
218*67e74705SXin Li const unsigned char *HashEntryJ =
219*67e74705SXin Li TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
220*67e74705SXin Li
221*67e74705SXin Li if (HashEntryJ <= LastHashTokPtr) {
222*67e74705SXin Li // Jump directly to the next entry in the side table.
223*67e74705SXin Li HashEntryI = HashEntryJ;
224*67e74705SXin Li TableIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
225*67e74705SXin Li CurPPCondPtr = NextPPCondPtr;
226*67e74705SXin Li }
227*67e74705SXin Li }
228*67e74705SXin Li }
229*67e74705SXin Li while (HashEntryI < LastHashTokPtr);
230*67e74705SXin Li assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
231*67e74705SXin Li assert(TableIdx && "No jumping from #endifs.");
232*67e74705SXin Li
233*67e74705SXin Li // Update our side-table iterator.
234*67e74705SXin Li const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
235*67e74705SXin Li assert(NextPPCondPtr >= CurPPCondPtr);
236*67e74705SXin Li CurPPCondPtr = NextPPCondPtr;
237*67e74705SXin Li
238*67e74705SXin Li // Read where we should jump to.
239*67e74705SXin Li HashEntryI =
240*67e74705SXin Li TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
241*67e74705SXin Li uint32_t NextIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
242*67e74705SXin Li
243*67e74705SXin Li // By construction NextIdx will be zero if this is a #endif. This is useful
244*67e74705SXin Li // to know to obviate lexing another token.
245*67e74705SXin Li bool isEndif = NextIdx == 0;
246*67e74705SXin Li
247*67e74705SXin Li // This case can occur when we see something like this:
248*67e74705SXin Li //
249*67e74705SXin Li // #if ...
250*67e74705SXin Li // /* a comment or nothing */
251*67e74705SXin Li // #elif
252*67e74705SXin Li //
253*67e74705SXin Li // If we are skipping the first #if block it will be the case that CurPtr
254*67e74705SXin Li // already points 'elif'. Just return.
255*67e74705SXin Li
256*67e74705SXin Li if (CurPtr > HashEntryI) {
257*67e74705SXin Li assert(CurPtr == HashEntryI + StoredTokenSize);
258*67e74705SXin Li // Did we reach a #endif? If so, go ahead and consume that token as well.
259*67e74705SXin Li if (isEndif)
260*67e74705SXin Li CurPtr += StoredTokenSize * 2;
261*67e74705SXin Li else
262*67e74705SXin Li LastHashTokPtr = HashEntryI;
263*67e74705SXin Li
264*67e74705SXin Li return isEndif;
265*67e74705SXin Li }
266*67e74705SXin Li
267*67e74705SXin Li // Otherwise, we need to advance. Update CurPtr to point to the '#' token.
268*67e74705SXin Li CurPtr = HashEntryI;
269*67e74705SXin Li
270*67e74705SXin Li // Update the location of the last observed '#'. This is useful if we
271*67e74705SXin Li // are skipping multiple blocks.
272*67e74705SXin Li LastHashTokPtr = CurPtr;
273*67e74705SXin Li
274*67e74705SXin Li // Skip the '#' token.
275*67e74705SXin Li assert(((tok::TokenKind)*CurPtr) == tok::hash);
276*67e74705SXin Li CurPtr += StoredTokenSize;
277*67e74705SXin Li
278*67e74705SXin Li // Did we reach a #endif? If so, go ahead and consume that token as well.
279*67e74705SXin Li if (isEndif) {
280*67e74705SXin Li CurPtr += StoredTokenSize * 2;
281*67e74705SXin Li }
282*67e74705SXin Li
283*67e74705SXin Li return isEndif;
284*67e74705SXin Li }
285*67e74705SXin Li
getSourceLocation()286*67e74705SXin Li SourceLocation PTHLexer::getSourceLocation() {
287*67e74705SXin Li // getSourceLocation is not on the hot path. It is used to get the location
288*67e74705SXin Li // of the next token when transitioning back to this lexer when done
289*67e74705SXin Li // handling a #included file. Just read the necessary data from the token
290*67e74705SXin Li // data buffer to construct the SourceLocation object.
291*67e74705SXin Li // NOTE: This is a virtual function; hence it is defined out-of-line.
292*67e74705SXin Li using namespace llvm::support;
293*67e74705SXin Li
294*67e74705SXin Li const unsigned char *OffsetPtr = CurPtr + (StoredTokenSize - 4);
295*67e74705SXin Li uint32_t Offset = endian::readNext<uint32_t, little, aligned>(OffsetPtr);
296*67e74705SXin Li return FileStartLoc.getLocWithOffset(Offset);
297*67e74705SXin Li }
298*67e74705SXin Li
299*67e74705SXin Li //===----------------------------------------------------------------------===//
300*67e74705SXin Li // PTH file lookup: map from strings to file data.
301*67e74705SXin Li //===----------------------------------------------------------------------===//
302*67e74705SXin Li
303*67e74705SXin Li /// PTHFileLookup - This internal data structure is used by the PTHManager
304*67e74705SXin Li /// to map from FileEntry objects managed by FileManager to offsets within
305*67e74705SXin Li /// the PTH file.
306*67e74705SXin Li namespace {
/// Immutable pair of 32-bit offsets describing where one file's cached data
/// lives: its token records and its preprocessor-conditional table.
class PTHFileData {
public:
  PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
      : TokenOff(tokenOff), PPCondOff(ppCondOff) {}

  /// Offset of the file's token records.
  uint32_t getTokenOffset() const { return TokenOff; }

  /// Offset of the file's preprocessor-conditional table.
  uint32_t getPPCondOffset() const { return PPCondOff; }

private:
  const uint32_t TokenOff;
  const uint32_t PPCondOff;
};
317*67e74705SXin Li
318*67e74705SXin Li
319*67e74705SXin Li class PTHFileLookupCommonTrait {
320*67e74705SXin Li public:
321*67e74705SXin Li typedef std::pair<unsigned char, const char*> internal_key_type;
322*67e74705SXin Li typedef unsigned hash_value_type;
323*67e74705SXin Li typedef unsigned offset_type;
324*67e74705SXin Li
ComputeHash(internal_key_type x)325*67e74705SXin Li static hash_value_type ComputeHash(internal_key_type x) {
326*67e74705SXin Li return llvm::HashString(x.second);
327*67e74705SXin Li }
328*67e74705SXin Li
329*67e74705SXin Li static std::pair<unsigned, unsigned>
ReadKeyDataLength(const unsigned char * & d)330*67e74705SXin Li ReadKeyDataLength(const unsigned char*& d) {
331*67e74705SXin Li using namespace llvm::support;
332*67e74705SXin Li unsigned keyLen =
333*67e74705SXin Li (unsigned)endian::readNext<uint16_t, little, unaligned>(d);
334*67e74705SXin Li unsigned dataLen = (unsigned) *(d++);
335*67e74705SXin Li return std::make_pair(keyLen, dataLen);
336*67e74705SXin Li }
337*67e74705SXin Li
ReadKey(const unsigned char * d,unsigned)338*67e74705SXin Li static internal_key_type ReadKey(const unsigned char* d, unsigned) {
339*67e74705SXin Li unsigned char k = *(d++); // Read the entry kind.
340*67e74705SXin Li return std::make_pair(k, (const char*) d);
341*67e74705SXin Li }
342*67e74705SXin Li };
343*67e74705SXin Li
344*67e74705SXin Li } // end anonymous namespace
345*67e74705SXin Li
346*67e74705SXin Li class PTHManager::PTHFileLookupTrait : public PTHFileLookupCommonTrait {
347*67e74705SXin Li public:
348*67e74705SXin Li typedef const FileEntry* external_key_type;
349*67e74705SXin Li typedef PTHFileData data_type;
350*67e74705SXin Li
GetInternalKey(const FileEntry * FE)351*67e74705SXin Li static internal_key_type GetInternalKey(const FileEntry* FE) {
352*67e74705SXin Li return std::make_pair((unsigned char) 0x1, FE->getName());
353*67e74705SXin Li }
354*67e74705SXin Li
EqualKey(internal_key_type a,internal_key_type b)355*67e74705SXin Li static bool EqualKey(internal_key_type a, internal_key_type b) {
356*67e74705SXin Li return a.first == b.first && strcmp(a.second, b.second) == 0;
357*67e74705SXin Li }
358*67e74705SXin Li
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)359*67e74705SXin Li static PTHFileData ReadData(const internal_key_type& k,
360*67e74705SXin Li const unsigned char* d, unsigned) {
361*67e74705SXin Li assert(k.first == 0x1 && "Only file lookups can match!");
362*67e74705SXin Li using namespace llvm::support;
363*67e74705SXin Li uint32_t x = endian::readNext<uint32_t, little, unaligned>(d);
364*67e74705SXin Li uint32_t y = endian::readNext<uint32_t, little, unaligned>(d);
365*67e74705SXin Li return PTHFileData(x, y);
366*67e74705SXin Li }
367*67e74705SXin Li };
368*67e74705SXin Li
369*67e74705SXin Li class PTHManager::PTHStringLookupTrait {
370*67e74705SXin Li public:
371*67e74705SXin Li typedef uint32_t data_type;
372*67e74705SXin Li typedef const std::pair<const char*, unsigned> external_key_type;
373*67e74705SXin Li typedef external_key_type internal_key_type;
374*67e74705SXin Li typedef uint32_t hash_value_type;
375*67e74705SXin Li typedef unsigned offset_type;
376*67e74705SXin Li
EqualKey(const internal_key_type & a,const internal_key_type & b)377*67e74705SXin Li static bool EqualKey(const internal_key_type& a,
378*67e74705SXin Li const internal_key_type& b) {
379*67e74705SXin Li return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
380*67e74705SXin Li : false;
381*67e74705SXin Li }
382*67e74705SXin Li
ComputeHash(const internal_key_type & a)383*67e74705SXin Li static hash_value_type ComputeHash(const internal_key_type& a) {
384*67e74705SXin Li return llvm::HashString(StringRef(a.first, a.second));
385*67e74705SXin Li }
386*67e74705SXin Li
387*67e74705SXin Li // This hopefully will just get inlined and removed by the optimizer.
388*67e74705SXin Li static const internal_key_type&
GetInternalKey(const external_key_type & x)389*67e74705SXin Li GetInternalKey(const external_key_type& x) { return x; }
390*67e74705SXin Li
391*67e74705SXin Li static std::pair<unsigned, unsigned>
ReadKeyDataLength(const unsigned char * & d)392*67e74705SXin Li ReadKeyDataLength(const unsigned char*& d) {
393*67e74705SXin Li using namespace llvm::support;
394*67e74705SXin Li return std::make_pair(
395*67e74705SXin Li (unsigned)endian::readNext<uint16_t, little, unaligned>(d),
396*67e74705SXin Li sizeof(uint32_t));
397*67e74705SXin Li }
398*67e74705SXin Li
399*67e74705SXin Li static std::pair<const char*, unsigned>
ReadKey(const unsigned char * d,unsigned n)400*67e74705SXin Li ReadKey(const unsigned char* d, unsigned n) {
401*67e74705SXin Li assert(n >= 2 && d[n-1] == '\0');
402*67e74705SXin Li return std::make_pair((const char*) d, n-1);
403*67e74705SXin Li }
404*67e74705SXin Li
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)405*67e74705SXin Li static uint32_t ReadData(const internal_key_type& k, const unsigned char* d,
406*67e74705SXin Li unsigned) {
407*67e74705SXin Li using namespace llvm::support;
408*67e74705SXin Li return endian::readNext<uint32_t, little, unaligned>(d);
409*67e74705SXin Li }
410*67e74705SXin Li };
411*67e74705SXin Li
412*67e74705SXin Li //===----------------------------------------------------------------------===//
413*67e74705SXin Li // PTHManager methods.
414*67e74705SXin Li //===----------------------------------------------------------------------===//
415*67e74705SXin Li
PTHManager(std::unique_ptr<const llvm::MemoryBuffer> buf,std::unique_ptr<PTHFileLookup> fileLookup,const unsigned char * idDataTable,std::unique_ptr<IdentifierInfo * [],llvm::FreeDeleter> perIDCache,std::unique_ptr<PTHStringIdLookup> stringIdLookup,unsigned numIds,const unsigned char * spellingBase,const char * originalSourceFile)416*67e74705SXin Li PTHManager::PTHManager(
417*67e74705SXin Li std::unique_ptr<const llvm::MemoryBuffer> buf,
418*67e74705SXin Li std::unique_ptr<PTHFileLookup> fileLookup, const unsigned char *idDataTable,
419*67e74705SXin Li std::unique_ptr<IdentifierInfo *[], llvm::FreeDeleter> perIDCache,
420*67e74705SXin Li std::unique_ptr<PTHStringIdLookup> stringIdLookup, unsigned numIds,
421*67e74705SXin Li const unsigned char *spellingBase, const char *originalSourceFile)
422*67e74705SXin Li : Buf(std::move(buf)), PerIDCache(std::move(perIDCache)),
423*67e74705SXin Li FileLookup(std::move(fileLookup)), IdDataTable(idDataTable),
424*67e74705SXin Li StringIdLookup(std::move(stringIdLookup)), NumIds(numIds), PP(nullptr),
425*67e74705SXin Li SpellingBase(spellingBase), OriginalSourceFile(originalSourceFile) {}
426*67e74705SXin Li
~PTHManager()427*67e74705SXin Li PTHManager::~PTHManager() {
428*67e74705SXin Li }
429*67e74705SXin Li
InvalidPTH(DiagnosticsEngine & Diags,const char * Msg)430*67e74705SXin Li static void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) {
431*67e74705SXin Li Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0")) << Msg;
432*67e74705SXin Li }
433*67e74705SXin Li
Create(StringRef file,DiagnosticsEngine & Diags)434*67e74705SXin Li PTHManager *PTHManager::Create(StringRef file, DiagnosticsEngine &Diags) {
435*67e74705SXin Li // Memory map the PTH file.
436*67e74705SXin Li llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
437*67e74705SXin Li llvm::MemoryBuffer::getFile(file);
438*67e74705SXin Li
439*67e74705SXin Li if (!FileOrErr) {
440*67e74705SXin Li // FIXME: Add ec.message() to this diag.
441*67e74705SXin Li Diags.Report(diag::err_invalid_pth_file) << file;
442*67e74705SXin Li return nullptr;
443*67e74705SXin Li }
444*67e74705SXin Li std::unique_ptr<llvm::MemoryBuffer> File = std::move(FileOrErr.get());
445*67e74705SXin Li
446*67e74705SXin Li using namespace llvm::support;
447*67e74705SXin Li
448*67e74705SXin Li // Get the buffer ranges and check if there are at least three 32-bit
449*67e74705SXin Li // words at the end of the file.
450*67e74705SXin Li const unsigned char *BufBeg = (const unsigned char*)File->getBufferStart();
451*67e74705SXin Li const unsigned char *BufEnd = (const unsigned char*)File->getBufferEnd();
452*67e74705SXin Li
453*67e74705SXin Li // Check the prologue of the file.
454*67e74705SXin Li if ((BufEnd - BufBeg) < (signed)(sizeof("cfe-pth") + 4 + 4) ||
455*67e74705SXin Li memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth")) != 0) {
456*67e74705SXin Li Diags.Report(diag::err_invalid_pth_file) << file;
457*67e74705SXin Li return nullptr;
458*67e74705SXin Li }
459*67e74705SXin Li
460*67e74705SXin Li // Read the PTH version.
461*67e74705SXin Li const unsigned char *p = BufBeg + (sizeof("cfe-pth"));
462*67e74705SXin Li unsigned Version = endian::readNext<uint32_t, little, aligned>(p);
463*67e74705SXin Li
464*67e74705SXin Li if (Version < PTHManager::Version) {
465*67e74705SXin Li InvalidPTH(Diags,
466*67e74705SXin Li Version < PTHManager::Version
467*67e74705SXin Li ? "PTH file uses an older PTH format that is no longer supported"
468*67e74705SXin Li : "PTH file uses a newer PTH format that cannot be read");
469*67e74705SXin Li return nullptr;
470*67e74705SXin Li }
471*67e74705SXin Li
472*67e74705SXin Li // Compute the address of the index table at the end of the PTH file.
473*67e74705SXin Li const unsigned char *PrologueOffset = p;
474*67e74705SXin Li
475*67e74705SXin Li if (PrologueOffset >= BufEnd) {
476*67e74705SXin Li Diags.Report(diag::err_invalid_pth_file) << file;
477*67e74705SXin Li return nullptr;
478*67e74705SXin Li }
479*67e74705SXin Li
480*67e74705SXin Li // Construct the file lookup table. This will be used for mapping from
481*67e74705SXin Li // FileEntry*'s to cached tokens.
482*67e74705SXin Li const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2;
483*67e74705SXin Li const unsigned char *FileTable =
484*67e74705SXin Li BufBeg + endian::readNext<uint32_t, little, aligned>(FileTableOffset);
485*67e74705SXin Li
486*67e74705SXin Li if (!(FileTable > BufBeg && FileTable < BufEnd)) {
487*67e74705SXin Li Diags.Report(diag::err_invalid_pth_file) << file;
488*67e74705SXin Li return nullptr; // FIXME: Proper error diagnostic?
489*67e74705SXin Li }
490*67e74705SXin Li
491*67e74705SXin Li std::unique_ptr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
492*67e74705SXin Li
493*67e74705SXin Li // Warn if the PTH file is empty. We still want to create a PTHManager
494*67e74705SXin Li // as the PTH could be used with -include-pth.
495*67e74705SXin Li if (FL->isEmpty())
496*67e74705SXin Li InvalidPTH(Diags, "PTH file contains no cached source data");
497*67e74705SXin Li
498*67e74705SXin Li // Get the location of the table mapping from persistent ids to the
499*67e74705SXin Li // data needed to reconstruct identifiers.
500*67e74705SXin Li const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0;
501*67e74705SXin Li const unsigned char *IData =
502*67e74705SXin Li BufBeg + endian::readNext<uint32_t, little, aligned>(IDTableOffset);
503*67e74705SXin Li
504*67e74705SXin Li if (!(IData >= BufBeg && IData < BufEnd)) {
505*67e74705SXin Li Diags.Report(diag::err_invalid_pth_file) << file;
506*67e74705SXin Li return nullptr;
507*67e74705SXin Li }
508*67e74705SXin Li
509*67e74705SXin Li // Get the location of the hashtable mapping between strings and
510*67e74705SXin Li // persistent IDs.
511*67e74705SXin Li const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1;
512*67e74705SXin Li const unsigned char *StringIdTable =
513*67e74705SXin Li BufBeg + endian::readNext<uint32_t, little, aligned>(StringIdTableOffset);
514*67e74705SXin Li if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) {
515*67e74705SXin Li Diags.Report(diag::err_invalid_pth_file) << file;
516*67e74705SXin Li return nullptr;
517*67e74705SXin Li }
518*67e74705SXin Li
519*67e74705SXin Li std::unique_ptr<PTHStringIdLookup> SL(
520*67e74705SXin Li PTHStringIdLookup::Create(StringIdTable, BufBeg));
521*67e74705SXin Li
522*67e74705SXin Li // Get the location of the spelling cache.
523*67e74705SXin Li const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3;
524*67e74705SXin Li const unsigned char *spellingBase =
525*67e74705SXin Li BufBeg + endian::readNext<uint32_t, little, aligned>(spellingBaseOffset);
526*67e74705SXin Li if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
527*67e74705SXin Li Diags.Report(diag::err_invalid_pth_file) << file;
528*67e74705SXin Li return nullptr;
529*67e74705SXin Li }
530*67e74705SXin Li
531*67e74705SXin Li // Get the number of IdentifierInfos and pre-allocate the identifier cache.
532*67e74705SXin Li uint32_t NumIds = endian::readNext<uint32_t, little, aligned>(IData);
533*67e74705SXin Li
534*67e74705SXin Li // Pre-allocate the persistent ID -> IdentifierInfo* cache. We use calloc()
535*67e74705SXin Li // so that we in the best case only zero out memory once when the OS returns
536*67e74705SXin Li // us new pages.
537*67e74705SXin Li std::unique_ptr<IdentifierInfo *[], llvm::FreeDeleter> PerIDCache;
538*67e74705SXin Li
539*67e74705SXin Li if (NumIds) {
540*67e74705SXin Li PerIDCache.reset((IdentifierInfo **)calloc(NumIds, sizeof(PerIDCache[0])));
541*67e74705SXin Li if (!PerIDCache) {
542*67e74705SXin Li InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
543*67e74705SXin Li return nullptr;
544*67e74705SXin Li }
545*67e74705SXin Li }
546*67e74705SXin Li
547*67e74705SXin Li // Compute the address of the original source file.
548*67e74705SXin Li const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4;
549*67e74705SXin Li unsigned len =
550*67e74705SXin Li endian::readNext<uint16_t, little, unaligned>(originalSourceBase);
551*67e74705SXin Li if (!len) originalSourceBase = nullptr;
552*67e74705SXin Li
553*67e74705SXin Li // Create the new PTHManager.
554*67e74705SXin Li return new PTHManager(std::move(File), std::move(FL), IData,
555*67e74705SXin Li std::move(PerIDCache), std::move(SL), NumIds,
556*67e74705SXin Li spellingBase, (const char *)originalSourceBase);
557*67e74705SXin Li }
558*67e74705SXin Li
LazilyCreateIdentifierInfo(unsigned PersistentID)559*67e74705SXin Li IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
560*67e74705SXin Li using namespace llvm::support;
561*67e74705SXin Li // Look in the PTH file for the string data for the IdentifierInfo object.
562*67e74705SXin Li const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
563*67e74705SXin Li const unsigned char *IDData =
564*67e74705SXin Li (const unsigned char *)Buf->getBufferStart() +
565*67e74705SXin Li endian::readNext<uint32_t, little, aligned>(TableEntry);
566*67e74705SXin Li assert(IDData < (const unsigned char*)Buf->getBufferEnd());
567*67e74705SXin Li
568*67e74705SXin Li // Allocate the object.
569*67e74705SXin Li std::pair<IdentifierInfo,const unsigned char*> *Mem =
570*67e74705SXin Li Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
571*67e74705SXin Li
572*67e74705SXin Li Mem->second = IDData;
573*67e74705SXin Li assert(IDData[0] != '\0');
574*67e74705SXin Li IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
575*67e74705SXin Li
576*67e74705SXin Li // Store the new IdentifierInfo in the cache.
577*67e74705SXin Li PerIDCache[PersistentID] = II;
578*67e74705SXin Li assert(II->getNameStart() && II->getNameStart()[0] != '\0');
579*67e74705SXin Li return II;
580*67e74705SXin Li }
581*67e74705SXin Li
get(StringRef Name)582*67e74705SXin Li IdentifierInfo* PTHManager::get(StringRef Name) {
583*67e74705SXin Li // Double check our assumption that the last character isn't '\0'.
584*67e74705SXin Li assert(Name.empty() || Name.back() != '\0');
585*67e74705SXin Li PTHStringIdLookup::iterator I =
586*67e74705SXin Li StringIdLookup->find(std::make_pair(Name.data(), Name.size()));
587*67e74705SXin Li if (I == StringIdLookup->end()) // No identifier found?
588*67e74705SXin Li return nullptr;
589*67e74705SXin Li
590*67e74705SXin Li // Match found. Return the identifier!
591*67e74705SXin Li assert(*I > 0);
592*67e74705SXin Li return GetIdentifierInfo(*I-1);
593*67e74705SXin Li }
594*67e74705SXin Li
CreateLexer(FileID FID)595*67e74705SXin Li PTHLexer *PTHManager::CreateLexer(FileID FID) {
596*67e74705SXin Li const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
597*67e74705SXin Li if (!FE)
598*67e74705SXin Li return nullptr;
599*67e74705SXin Li
600*67e74705SXin Li using namespace llvm::support;
601*67e74705SXin Li
602*67e74705SXin Li // Lookup the FileEntry object in our file lookup data structure. It will
603*67e74705SXin Li // return a variant that indicates whether or not there is an offset within
604*67e74705SXin Li // the PTH file that contains cached tokens.
605*67e74705SXin Li PTHFileLookup::iterator I = FileLookup->find(FE);
606*67e74705SXin Li
607*67e74705SXin Li if (I == FileLookup->end()) // No tokens available?
608*67e74705SXin Li return nullptr;
609*67e74705SXin Li
610*67e74705SXin Li const PTHFileData& FileData = *I;
611*67e74705SXin Li
612*67e74705SXin Li const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
613*67e74705SXin Li // Compute the offset of the token data within the buffer.
614*67e74705SXin Li const unsigned char* data = BufStart + FileData.getTokenOffset();
615*67e74705SXin Li
616*67e74705SXin Li // Get the location of pp-conditional table.
617*67e74705SXin Li const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
618*67e74705SXin Li uint32_t Len = endian::readNext<uint32_t, little, aligned>(ppcond);
619*67e74705SXin Li if (Len == 0) ppcond = nullptr;
620*67e74705SXin Li
621*67e74705SXin Li assert(PP && "No preprocessor set yet!");
622*67e74705SXin Li return new PTHLexer(*PP, FID, data, ppcond, *this);
623*67e74705SXin Li }
624*67e74705SXin Li
625*67e74705SXin Li //===----------------------------------------------------------------------===//
626*67e74705SXin Li // 'stat' caching.
627*67e74705SXin Li //===----------------------------------------------------------------------===//
628*67e74705SXin Li
629*67e74705SXin Li namespace {
630*67e74705SXin Li class PTHStatData {
631*67e74705SXin Li public:
632*67e74705SXin Li const bool HasData;
633*67e74705SXin Li uint64_t Size;
634*67e74705SXin Li time_t ModTime;
635*67e74705SXin Li llvm::sys::fs::UniqueID UniqueID;
636*67e74705SXin Li bool IsDirectory;
637*67e74705SXin Li
PTHStatData(uint64_t Size,time_t ModTime,llvm::sys::fs::UniqueID UniqueID,bool IsDirectory)638*67e74705SXin Li PTHStatData(uint64_t Size, time_t ModTime, llvm::sys::fs::UniqueID UniqueID,
639*67e74705SXin Li bool IsDirectory)
640*67e74705SXin Li : HasData(true), Size(Size), ModTime(ModTime), UniqueID(UniqueID),
641*67e74705SXin Li IsDirectory(IsDirectory) {}
642*67e74705SXin Li
PTHStatData()643*67e74705SXin Li PTHStatData() : HasData(false) {}
644*67e74705SXin Li };
645*67e74705SXin Li
646*67e74705SXin Li class PTHStatLookupTrait : public PTHFileLookupCommonTrait {
647*67e74705SXin Li public:
648*67e74705SXin Li typedef const char* external_key_type; // const char*
649*67e74705SXin Li typedef PTHStatData data_type;
650*67e74705SXin Li
GetInternalKey(const char * path)651*67e74705SXin Li static internal_key_type GetInternalKey(const char *path) {
652*67e74705SXin Li // The key 'kind' doesn't matter here because it is ignored in EqualKey.
653*67e74705SXin Li return std::make_pair((unsigned char) 0x0, path);
654*67e74705SXin Li }
655*67e74705SXin Li
EqualKey(internal_key_type a,internal_key_type b)656*67e74705SXin Li static bool EqualKey(internal_key_type a, internal_key_type b) {
657*67e74705SXin Li // When doing 'stat' lookups we don't care about the kind of 'a' and 'b',
658*67e74705SXin Li // just the paths.
659*67e74705SXin Li return strcmp(a.second, b.second) == 0;
660*67e74705SXin Li }
661*67e74705SXin Li
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)662*67e74705SXin Li static data_type ReadData(const internal_key_type& k, const unsigned char* d,
663*67e74705SXin Li unsigned) {
664*67e74705SXin Li
665*67e74705SXin Li if (k.first /* File or Directory */) {
666*67e74705SXin Li bool IsDirectory = true;
667*67e74705SXin Li if (k.first == 0x1 /* File */) {
668*67e74705SXin Li IsDirectory = false;
669*67e74705SXin Li d += 4 * 2; // Skip the first 2 words.
670*67e74705SXin Li }
671*67e74705SXin Li
672*67e74705SXin Li using namespace llvm::support;
673*67e74705SXin Li
674*67e74705SXin Li uint64_t File = endian::readNext<uint64_t, little, unaligned>(d);
675*67e74705SXin Li uint64_t Device = endian::readNext<uint64_t, little, unaligned>(d);
676*67e74705SXin Li llvm::sys::fs::UniqueID UniqueID(Device, File);
677*67e74705SXin Li time_t ModTime = endian::readNext<uint64_t, little, unaligned>(d);
678*67e74705SXin Li uint64_t Size = endian::readNext<uint64_t, little, unaligned>(d);
679*67e74705SXin Li return data_type(Size, ModTime, UniqueID, IsDirectory);
680*67e74705SXin Li }
681*67e74705SXin Li
682*67e74705SXin Li // Negative stat. Don't read anything.
683*67e74705SXin Li return data_type();
684*67e74705SXin Li }
685*67e74705SXin Li };
686*67e74705SXin Li } // end anonymous namespace
687*67e74705SXin Li
688*67e74705SXin Li namespace clang {
689*67e74705SXin Li class PTHStatCache : public FileSystemStatCache {
690*67e74705SXin Li typedef llvm::OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy;
691*67e74705SXin Li CacheTy Cache;
692*67e74705SXin Li
693*67e74705SXin Li public:
PTHStatCache(PTHManager::PTHFileLookup & FL)694*67e74705SXin Li PTHStatCache(PTHManager::PTHFileLookup &FL)
695*67e74705SXin Li : Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(),
696*67e74705SXin Li FL.getBase()) {}
697*67e74705SXin Li
getStat(const char * Path,FileData & Data,bool isFile,std::unique_ptr<vfs::File> * F,vfs::FileSystem & FS)698*67e74705SXin Li LookupResult getStat(const char *Path, FileData &Data, bool isFile,
699*67e74705SXin Li std::unique_ptr<vfs::File> *F,
700*67e74705SXin Li vfs::FileSystem &FS) override {
701*67e74705SXin Li // Do the lookup for the file's data in the PTH file.
702*67e74705SXin Li CacheTy::iterator I = Cache.find(Path);
703*67e74705SXin Li
704*67e74705SXin Li // If we don't get a hit in the PTH file just forward to 'stat'.
705*67e74705SXin Li if (I == Cache.end())
706*67e74705SXin Li return statChained(Path, Data, isFile, F, FS);
707*67e74705SXin Li
708*67e74705SXin Li const PTHStatData &D = *I;
709*67e74705SXin Li
710*67e74705SXin Li if (!D.HasData)
711*67e74705SXin Li return CacheMissing;
712*67e74705SXin Li
713*67e74705SXin Li Data.Name = Path;
714*67e74705SXin Li Data.Size = D.Size;
715*67e74705SXin Li Data.ModTime = D.ModTime;
716*67e74705SXin Li Data.UniqueID = D.UniqueID;
717*67e74705SXin Li Data.IsDirectory = D.IsDirectory;
718*67e74705SXin Li Data.IsNamedPipe = false;
719*67e74705SXin Li Data.InPCH = true;
720*67e74705SXin Li
721*67e74705SXin Li return CacheExists;
722*67e74705SXin Li }
723*67e74705SXin Li };
724*67e74705SXin Li }
725*67e74705SXin Li
createStatCache()726*67e74705SXin Li std::unique_ptr<FileSystemStatCache> PTHManager::createStatCache() {
727*67e74705SXin Li return llvm::make_unique<PTHStatCache>(*FileLookup);
728*67e74705SXin Li }
729