xref: /aosp_15_r20/external/google-breakpad/src/common/windows/pdb_source_line_writer.h (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // Copyright 2006 Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 // PDBSourceLineWriter uses a pdb file produced by Visual C++ to output
30 // a line/address map for use with BasicSourceLineResolver.
31 
32 #ifndef COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_
33 #define COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_
34 
35 #include <atlcomcli.h>
36 
37 #include <map>
38 #include <memory>
39 #include <string>
40 #include <unordered_map>
41 #include <vector>
42 
43 #include "common/windows/module_info.h"
44 #include "common/windows/omap.h"
45 
// Forward declarations of the DIA SDK COM interfaces referenced by
// PDBSourceLineWriter below.  Only pointers (or CComPtr wrappers) to these
// types appear in this header, so the full definitions are not needed here.
struct IDiaEnumLineNumbers;
struct IDiaLineNumber;
struct IDiaSession;
struct IDiaSymbol;
49 
50 namespace google_breakpad {
51 
// Standard-library names used unqualified throughout this header.
using std::map;
using std::unordered_map;
using std::vector;
using std::wstring;
56 
57 class PDBSourceLineWriter {
58  public:
59   enum FileFormat {
60     PDB_FILE,  // a .pdb file containing debug symbols
61     EXE_FILE,  // a .exe or .dll file
62     ANY_FILE   // try PDB_FILE and then EXE_FILE
63   };
64 
65   explicit PDBSourceLineWriter(bool handle_inline = false);
66   ~PDBSourceLineWriter();
67 
68   // Opens the given file.  For executable files, the corresponding pdb
69   // file must be available; Open will be if it is not.
70   // If there is already a pdb file open, it is automatically closed.
71   // Returns true on success.
72   bool Open(const wstring& file, FileFormat format);
73 
74   // Closes the current pdb file and its associated resources.
75   void Close();
76 
77   // Sets the code file full path.  This is optional for 32-bit modules.  It is
78   // also optional for 64-bit modules when there is an executable file stored
79   // in the same directory as the PDB file.  It is only required for 64-bit
80   // modules when the executable file is not in the same location as the PDB
81   // file and it must be called after Open() and before WriteMap().
82   // If Open() was called for an executable file, then it is an error to call
83   // SetCodeFile() with a different file path and it will return false.
84   bool SetCodeFile(const wstring& exe_file);
85 
86   // Writes a Breakpad symbol file from the current pdb file to |symbol_file|.
87   // Returns true on success.
88   bool WriteSymbols(FILE *symbol_file);
89 
90   // Retrieves information about the module's debugging file.  Returns
91   // true on success and false on failure.
92   bool GetModuleInfo(PDBModuleInfo *info);
93 
94   // Retrieves information about the module's PE file.  Returns
95   // true on success and false on failure.
96   bool GetPEInfo(PEModuleInfo *info);
97 
98   // Sets uses_guid to true if the opened file uses a new-style CodeView
99   // record with a 128-bit GUID, or false if the opened file uses an old-style
100   // CodeView record.  When no GUID is available, a 32-bit signature should be
101   // used to identify the module instead.  If the information cannot be
102   // determined, this method returns false.
103   bool UsesGUID(bool *uses_guid);
104 
105  private:
106   // InlineOrigin represents INLINE_ORIGIN record in a symbol file. It's an
107   // inlined function.
108   struct InlineOrigin {
109     // The unique id for an InlineOrigin.
110     int id;
111     // The name of the inlined function.
112     wstring name;
113   };
114 
115   // Line represents LINE record in a symbol file. It represents a source code
116   // line.
117   struct Line {
118     // The relative address of a line.
119     DWORD rva;
120     // The number bytes this line has.
121     DWORD length;
122     // The source line number.
123     DWORD line_num;
124     // The source file id where the source line is located at.
125     DWORD file_id;
126   };
127 
128   // Inline represents INLINE record in a symbol file.
129   class Inline {
130    public:
131     explicit Inline(int inline_nest_level);
132 
133     void SetOriginId(int origin_id);
134 
135     // Adding inlinee line's range into ranges. If line is adjacent with any
136     // existing lines, extend the range. Otherwise, add line as a new range.
137     void ExtendRanges(const Line& line);
138 
139     void SetCallSiteLine(DWORD call_site_line);
140 
141     void SetCallSiteFileId(DWORD call_site_file_id);
142 
143     void SetChildInlines(std::vector<std::unique_ptr<Inline>> child_inlines);
144 
145     void Print(FILE* output) const;
146 
147    private:
148     // The nest level of this inline record.
149     int inline_nest_level_;
150     // The source line number at where this inlined function is called.
151     DWORD call_site_line_ = 0;
152     // The call site file id at where this inlined function is called.
153     DWORD call_site_file_id_ = 0;
154     // The id used for referring to an InlineOrigin.
155     int origin_id_ = 0;
156     // A map from rva to length. This is the address ranges covered by this
157     // Inline.
158     map<DWORD, DWORD> ranges_;
159     // The list of direct Inlines inlined inside this Inline.
160     vector<std::unique_ptr<Inline>> child_inlines_;
161   };
162 
163   // Lines represents a map of lines inside a function with rva as the key.
164   // AddLine function adds a line into the map and ensures that there is no
165   // overlap between any two lines in the map.
166   class Lines {
167    public:
GetLineMap()168     const map<DWORD, Line>& GetLineMap() const { return line_map_; }
169 
170     // Finds the line from line_map_ that contains the given rva returns its
171     // line_num. If not found, return 0.
172     DWORD GetLineNum(DWORD rva) const;
173 
174     // Finds the line from line_map_ that contains the given rva returns its
175     // file_id. If not found, return 0.
176     DWORD GetFileId(DWORD rva) const;
177 
178     // Add the `line` into line_map_. If the `line` overlaps with existing
179     // lines, truncate the existing lines and add the given line. It ensures
180     // that all lines in line_map_ do not overlap with each other. For example,
181     // suppose there is a line A in the map and we call AddLine with Line B.
182     // Line A: rva: 100, length: 20, line_num: 10, file_id: 1
183     // Line B: rva: 105, length: 10, line_num: 4, file_id: 2
184     // After calling AddLine with Line B, we will have the following lines:
185     // Line 1: rva: 100, length: 5, line_num: 10, file_id: 1
186     // Line 2: rva: 105, length: 10, line_num: 4, file_id: 2
187     // Line 3: rva: 115, length: 5, line_num: 10, file_id: 1
188     void AddLine(const Line& line);
189 
190    private:
191     // Finds the line from line_map_ that contains the given rva. If not found,
192     // return nullptr.
193     const Line* GetLine(DWORD rva) const;
194     // The key is rva. AddLine function ensures that any two lines in the map do
195     // not overlap.
196     map<DWORD, Line> line_map_;
197   };
198 
199   // Construct Line from IDiaLineNumber. The output Line is stored at line.
200   // Return true on success.
201   bool GetLine(IDiaLineNumber* dia_line, Line* line) const;
202 
203   // Construct Lines from IDiaEnumLineNumbers. The list of Lines are stored at
204   // line_list.
205   // Returns true on success.
206   bool GetLines(IDiaEnumLineNumbers* lines, Lines* line_list) const;
207 
208   // Outputs the line/address pairs for each line in the enumerator.
209   void PrintLines(const Lines& lines) const;
210 
211   // Outputs a function address and name, followed by its source line list.
212   // block can be the same object as function, or it can be a reference to a
213   // code block that is lexically part of this function, but resides at a
214   // separate address. If has_multiple_symbols is true, this function's
215   // instructions correspond to multiple symbols. Returns true on success.
216   bool PrintFunction(IDiaSymbol *function, IDiaSymbol *block,
217                      bool has_multiple_symbols);
218 
219   // Outputs all functions as described above.  Returns true on success.
220   bool PrintFunctions();
221 
222   // Outputs all of the source files in the session's pdb file.
223   // Returns true on success.
224   bool PrintSourceFiles();
225 
226   // Output all inline origins.
227   void PrintInlineOrigins() const;
228 
229   // Retrieve inlines inside the given block. It also adds inlinee lines to
230   // `line_list` since inner lines are more precise source location. If the
231   // block has children wih SymTagInlineSite Tag, it will recursively (DFS) call
232   // itself with each child as first argument. Returns true on success.
233   // `block`: the IDiaSymbol that may have inline sites.
234   // `line_list`: the list of lines inside current function.
235   // `inline_nest_level`: the nest level of block's Inlines.
236   // `inlines`: the vector to store the list of inlines for the block.
237   bool GetInlines(IDiaSymbol* block,
238                   Lines* line_list,
239                   int inline_nest_level,
240                   vector<std::unique_ptr<Inline>>* inlines);
241 
242   // Outputs all inlines.
243   void PrintInlines(const vector<std::unique_ptr<Inline>>& inlines) const;
244 
245   // Outputs all of the frame information necessary to construct stack
246   // backtraces in the absence of frame pointers. For x86 data stored in
247   // .pdb files. Returns true on success.
248   bool PrintFrameDataUsingPDB();
249 
250   // Outputs all of the frame information necessary to construct stack
251   // backtraces in the absence of frame pointers. For x64 data stored in
252   // .exe, .dll files. Returns true on success.
253   bool PrintFrameDataUsingEXE();
254 
255   // Outputs all of the frame information necessary to construct stack
256   // backtraces in the absence of frame pointers.  Returns true on success.
257   bool PrintFrameData();
258 
259   // Outputs a single public symbol address and name, if the symbol corresponds
260   // to a code address.  Returns true on success.  If symbol is does not
261   // correspond to code, returns true without outputting anything. If
262   // has_multiple_symbols is true, the symbol corresponds to a code address and
263   // the instructions correspond to multiple symbols.
264   bool PrintCodePublicSymbol(IDiaSymbol *symbol, bool has_multiple_symbols);
265 
266   // Outputs a line identifying the PDB file that is being dumped, along with
267   // its uuid and age.
268   bool PrintPDBInfo();
269 
270   // Outputs a line identifying the PE file corresponding to the PDB
271   // file that is being dumped, along with its code identifier,
272   // which consists of its timestamp and file size.
273   bool PrintPEInfo();
274 
275   // Returns true if this filename has already been seen,
276   // and an ID is stored for it, or false if it has not.
FileIDIsCached(const wstring & file)277   bool FileIDIsCached(const wstring& file) {
278     return unique_files_.find(file) != unique_files_.end();
279   }
280 
281   // Cache this filename and ID for later reuse.
CacheFileID(const wstring & file,DWORD id)282   void CacheFileID(const wstring& file, DWORD id) {
283     unique_files_[file] = id;
284   }
285 
286   // Store this ID in the cache as a duplicate for this filename.
StoreDuplicateFileID(const wstring & file,DWORD id)287   void StoreDuplicateFileID(const wstring& file, DWORD id) {
288     unordered_map<wstring, DWORD>::iterator iter = unique_files_.find(file);
289     if (iter != unique_files_.end()) {
290       // map this id to the previously seen one
291       file_ids_[id] = iter->second;
292     }
293   }
294 
295   // Given a file's unique ID, return the ID that should be used to
296   // reference it. There may be multiple files with identical filenames
297   // but different unique IDs. The cache attempts to coalesce these into
298   // one ID per unique filename.
GetRealFileID(DWORD id)299   DWORD GetRealFileID(DWORD id) const {
300     unordered_map<DWORD, DWORD>::const_iterator iter = file_ids_.find(id);
301     if (iter == file_ids_.end())
302       return id;
303     return iter->second;
304   }
305 
306   // Find the PE file corresponding to the loaded PDB file, and
307   // set the code_file_ member. Returns false on failure.
308   bool FindPEFile();
309 
310   // Returns the function name for a symbol.  If possible, the name is
311   // undecorated.  If the symbol's decorated form indicates the size of
312   // parameters on the stack, this information is returned in stack_param_size.
313   // Returns true on success.  If the symbol doesn't encode parameter size
314   // information, stack_param_size is set to -1.
315   static bool GetSymbolFunctionName(IDiaSymbol *function, BSTR *name,
316                                     int *stack_param_size);
317 
318   // Returns the number of bytes of stack space used for a function's
319   // parameters.  function must have the tag SymTagFunction.  In the event of
320   // a failure, returns 0, which is also a valid number of bytes.
321   static int GetFunctionStackParamSize(IDiaSymbol *function);
322 
323   // The filename of the PE file corresponding to the currently-open
324   // pdb file.
325   wstring code_file_;
326 
327   // The session for the currently-open pdb file.
328   CComPtr<IDiaSession> session_;
329 
330   // The current output file for this WriteMap invocation.
331   FILE *output_;
332 
333   // There may be many duplicate filenames with different IDs.
334   // This maps from the DIA "unique ID" to a single ID per unique
335   // filename.
336   unordered_map<DWORD, DWORD> file_ids_;
337   // This maps unique filenames to file IDs.
338   unordered_map<wstring, DWORD> unique_files_;
339 
340   // The INLINE_ORIGINS records. The key is the function name.
341   std::map<wstring, InlineOrigin> inline_origins_;
342 
343   // This is used for calculating post-transform symbol addresses and lengths.
344   ImageMap image_map_;
345 
346   // If we should output INLINE/INLINE_ORIGIN records
347   bool handle_inline_;
348 
349   // Disallow copy ctor and operator=
350   PDBSourceLineWriter(const PDBSourceLineWriter&);
351   void operator=(const PDBSourceLineWriter&);
352 };
353 
354 }  // namespace google_breakpad
355 
356 #endif  // COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_
357