// Copyright 2006 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// PDBSourceLineWriter uses a pdb file produced by Visual C++ to output
// a line/address map for use with BasicSourceLineResolver.
31 32 #ifndef COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_ 33 #define COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_ 34 35 #include <atlcomcli.h> 36 37 #include <map> 38 #include <memory> 39 #include <string> 40 #include <unordered_map> 41 #include <vector> 42 43 #include "common/windows/module_info.h" 44 #include "common/windows/omap.h" 45 46 struct IDiaEnumLineNumbers; 47 struct IDiaSession; 48 struct IDiaSymbol; 49 50 namespace google_breakpad { 51 52 using std::map; 53 using std::vector; 54 using std::wstring; 55 using std::unordered_map; 56 57 class PDBSourceLineWriter { 58 public: 59 enum FileFormat { 60 PDB_FILE, // a .pdb file containing debug symbols 61 EXE_FILE, // a .exe or .dll file 62 ANY_FILE // try PDB_FILE and then EXE_FILE 63 }; 64 65 explicit PDBSourceLineWriter(bool handle_inline = false); 66 ~PDBSourceLineWriter(); 67 68 // Opens the given file. For executable files, the corresponding pdb 69 // file must be available; Open will be if it is not. 70 // If there is already a pdb file open, it is automatically closed. 71 // Returns true on success. 72 bool Open(const wstring& file, FileFormat format); 73 74 // Closes the current pdb file and its associated resources. 75 void Close(); 76 77 // Sets the code file full path. This is optional for 32-bit modules. It is 78 // also optional for 64-bit modules when there is an executable file stored 79 // in the same directory as the PDB file. It is only required for 64-bit 80 // modules when the executable file is not in the same location as the PDB 81 // file and it must be called after Open() and before WriteMap(). 82 // If Open() was called for an executable file, then it is an error to call 83 // SetCodeFile() with a different file path and it will return false. 84 bool SetCodeFile(const wstring& exe_file); 85 86 // Writes a Breakpad symbol file from the current pdb file to |symbol_file|. 87 // Returns true on success. 
88 bool WriteSymbols(FILE *symbol_file); 89 90 // Retrieves information about the module's debugging file. Returns 91 // true on success and false on failure. 92 bool GetModuleInfo(PDBModuleInfo *info); 93 94 // Retrieves information about the module's PE file. Returns 95 // true on success and false on failure. 96 bool GetPEInfo(PEModuleInfo *info); 97 98 // Sets uses_guid to true if the opened file uses a new-style CodeView 99 // record with a 128-bit GUID, or false if the opened file uses an old-style 100 // CodeView record. When no GUID is available, a 32-bit signature should be 101 // used to identify the module instead. If the information cannot be 102 // determined, this method returns false. 103 bool UsesGUID(bool *uses_guid); 104 105 private: 106 // InlineOrigin represents INLINE_ORIGIN record in a symbol file. It's an 107 // inlined function. 108 struct InlineOrigin { 109 // The unique id for an InlineOrigin. 110 int id; 111 // The name of the inlined function. 112 wstring name; 113 }; 114 115 // Line represents LINE record in a symbol file. It represents a source code 116 // line. 117 struct Line { 118 // The relative address of a line. 119 DWORD rva; 120 // The number bytes this line has. 121 DWORD length; 122 // The source line number. 123 DWORD line_num; 124 // The source file id where the source line is located at. 125 DWORD file_id; 126 }; 127 128 // Inline represents INLINE record in a symbol file. 129 class Inline { 130 public: 131 explicit Inline(int inline_nest_level); 132 133 void SetOriginId(int origin_id); 134 135 // Adding inlinee line's range into ranges. If line is adjacent with any 136 // existing lines, extend the range. Otherwise, add line as a new range. 
137 void ExtendRanges(const Line& line); 138 139 void SetCallSiteLine(DWORD call_site_line); 140 141 void SetCallSiteFileId(DWORD call_site_file_id); 142 143 void SetChildInlines(std::vector<std::unique_ptr<Inline>> child_inlines); 144 145 void Print(FILE* output) const; 146 147 private: 148 // The nest level of this inline record. 149 int inline_nest_level_; 150 // The source line number at where this inlined function is called. 151 DWORD call_site_line_ = 0; 152 // The call site file id at where this inlined function is called. 153 DWORD call_site_file_id_ = 0; 154 // The id used for referring to an InlineOrigin. 155 int origin_id_ = 0; 156 // A map from rva to length. This is the address ranges covered by this 157 // Inline. 158 map<DWORD, DWORD> ranges_; 159 // The list of direct Inlines inlined inside this Inline. 160 vector<std::unique_ptr<Inline>> child_inlines_; 161 }; 162 163 // Lines represents a map of lines inside a function with rva as the key. 164 // AddLine function adds a line into the map and ensures that there is no 165 // overlap between any two lines in the map. 166 class Lines { 167 public: GetLineMap()168 const map<DWORD, Line>& GetLineMap() const { return line_map_; } 169 170 // Finds the line from line_map_ that contains the given rva returns its 171 // line_num. If not found, return 0. 172 DWORD GetLineNum(DWORD rva) const; 173 174 // Finds the line from line_map_ that contains the given rva returns its 175 // file_id. If not found, return 0. 176 DWORD GetFileId(DWORD rva) const; 177 178 // Add the `line` into line_map_. If the `line` overlaps with existing 179 // lines, truncate the existing lines and add the given line. It ensures 180 // that all lines in line_map_ do not overlap with each other. For example, 181 // suppose there is a line A in the map and we call AddLine with Line B. 
182 // Line A: rva: 100, length: 20, line_num: 10, file_id: 1 183 // Line B: rva: 105, length: 10, line_num: 4, file_id: 2 184 // After calling AddLine with Line B, we will have the following lines: 185 // Line 1: rva: 100, length: 5, line_num: 10, file_id: 1 186 // Line 2: rva: 105, length: 10, line_num: 4, file_id: 2 187 // Line 3: rva: 115, length: 5, line_num: 10, file_id: 1 188 void AddLine(const Line& line); 189 190 private: 191 // Finds the line from line_map_ that contains the given rva. If not found, 192 // return nullptr. 193 const Line* GetLine(DWORD rva) const; 194 // The key is rva. AddLine function ensures that any two lines in the map do 195 // not overlap. 196 map<DWORD, Line> line_map_; 197 }; 198 199 // Construct Line from IDiaLineNumber. The output Line is stored at line. 200 // Return true on success. 201 bool GetLine(IDiaLineNumber* dia_line, Line* line) const; 202 203 // Construct Lines from IDiaEnumLineNumbers. The list of Lines are stored at 204 // line_list. 205 // Returns true on success. 206 bool GetLines(IDiaEnumLineNumbers* lines, Lines* line_list) const; 207 208 // Outputs the line/address pairs for each line in the enumerator. 209 void PrintLines(const Lines& lines) const; 210 211 // Outputs a function address and name, followed by its source line list. 212 // block can be the same object as function, or it can be a reference to a 213 // code block that is lexically part of this function, but resides at a 214 // separate address. If has_multiple_symbols is true, this function's 215 // instructions correspond to multiple symbols. Returns true on success. 216 bool PrintFunction(IDiaSymbol *function, IDiaSymbol *block, 217 bool has_multiple_symbols); 218 219 // Outputs all functions as described above. Returns true on success. 220 bool PrintFunctions(); 221 222 // Outputs all of the source files in the session's pdb file. 223 // Returns true on success. 224 bool PrintSourceFiles(); 225 226 // Output all inline origins. 
227 void PrintInlineOrigins() const; 228 229 // Retrieve inlines inside the given block. It also adds inlinee lines to 230 // `line_list` since inner lines are more precise source location. If the 231 // block has children wih SymTagInlineSite Tag, it will recursively (DFS) call 232 // itself with each child as first argument. Returns true on success. 233 // `block`: the IDiaSymbol that may have inline sites. 234 // `line_list`: the list of lines inside current function. 235 // `inline_nest_level`: the nest level of block's Inlines. 236 // `inlines`: the vector to store the list of inlines for the block. 237 bool GetInlines(IDiaSymbol* block, 238 Lines* line_list, 239 int inline_nest_level, 240 vector<std::unique_ptr<Inline>>* inlines); 241 242 // Outputs all inlines. 243 void PrintInlines(const vector<std::unique_ptr<Inline>>& inlines) const; 244 245 // Outputs all of the frame information necessary to construct stack 246 // backtraces in the absence of frame pointers. For x86 data stored in 247 // .pdb files. Returns true on success. 248 bool PrintFrameDataUsingPDB(); 249 250 // Outputs all of the frame information necessary to construct stack 251 // backtraces in the absence of frame pointers. For x64 data stored in 252 // .exe, .dll files. Returns true on success. 253 bool PrintFrameDataUsingEXE(); 254 255 // Outputs all of the frame information necessary to construct stack 256 // backtraces in the absence of frame pointers. Returns true on success. 257 bool PrintFrameData(); 258 259 // Outputs a single public symbol address and name, if the symbol corresponds 260 // to a code address. Returns true on success. If symbol is does not 261 // correspond to code, returns true without outputting anything. If 262 // has_multiple_symbols is true, the symbol corresponds to a code address and 263 // the instructions correspond to multiple symbols. 
264 bool PrintCodePublicSymbol(IDiaSymbol *symbol, bool has_multiple_symbols); 265 266 // Outputs a line identifying the PDB file that is being dumped, along with 267 // its uuid and age. 268 bool PrintPDBInfo(); 269 270 // Outputs a line identifying the PE file corresponding to the PDB 271 // file that is being dumped, along with its code identifier, 272 // which consists of its timestamp and file size. 273 bool PrintPEInfo(); 274 275 // Returns true if this filename has already been seen, 276 // and an ID is stored for it, or false if it has not. FileIDIsCached(const wstring & file)277 bool FileIDIsCached(const wstring& file) { 278 return unique_files_.find(file) != unique_files_.end(); 279 } 280 281 // Cache this filename and ID for later reuse. CacheFileID(const wstring & file,DWORD id)282 void CacheFileID(const wstring& file, DWORD id) { 283 unique_files_[file] = id; 284 } 285 286 // Store this ID in the cache as a duplicate for this filename. StoreDuplicateFileID(const wstring & file,DWORD id)287 void StoreDuplicateFileID(const wstring& file, DWORD id) { 288 unordered_map<wstring, DWORD>::iterator iter = unique_files_.find(file); 289 if (iter != unique_files_.end()) { 290 // map this id to the previously seen one 291 file_ids_[id] = iter->second; 292 } 293 } 294 295 // Given a file's unique ID, return the ID that should be used to 296 // reference it. There may be multiple files with identical filenames 297 // but different unique IDs. The cache attempts to coalesce these into 298 // one ID per unique filename. GetRealFileID(DWORD id)299 DWORD GetRealFileID(DWORD id) const { 300 unordered_map<DWORD, DWORD>::const_iterator iter = file_ids_.find(id); 301 if (iter == file_ids_.end()) 302 return id; 303 return iter->second; 304 } 305 306 // Find the PE file corresponding to the loaded PDB file, and 307 // set the code_file_ member. Returns false on failure. 308 bool FindPEFile(); 309 310 // Returns the function name for a symbol. 
If possible, the name is 311 // undecorated. If the symbol's decorated form indicates the size of 312 // parameters on the stack, this information is returned in stack_param_size. 313 // Returns true on success. If the symbol doesn't encode parameter size 314 // information, stack_param_size is set to -1. 315 static bool GetSymbolFunctionName(IDiaSymbol *function, BSTR *name, 316 int *stack_param_size); 317 318 // Returns the number of bytes of stack space used for a function's 319 // parameters. function must have the tag SymTagFunction. In the event of 320 // a failure, returns 0, which is also a valid number of bytes. 321 static int GetFunctionStackParamSize(IDiaSymbol *function); 322 323 // The filename of the PE file corresponding to the currently-open 324 // pdb file. 325 wstring code_file_; 326 327 // The session for the currently-open pdb file. 328 CComPtr<IDiaSession> session_; 329 330 // The current output file for this WriteMap invocation. 331 FILE *output_; 332 333 // There may be many duplicate filenames with different IDs. 334 // This maps from the DIA "unique ID" to a single ID per unique 335 // filename. 336 unordered_map<DWORD, DWORD> file_ids_; 337 // This maps unique filenames to file IDs. 338 unordered_map<wstring, DWORD> unique_files_; 339 340 // The INLINE_ORIGINS records. The key is the function name. 341 std::map<wstring, InlineOrigin> inline_origins_; 342 343 // This is used for calculating post-transform symbol addresses and lengths. 344 ImageMap image_map_; 345 346 // If we should output INLINE/INLINE_ORIGIN records 347 bool handle_inline_; 348 349 // Disallow copy ctor and operator= 350 PDBSourceLineWriter(const PDBSourceLineWriter&); 351 void operator=(const PDBSourceLineWriter&); 352 }; 353 354 } // namespace google_breakpad 355 356 #endif // COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_ 357