xref: /aosp_15_r20/external/google-breakpad/src/common/linux/dump_symbols.cc (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // Copyright 2011 Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 // Restructured in 2009 by: Jim Blandy <[email protected]> <[email protected]>
30 
31 // dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
32 // Find all the debugging info in a file and dump it as a Breakpad symbol file.
33 
34 #ifdef HAVE_CONFIG_H
35 #include <config.h>  // Must come first
36 #endif
37 
38 #include "common/linux/dump_symbols.h"
39 
40 #include <assert.h>
41 #include <elf.h>
42 #include <errno.h>
43 #include <fcntl.h>
44 #include <limits.h>
45 #include <link.h>
46 #include <stdint.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <sys/mman.h>
51 #include <sys/stat.h>
52 #include <unistd.h>
53 #include <zlib.h>
54 #ifdef HAVE_LIBZSTD
55 #include <zstd.h>
56 #endif
57 
58 #include <set>
59 #include <string>
60 #include <utility>
61 #include <vector>
62 
63 #include "common/dwarf/bytereader-inl.h"
64 #include "common/dwarf/dwarf2diehandler.h"
65 #include "common/dwarf_cfi_to_module.h"
66 #include "common/dwarf_cu_to_module.h"
67 #include "common/dwarf_line_to_module.h"
68 #include "common/dwarf_range_list_handler.h"
69 #include "common/linux/crc32.h"
70 #include "common/linux/eintr_wrapper.h"
71 #include "common/linux/elfutils.h"
72 #include "common/linux/elfutils-inl.h"
73 #include "common/linux/elf_symbols_to_module.h"
74 #include "common/linux/file_id.h"
75 #include "common/memory_allocator.h"
76 #include "common/module.h"
77 #include "common/path_helper.h"
78 #include "common/scoped_ptr.h"
79 #ifndef NO_STABS_SUPPORT
80 #include "common/stabs_reader.h"
81 #include "common/stabs_to_module.h"
82 #endif
83 #include "common/using_std_string.h"
84 
85 // This namespace contains helper functions.
86 namespace {
87 
88 using google_breakpad::DumpOptions;
89 using google_breakpad::DwarfCFIToModule;
90 using google_breakpad::DwarfCUToModule;
91 using google_breakpad::DwarfLineToModule;
92 using google_breakpad::DwarfRangeListHandler;
93 using google_breakpad::ElfClass;
94 using google_breakpad::ElfClass32;
95 using google_breakpad::ElfClass64;
96 using google_breakpad::elf::FileID;
97 using google_breakpad::FindElfSectionByName;
98 using google_breakpad::GetOffset;
99 using google_breakpad::IsValidElf;
100 using google_breakpad::elf::kDefaultBuildIdSize;
101 using google_breakpad::Module;
102 using google_breakpad::PageAllocator;
103 #ifndef NO_STABS_SUPPORT
104 using google_breakpad::StabsToModule;
105 #endif
106 using google_breakpad::scoped_ptr;
107 using google_breakpad::wasteful_vector;
108 
109 // Define AARCH64 ELF architecture if host machine does not include this define.
110 #ifndef EM_AARCH64
111 #define EM_AARCH64      183
112 #endif
113 
114 // Define ZStd compression if host machine does not include this define.
115 #ifndef ELFCOMPRESS_ZSTD
116 #define ELFCOMPRESS_ZSTD 2
117 #endif
118 
//
// FDWrapper
//
// Scope guard for a POSIX file descriptor. The descriptor passed to the
// constructor is closed when the wrapper is destroyed, unless release() has
// been called first. The value -1 means "no descriptor" and is never closed.
//
// Copying is disabled: two wrappers holding the same descriptor would both
// close it, and the second close could hit an unrelated descriptor that
// happened to reuse the number.
//
class FDWrapper {
 public:
  explicit FDWrapper(int fd) : fd_(fd) {}

  ~FDWrapper() {
    if (fd_ != -1)
      close(fd_);
  }

  FDWrapper(const FDWrapper&) = delete;
  FDWrapper& operator=(const FDWrapper&) = delete;

  // Returns the wrapped descriptor; ownership stays with the wrapper.
  int get() const {
    return fd_;
  }

  // Hands the descriptor back to the caller. After this call the wrapper
  // holds -1 and its destructor closes nothing.
  int release() {
    const int fd = fd_;
    fd_ = -1;
    return fd;
  }

 private:
  int fd_;
};
143 
//
// MmapWrapper
//
// Scope guard for a memory mapping. Once set() has recorded a mapping, the
// destructor munmap()s it unless release() was called in between.
//
// All members are initialized at construction so the object never carries
// indeterminate values, and copying is disabled because two wrappers
// describing the same mapping would unmap it twice.
//
class MmapWrapper {
 public:
  MmapWrapper() : is_set_(false), base_(NULL), size_(0) {}

  ~MmapWrapper() {
    if (is_set_ && base_ != NULL) {
      assert(size_ > 0);
      munmap(base_, size_);
    }
  }

  MmapWrapper(const MmapWrapper&) = delete;
  MmapWrapper& operator=(const MmapWrapper&) = delete;

  // Records the mapping [mapped_address, mapped_address + mapped_size) as
  // owned by this wrapper. Any previously recorded mapping is overwritten
  // without being unmapped.
  void set(void* mapped_address, size_t mapped_size) {
    is_set_ = true;
    base_ = mapped_address;
    size_ = mapped_size;
  }

  // Forgets the recorded mapping without unmapping it. A mapping must have
  // been recorded with set() beforehand.
  void release() {
    assert(is_set_);
    is_set_ = false;
    base_ = NULL;
    size_ = 0;
  }

 private:
  bool is_set_;
  void* base_;
  size_t size_;
};
175 
// Return the preferred loading address of the binary whose NHEADER program
// headers start at PROGRAM_HEADERS.
//
// The result is the p_vaddr of the first PT_LOAD entry in table order, or 0
// when the table holds no PT_LOAD entry. For non-PIC executables
// (e_type == ET_EXEC) that is the start of the first loadable segment, since
// ELF requires segments to be sorted by load address; for PIC executables and
// dynamic libraries (e_type == ET_DYN) it is normally zero.
template<typename ElfClass>
typename ElfClass::Addr GetLoadingAddress(
    const typename ElfClass::Phdr* program_headers,
    int nheader) {
  typedef typename ElfClass::Phdr Phdr;

  int index = 0;
  while (index < nheader) {
    const Phdr& segment = program_headers[index];
    if (segment.p_type == PT_LOAD) {
      return segment.p_vaddr;
    }
    ++index;
  }

  // No loadable segment was found.
  return 0;
}
195 
196 // Find the set of address ranges for all PT_LOAD segments.
197 template <typename ElfClass>
GetPtLoadSegmentRanges(const typename ElfClass::Phdr * program_headers,int nheader)198 vector<Module::Range> GetPtLoadSegmentRanges(
199     const typename ElfClass::Phdr* program_headers,
200     int nheader) {
201   typedef typename ElfClass::Phdr Phdr;
202   vector<Module::Range> ranges;
203 
204   for (int i = 0; i < nheader; ++i) {
205     const Phdr& header = program_headers[i];
206     if (header.p_type == PT_LOAD) {
207       ranges.push_back(Module::Range(header.p_vaddr, header.p_memsz));
208     }
209   }
210   return ranges;
211 }
212 
213 #ifndef NO_STABS_SUPPORT
214 template<typename ElfClass>
LoadStabs(const typename ElfClass::Ehdr * elf_header,const typename ElfClass::Shdr * stab_section,const typename ElfClass::Shdr * stabstr_section,const bool big_endian,Module * module)215 bool LoadStabs(const typename ElfClass::Ehdr* elf_header,
216                const typename ElfClass::Shdr* stab_section,
217                const typename ElfClass::Shdr* stabstr_section,
218                const bool big_endian,
219                Module* module) {
220   // A callback object to handle data from the STABS reader.
221   StabsToModule handler(module);
222   // Find the addresses of the STABS data, and create a STABS reader object.
223   // On Linux, STABS entries always have 32-bit values, regardless of the
224   // address size of the architecture whose code they're describing, and
225   // the strings are always "unitized".
226   const uint8_t* stabs =
227       GetOffset<ElfClass, uint8_t>(elf_header, stab_section->sh_offset);
228   const uint8_t* stabstr =
229       GetOffset<ElfClass, uint8_t>(elf_header, stabstr_section->sh_offset);
230   google_breakpad::StabsReader reader(stabs, stab_section->sh_size,
231                                       stabstr, stabstr_section->sh_size,
232                                       big_endian, 4, true, &handler);
233   // Read the STABS data, and do post-processing.
234   if (!reader.Process())
235     return false;
236   handler.Finalize();
237   return true;
238 }
239 #endif  // NO_STABS_SUPPORT
240 
241 // A range handler that accepts rangelist data parsed by
242 // google_breakpad::RangeListReader and populates a range vector (typically
243 // owned by a function) with the results.
244 class DumperRangesHandler : public DwarfCUToModule::RangesHandler {
245  public:
DumperRangesHandler(google_breakpad::ByteReader * reader)246   DumperRangesHandler(google_breakpad::ByteReader* reader) :
247       reader_(reader) { }
248 
ReadRanges(enum google_breakpad::DwarfForm form,uint64_t data,google_breakpad::RangeListReader::CURangesInfo * cu_info,vector<Module::Range> * ranges)249   bool ReadRanges(
250       enum google_breakpad::DwarfForm form, uint64_t data,
251       google_breakpad::RangeListReader::CURangesInfo* cu_info,
252       vector<Module::Range>* ranges) {
253     DwarfRangeListHandler handler(ranges);
254     google_breakpad::RangeListReader range_list_reader(reader_, cu_info,
255                                                     &handler);
256     return range_list_reader.ReadRanges(form, data);
257   }
258 
259  private:
260   google_breakpad::ByteReader* reader_;
261 };
262 
263 // A line-to-module loader that accepts line number info parsed by
264 // google_breakpad::LineInfo and populates a Module and a line vector
265 // with the results.
266 class DumperLineToModule: public DwarfCUToModule::LineToModuleHandler {
267  public:
268   // Create a line-to-module converter using BYTE_READER.
DumperLineToModule(google_breakpad::ByteReader * byte_reader)269   explicit DumperLineToModule(google_breakpad::ByteReader* byte_reader)
270       : byte_reader_(byte_reader) { }
StartCompilationUnit(const string & compilation_dir)271   void StartCompilationUnit(const string& compilation_dir) {
272     compilation_dir_ = compilation_dir;
273   }
ReadProgram(const uint8_t * program,uint64_t length,const uint8_t * string_section,uint64_t string_section_length,const uint8_t * line_string_section,uint64_t line_string_section_length,Module * module,std::vector<Module::Line> * lines,std::map<uint32_t,Module::File * > * files)274   void ReadProgram(const uint8_t* program,
275                    uint64_t length,
276                    const uint8_t* string_section,
277                    uint64_t string_section_length,
278                    const uint8_t* line_string_section,
279                    uint64_t line_string_section_length,
280                    Module* module,
281                    std::vector<Module::Line>* lines,
282                    std::map<uint32_t, Module::File*>* files) {
283     DwarfLineToModule handler(module, compilation_dir_, lines, files);
284     google_breakpad::LineInfo parser(program, length, byte_reader_,
285                                   string_section, string_section_length,
286                                   line_string_section,
287                                   line_string_section_length,
288                                   &handler);
289     parser.Start();
290   }
291  private:
292   string compilation_dir_;
293   google_breakpad::ByteReader* byte_reader_;
294 };
295 
// Return true when SECTION's header carries the SHF_COMPRESSED flag.
template<typename ElfClass>
bool IsCompressedHeader(const typename ElfClass::Shdr* section) {
  if (section->sh_flags & SHF_COMPRESSED) {
    return true;
  }
  return false;
}
300 
// Copy the ELF compression header stored at the start of the SIZE-byte
// buffer CONTENT into COMPRESSION_HEADER.
//
// Returns the number of bytes the header occupies, i.e. how far the caller
// must advance to reach the compressed payload, or 0 when CONTENT is too
// short to hold a header. COMPRESSION_HEADER is left untouched in that case.
//
// The header is copied with memcpy rather than read through a casted
// pointer: CONTENT is a plain byte pointer, so nothing guarantees it is
// aligned suitably for a Chdr.
template<typename ElfClass>
uint32_t GetCompressionHeader(
    typename ElfClass::Chdr& compression_header,
    const uint8_t* content, uint64_t size) {
  typedef typename ElfClass::Chdr Chdr;

  if (size < sizeof(Chdr)) {
    return 0;
  }

  memcpy(&compression_header, content, sizeof(Chdr));
  return sizeof(Chdr);
}
315 
UncompressZlibSectionContents(const uint8_t * compressed_buffer,uint64_t compressed_size,uint64_t uncompressed_size)316 std::pair<uint8_t *, uint64_t> UncompressZlibSectionContents(
317     const uint8_t* compressed_buffer, uint64_t compressed_size, uint64_t uncompressed_size) {
318   z_stream stream;
319   memset(&stream, 0, sizeof stream);
320 
321   stream.avail_in = compressed_size;
322   stream.avail_out = uncompressed_size;
323   stream.next_in = const_cast<uint8_t *>(compressed_buffer);
324 
325   google_breakpad::scoped_array<uint8_t> uncompressed_buffer(
326     new uint8_t[uncompressed_size]);
327 
328   int status = inflateInit(&stream);
329   while (stream.avail_in != 0 && status == Z_OK) {
330     stream.next_out =
331       uncompressed_buffer.get() + uncompressed_size - stream.avail_out;
332 
333     if ((status = inflate(&stream, Z_FINISH)) != Z_STREAM_END) {
334       break;
335     }
336 
337     status = inflateReset(&stream);
338   }
339 
340   return inflateEnd(&stream) != Z_OK || status != Z_OK || stream.avail_out != 0
341     ? std::make_pair(nullptr, 0)
342     : std::make_pair(uncompressed_buffer.release(), uncompressed_size);
343 }
344 
#ifdef HAVE_LIBZSTD
// Decompress the COMPRESSED_SIZE bytes of Zstandard data at
// COMPRESSED_BUFFER into a newly allocated buffer of UNCOMPRESSED_SIZE bytes.
//
// On success returns the buffer (allocated with new[]; the caller takes
// ownership) paired with UNCOMPRESSED_SIZE. Returns (nullptr, 0) when
// ZSTD_decompress reports an error or produces a number of bytes other than
// UNCOMPRESSED_SIZE.
std::pair<uint8_t *, uint64_t> UncompressZstdSectionContents(
    const uint8_t* compressed_buffer, uint64_t compressed_size,uint64_t uncompressed_size) {

  google_breakpad::scoped_array<uint8_t> uncompressed_buffer(new uint8_t[uncompressed_size]);
  const size_t out_size = ZSTD_decompress(uncompressed_buffer.get(), uncompressed_size,
    compressed_buffer, compressed_size);

  // A short result means the section header lied about the decompressed
  // size. This is checked at run time rather than with assert(): with
  // assertions compiled out, the tail of the buffer would be handed to the
  // DWARF parsers uninitialized, and with assertions enabled a malformed
  // input file would abort the whole tool.
  if (ZSTD_isError(out_size) || out_size != uncompressed_size) {
    return std::make_pair(nullptr, 0);
  }
  return std::make_pair(uncompressed_buffer.release(), uncompressed_size);
}
#endif
359 
UncompressSectionContents(uint64_t compression_type,const uint8_t * compressed_buffer,uint64_t compressed_size,uint64_t uncompressed_size)360 std::pair<uint8_t *, uint64_t> UncompressSectionContents(
361     uint64_t compression_type, const uint8_t* compressed_buffer,
362     uint64_t compressed_size, uint64_t uncompressed_size) {
363   if (compression_type == ELFCOMPRESS_ZLIB) {
364     return UncompressZlibSectionContents(compressed_buffer, compressed_size, uncompressed_size);
365   }
366 
367 #ifdef HAVE_LIBZSTD
368   if (compression_type == ELFCOMPRESS_ZSTD) {
369     return UncompressZstdSectionContents(compressed_buffer, compressed_size, uncompressed_size);
370   }
371 #endif
372 
373   return std::make_pair(nullptr, 0);
374 }
375 
StartProcessSplitDwarf(google_breakpad::CompilationUnit * reader,Module * module,google_breakpad::Endianness endianness,bool handle_inter_cu_refs,bool handle_inline)376 void StartProcessSplitDwarf(google_breakpad::CompilationUnit* reader,
377                             Module* module,
378                             google_breakpad::Endianness endianness,
379                             bool handle_inter_cu_refs,
380                             bool handle_inline) {
381   std::string split_file;
382   google_breakpad::SectionMap split_sections;
383   google_breakpad::ByteReader split_byte_reader(endianness);
384   uint64_t cu_offset = 0;
385   if (!reader->ProcessSplitDwarf(split_file, split_sections, split_byte_reader,
386                                  cu_offset))
387     return;
388   DwarfCUToModule::FileContext file_context(split_file, module,
389                                             handle_inter_cu_refs);
390   for (auto section : split_sections)
391     file_context.AddSectionToSectionMap(section.first, section.second.first,
392                                         section.second.second);
393   // Because DWP/DWO file doesn't have .debug_addr/.debug_line/.debug_line_str,
394   // its debug info will refer to .debug_addr/.debug_line in the main binary.
395   if (file_context.section_map().find(".debug_addr") ==
396       file_context.section_map().end())
397     file_context.AddSectionToSectionMap(".debug_addr", reader->GetAddrBuffer(),
398                                         reader->GetAddrBufferLen());
399   if (file_context.section_map().find(".debug_line") ==
400       file_context.section_map().end())
401     file_context.AddSectionToSectionMap(".debug_line", reader->GetLineBuffer(),
402                                         reader->GetLineBufferLen());
403   if (file_context.section_map().find(".debug_line_str") ==
404       file_context.section_map().end())
405     file_context.AddSectionToSectionMap(".debug_line_str",
406                                         reader->GetLineStrBuffer(),
407                                         reader->GetLineStrBufferLen());
408 
409   DumperRangesHandler ranges_handler(&split_byte_reader);
410   DumperLineToModule line_to_module(&split_byte_reader);
411   DwarfCUToModule::WarningReporter reporter(split_file, cu_offset);
412   DwarfCUToModule root_handler(
413       &file_context, &line_to_module, &ranges_handler, &reporter, handle_inline,
414       reader->GetLowPC(), reader->GetAddrBase(), reader->HasSourceLineInfo(),
415       reader->GetSourceLineOffset());
416   google_breakpad::DIEDispatcher die_dispatcher(&root_handler);
417   google_breakpad::CompilationUnit split_reader(
418       split_file, file_context.section_map(), cu_offset, &split_byte_reader,
419       &die_dispatcher);
420   split_reader.SetSplitDwarf(reader->GetAddrBase(), reader->GetDWOID());
421   split_reader.Start();
422   // Normally, it won't happen unless we have transitive reference.
423   if (split_reader.ShouldProcessSplitDwarf()) {
424     StartProcessSplitDwarf(&split_reader, module, endianness,
425                            handle_inter_cu_refs, handle_inline);
426   }
427 }
428 
429 template<typename ElfClass>
LoadDwarf(const string & dwarf_filename,const typename ElfClass::Ehdr * elf_header,const bool big_endian,bool handle_inter_cu_refs,bool handle_inline,Module * module)430 bool LoadDwarf(const string& dwarf_filename,
431                const typename ElfClass::Ehdr* elf_header,
432                const bool big_endian,
433                bool handle_inter_cu_refs,
434                bool handle_inline,
435                Module* module) {
436   typedef typename ElfClass::Shdr Shdr;
437 
438   const google_breakpad::Endianness endianness = big_endian ?
439       google_breakpad::ENDIANNESS_BIG : google_breakpad::ENDIANNESS_LITTLE;
440   google_breakpad::ByteReader byte_reader(endianness);
441 
442   // Construct a context for this file.
443   DwarfCUToModule::FileContext file_context(dwarf_filename,
444                                             module,
445                                             handle_inter_cu_refs);
446 
447   // Build a map of the ELF file's sections.
448   const Shdr* sections =
449       GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
450   int num_sections = elf_header->e_shnum;
451   const Shdr* section_names = sections + elf_header->e_shstrndx;
452   for (int i = 0; i < num_sections; i++) {
453     const Shdr* section = &sections[i];
454     string name = GetOffset<ElfClass, char>(elf_header,
455                                             section_names->sh_offset) +
456                   section->sh_name;
457     const uint8_t* contents = GetOffset<ElfClass, uint8_t>(elf_header,
458                                                            section->sh_offset);
459     uint64_t size = section->sh_size;
460 
461     if (!IsCompressedHeader<ElfClass>(section)) {
462       file_context.AddSectionToSectionMap(name, contents, size);
463       continue;
464     }
465 
466     typename ElfClass::Chdr chdr;
467 
468     uint32_t compression_header_size =
469       GetCompressionHeader<ElfClass>(chdr, contents, size);
470 
471     if (compression_header_size == 0 || chdr.ch_size == 0) {
472       continue;
473     }
474 
475     contents += compression_header_size;
476     size -= compression_header_size;
477 
478     std::pair<uint8_t *, uint64_t> uncompressed =
479       UncompressSectionContents(chdr.ch_type, contents, size, chdr.ch_size);
480 
481     if (uncompressed.first != nullptr && uncompressed.second != 0) {
482       file_context.AddManagedSectionToSectionMap(name, uncompressed.first, uncompressed.second);
483     }
484   }
485 
486   // .debug_ranges and .debug_rnglists reader
487   DumperRangesHandler ranges_handler(&byte_reader);
488 
489   // Parse all the compilation units in the .debug_info section.
490   DumperLineToModule line_to_module(&byte_reader);
491   google_breakpad::SectionMap::const_iterator debug_info_entry =
492       file_context.section_map().find(".debug_info");
493   assert(debug_info_entry != file_context.section_map().end());
494   const std::pair<const uint8_t*, uint64_t>& debug_info_section =
495       debug_info_entry->second;
496   // This should never have been called if the file doesn't have a
497   // .debug_info section.
498   assert(debug_info_section.first);
499   uint64_t debug_info_length = debug_info_section.second;
500   for (uint64_t offset = 0; offset < debug_info_length;) {
501     // Make a handler for the root DIE that populates MODULE with the
502     // data that was found.
503     DwarfCUToModule::WarningReporter reporter(dwarf_filename, offset);
504     DwarfCUToModule root_handler(&file_context, &line_to_module,
505                                  &ranges_handler, &reporter, handle_inline);
506     // Make a Dwarf2Handler that drives the DIEHandler.
507     google_breakpad::DIEDispatcher die_dispatcher(&root_handler);
508     // Make a DWARF parser for the compilation unit at OFFSET.
509     google_breakpad::CompilationUnit reader(dwarf_filename,
510                                          file_context.section_map(),
511                                          offset,
512                                          &byte_reader,
513                                          &die_dispatcher);
514     // Process the entire compilation unit; get the offset of the next.
515     offset += reader.Start();
516     // Start to process split dwarf file.
517     if (reader.ShouldProcessSplitDwarf()) {
518       StartProcessSplitDwarf(&reader, module, endianness, handle_inter_cu_refs,
519                              handle_inline);
520     }
521   }
522   return true;
523 }
524 
525 // Fill REGISTER_NAMES with the register names appropriate to the
526 // machine architecture given in HEADER, indexed by the register
527 // numbers used in DWARF call frame information. Return true on
528 // success, or false if HEADER's machine architecture is not
529 // supported.
530 template<typename ElfClass>
DwarfCFIRegisterNames(const typename ElfClass::Ehdr * elf_header,std::vector<string> * register_names)531 bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
532                            std::vector<string>* register_names) {
533   switch (elf_header->e_machine) {
534     case EM_386:
535       *register_names = DwarfCFIToModule::RegisterNames::I386();
536       return true;
537     case EM_ARM:
538       *register_names = DwarfCFIToModule::RegisterNames::ARM();
539       return true;
540     case EM_AARCH64:
541       *register_names = DwarfCFIToModule::RegisterNames::ARM64();
542       return true;
543     case EM_MIPS:
544       *register_names = DwarfCFIToModule::RegisterNames::MIPS();
545       return true;
546     case EM_X86_64:
547       *register_names = DwarfCFIToModule::RegisterNames::X86_64();
548       return true;
549     case EM_RISCV:
550       *register_names = DwarfCFIToModule::RegisterNames::RISCV();
551       return true;
552     default:
553       return false;
554   }
555 }
556 
557 template<typename ElfClass>
LoadDwarfCFI(const string & dwarf_filename,const typename ElfClass::Ehdr * elf_header,const char * section_name,const typename ElfClass::Shdr * section,const bool eh_frame,const typename ElfClass::Shdr * got_section,const typename ElfClass::Shdr * text_section,const bool big_endian,Module * module)558 bool LoadDwarfCFI(const string& dwarf_filename,
559                   const typename ElfClass::Ehdr* elf_header,
560                   const char* section_name,
561                   const typename ElfClass::Shdr* section,
562                   const bool eh_frame,
563                   const typename ElfClass::Shdr* got_section,
564                   const typename ElfClass::Shdr* text_section,
565                   const bool big_endian,
566                   Module* module) {
567   // Find the appropriate set of register names for this file's
568   // architecture.
569   std::vector<string> register_names;
570   if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &register_names)) {
571     fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';"
572             " cannot convert DWARF call frame information\n",
573             dwarf_filename.c_str(), elf_header->e_machine);
574     return false;
575   }
576 
577   const google_breakpad::Endianness endianness = big_endian ?
578       google_breakpad::ENDIANNESS_BIG : google_breakpad::ENDIANNESS_LITTLE;
579 
580   // Find the call frame information and its size.
581   const uint8_t* cfi =
582       GetOffset<ElfClass, uint8_t>(elf_header, section->sh_offset);
583   size_t cfi_size = section->sh_size;
584 
585   // Plug together the parser, handler, and their entourages.
586   DwarfCFIToModule::Reporter module_reporter(dwarf_filename, section_name);
587   DwarfCFIToModule handler(module, register_names, &module_reporter);
588   google_breakpad::ByteReader byte_reader(endianness);
589 
590   byte_reader.SetAddressSize(ElfClass::kAddrSize);
591 
592   // Provide the base addresses for .eh_frame encoded pointers, if
593   // possible.
594   byte_reader.SetCFIDataBase(section->sh_addr, cfi);
595   if (got_section)
596     byte_reader.SetDataBase(got_section->sh_addr);
597   if (text_section)
598     byte_reader.SetTextBase(text_section->sh_addr);
599 
600   google_breakpad::CallFrameInfo::Reporter dwarf_reporter(dwarf_filename,
601                                                        section_name);
602   if (!IsCompressedHeader<ElfClass>(section)) {
603     google_breakpad::CallFrameInfo parser(cfi, cfi_size,
604                                           &byte_reader, &handler,
605                                           &dwarf_reporter, eh_frame);
606     parser.Start();
607     return true;
608   }
609 
610   typename ElfClass::Chdr chdr;
611   uint32_t compression_header_size =
612     GetCompressionHeader<ElfClass>(chdr, cfi, cfi_size);
613 
614   if (compression_header_size == 0 || chdr.ch_size == 0) {
615     fprintf(stderr, "%s: decompression failed at header\n",
616             dwarf_filename.c_str());
617     return false;
618   }
619   if (compression_header_size > cfi_size) {
620     fprintf(stderr, "%s: decompression error, compression_header too large\n",
621             dwarf_filename.c_str());
622     return false;
623   }
624 
625   cfi += compression_header_size;
626   cfi_size -= compression_header_size;
627 
628   std::pair<uint8_t *, uint64_t> uncompressed =
629     UncompressSectionContents(chdr.ch_type, cfi, cfi_size, chdr.ch_size);
630 
631   if (uncompressed.first == nullptr || uncompressed.second == 0) {
632     fprintf(stderr, "%s: decompression failed\n", dwarf_filename.c_str());
633     return false;
634   }
635   google_breakpad::CallFrameInfo parser(uncompressed.first, uncompressed.second,
636                                         &byte_reader, &handler, &dwarf_reporter,
637                                         eh_frame);
638   parser.Start();
639   return true;
640 }
641 
LoadELF(const string & obj_file,MmapWrapper * map_wrapper,void ** elf_header)642 bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
643              void** elf_header) {
644   int obj_fd = open(obj_file.c_str(), O_RDONLY);
645   if (obj_fd < 0) {
646     fprintf(stderr, "Failed to open ELF file '%s': %s\n",
647             obj_file.c_str(), strerror(errno));
648     return false;
649   }
650   FDWrapper obj_fd_wrapper(obj_fd);
651   struct stat st;
652   if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) {
653     fprintf(stderr, "Unable to fstat ELF file '%s': %s\n",
654             obj_file.c_str(), strerror(errno));
655     return false;
656   }
657   void* obj_base = mmap(NULL, st.st_size,
658                         PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0);
659   if (obj_base == MAP_FAILED) {
660     fprintf(stderr, "Failed to mmap ELF file '%s': %s\n",
661             obj_file.c_str(), strerror(errno));
662     return false;
663   }
664   map_wrapper->set(obj_base, st.st_size);
665   *elf_header = obj_base;
666   if (!IsValidElf(*elf_header)) {
667     fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
668     return false;
669   }
670   return true;
671 }
672 
// Determine the byte order recorded in ELF_HEADER's e_ident[EI_DATA].
//
// Stores false in *BIG_ENDIAN for ELFDATA2LSB and true for ELFDATA2MSB, and
// returns true. For any other encoding, prints a diagnostic to stderr, leaves
// *BIG_ENDIAN untouched and returns false.
template<typename ElfClass>
bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
                   bool* big_endian) {
  switch (elf_header->e_ident[EI_DATA]) {
    case ELFDATA2LSB:
      *big_endian = false;
      return true;

    case ELFDATA2MSB:
      *big_endian = true;
      return true;

    default:
      break;
  }

  fprintf(stderr, "bad data encoding in ELF header: %d\n",
          elf_header->e_ident[EI_DATA]);
  return false;
}
690 
// Resolve |right_path| with realpath() and report whether the result is
// identical, as a string, to |left_abspath|. Returns false if |right_path|
// cannot be resolved.
bool IsSameFile(const char* left_abspath, const string& right_path) {
  char resolved_right[PATH_MAX];
  const bool resolved =
      realpath(right_path.c_str(), resolved_right) != NULL;
  return resolved && strcmp(left_abspath, resolved_right) == 0;
}
699 
700 // Read the .gnu_debuglink and get the debug file name. If anything goes
701 // wrong, return an empty string.
ReadDebugLink(const uint8_t * debuglink,const size_t debuglink_size,const bool big_endian,const string & obj_file,const std::vector<string> & debug_dirs)702 string ReadDebugLink(const uint8_t* debuglink,
703                      const size_t debuglink_size,
704                      const bool big_endian,
705                      const string& obj_file,
706                      const std::vector<string>& debug_dirs) {
707   // Include '\0' + CRC32 (4 bytes).
708   size_t debuglink_len = strlen(reinterpret_cast<const char*>(debuglink)) + 5;
709   debuglink_len = 4 * ((debuglink_len + 3) / 4);  // Round up to 4 bytes.
710 
711   // Sanity check.
712   if (debuglink_len != debuglink_size) {
713     fprintf(stderr, "Mismatched .gnu_debuglink string / section size: "
714             "%zx %zx\n", debuglink_len, debuglink_size);
715     return string();
716   }
717 
718   char obj_file_abspath[PATH_MAX];
719   if (!realpath(obj_file.c_str(), obj_file_abspath)) {
720     fprintf(stderr, "Cannot resolve absolute path for %s\n", obj_file.c_str());
721     return string();
722   }
723 
724   std::vector<string> searched_paths;
725   string debuglink_path;
726   std::vector<string>::const_iterator it;
727   for (it = debug_dirs.begin(); it < debug_dirs.end(); ++it) {
728     const string& debug_dir = *it;
729     debuglink_path = debug_dir + "/" +
730                      reinterpret_cast<const char*>(debuglink);
731 
732     // There is the annoying case of /path/to/foo.so having foo.so as the
733     // debug link file name. Thus this may end up opening /path/to/foo.so again,
734     // and there is a small chance of the two files having the same CRC.
735     if (IsSameFile(obj_file_abspath, debuglink_path))
736       continue;
737 
738     searched_paths.push_back(debug_dir);
739     int debuglink_fd = open(debuglink_path.c_str(), O_RDONLY);
740     if (debuglink_fd < 0)
741       continue;
742 
743     FDWrapper debuglink_fd_wrapper(debuglink_fd);
744 
745     // The CRC is the last 4 bytes in |debuglink|.
746     const google_breakpad::Endianness endianness = big_endian ?
747         google_breakpad::ENDIANNESS_BIG : google_breakpad::ENDIANNESS_LITTLE;
748     google_breakpad::ByteReader byte_reader(endianness);
749     uint32_t expected_crc =
750         byte_reader.ReadFourBytes(&debuglink[debuglink_size - 4]);
751 
752     uint32_t actual_crc = 0;
753     while (true) {
754       const size_t kReadSize = 4096;
755       char buf[kReadSize];
756       ssize_t bytes_read = HANDLE_EINTR(read(debuglink_fd, &buf, kReadSize));
757       if (bytes_read < 0) {
758         fprintf(stderr, "Error reading debug ELF file %s.\n",
759                 debuglink_path.c_str());
760         return string();
761       }
762       if (bytes_read == 0)
763         break;
764       actual_crc = google_breakpad::UpdateCrc32(actual_crc, buf, bytes_read);
765     }
766     if (actual_crc != expected_crc) {
767       fprintf(stderr, "Error reading debug ELF file - CRC32 mismatch: %s\n",
768               debuglink_path.c_str());
769       continue;
770     }
771 
772     // Found debug file.
773     return debuglink_path;
774   }
775 
776   // Not found case.
777   fprintf(stderr, "Failed to find debug ELF file for '%s' after trying:\n",
778           obj_file.c_str());
779   for (it = searched_paths.begin(); it < searched_paths.end(); ++it) {
780     const string& debug_dir = *it;
781     fprintf(stderr, "  %s/%s\n", debug_dir.c_str(), debuglink);
782   }
783   return string();
784 }
785 
786 //
787 // LoadSymbolsInfo
788 //
789 // Holds the state between the two calls to LoadSymbols() in case it's necessary
790 // to follow the .gnu_debuglink section and load debug information from a
791 // different file.
792 //
793 template<typename ElfClass>
794 class LoadSymbolsInfo {
795  public:
796   typedef typename ElfClass::Addr Addr;
797 
LoadSymbolsInfo(const std::vector<string> & dbg_dirs)798   explicit LoadSymbolsInfo(const std::vector<string>& dbg_dirs) :
799     debug_dirs_(dbg_dirs),
800     has_loading_addr_(false) {}
801 
802   // Keeps track of which sections have been loaded so sections don't
803   // accidentally get loaded twice from two different files.
LoadedSection(const string & section)804   void LoadedSection(const string& section) {
805     if (loaded_sections_.count(section) == 0) {
806       loaded_sections_.insert(section);
807     } else {
808       fprintf(stderr, "Section %s has already been loaded.\n",
809               section.c_str());
810     }
811   }
812 
813   // The ELF file and linked debug file are expected to have the same preferred
814   // loading address.
set_loading_addr(Addr addr,const string & filename)815   void set_loading_addr(Addr addr, const string& filename) {
816     if (!has_loading_addr_) {
817       loading_addr_ = addr;
818       loaded_file_ = filename;
819       return;
820     }
821 
822     if (addr != loading_addr_) {
823       fprintf(stderr,
824               "ELF file '%s' and debug ELF file '%s' "
825               "have different load addresses.\n",
826               loaded_file_.c_str(), filename.c_str());
827       assert(false);
828     }
829   }
830 
831   // Setters and getters
debug_dirs() const832   const std::vector<string>& debug_dirs() const {
833     return debug_dirs_;
834   }
835 
debuglink_file() const836   string debuglink_file() const {
837     return debuglink_file_;
838   }
set_debuglink_file(string file)839   void set_debuglink_file(string file) {
840     debuglink_file_ = file;
841   }
842 
843  private:
844   const std::vector<string>& debug_dirs_; // Directories in which to
845                                           // search for the debug ELF file.
846 
847   string debuglink_file_;  // Full path to the debug ELF file.
848 
849   bool has_loading_addr_;  // Indicate if LOADING_ADDR_ is valid.
850 
851   Addr loading_addr_;  // Saves the preferred loading address from the
852                        // first call to LoadSymbols().
853 
854   string loaded_file_;  // Name of the file loaded from the first call to
855                         // LoadSymbols().
856 
857   std::set<string> loaded_sections_;  // Tracks the Loaded ELF sections
858                                       // between calls to LoadSymbols().
859 };
860 
861 template<typename ElfClass>
LoadSymbols(const string & obj_file,const bool big_endian,const typename ElfClass::Ehdr * elf_header,const bool read_gnu_debug_link,LoadSymbolsInfo<ElfClass> * info,const DumpOptions & options,Module * module)862 bool LoadSymbols(const string& obj_file,
863                  const bool big_endian,
864                  const typename ElfClass::Ehdr* elf_header,
865                  const bool read_gnu_debug_link,
866                  LoadSymbolsInfo<ElfClass>* info,
867                  const DumpOptions& options,
868                  Module* module) {
869   typedef typename ElfClass::Addr Addr;
870   typedef typename ElfClass::Phdr Phdr;
871   typedef typename ElfClass::Shdr Shdr;
872 
873   Addr loading_addr = GetLoadingAddress<ElfClass>(
874       GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
875       elf_header->e_phnum);
876   module->SetLoadAddress(loading_addr);
877   info->set_loading_addr(loading_addr, obj_file);
878 
879   // Allow filtering of extraneous debug information in partitioned libraries.
880   // Such libraries contain debug information for all libraries extracted from
881   // the same combined library, implying extensive duplication.
882   vector<Module::Range> address_ranges = GetPtLoadSegmentRanges<ElfClass>(
883       GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
884       elf_header->e_phnum);
885   module->SetAddressRanges(address_ranges);
886 
887   const Shdr* sections =
888       GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
889   const Shdr* section_names = sections + elf_header->e_shstrndx;
890   const char* names =
891       GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
892   const char* names_end = names + section_names->sh_size;
893   bool found_debug_info_section = false;
894   bool found_usable_info = false;
895 
896   if ((options.symbol_data & SYMBOLS_AND_FILES) ||
897       (options.symbol_data & INLINES)) {
898 #ifndef NO_STABS_SUPPORT
899     // Look for STABS debugging information, and load it if present.
900     const Shdr* stab_section =
901       FindElfSectionByName<ElfClass>(".stab", SHT_PROGBITS,
902                                      sections, names, names_end,
903                                      elf_header->e_shnum);
904     if (stab_section) {
905       const Shdr* stabstr_section = stab_section->sh_link + sections;
906       if (stabstr_section) {
907         found_debug_info_section = true;
908         found_usable_info = true;
909         info->LoadedSection(".stab");
910         if (!LoadStabs<ElfClass>(elf_header, stab_section, stabstr_section,
911                                  big_endian, module)) {
912           fprintf(stderr, "%s: \".stab\" section found, but failed to load"
913                   " STABS debugging information\n", obj_file.c_str());
914         }
915       }
916     }
917 #endif  // NO_STABS_SUPPORT
918 
919     // See if there are export symbols available.
920     const Shdr* symtab_section =
921         FindElfSectionByName<ElfClass>(".symtab", SHT_SYMTAB,
922                                        sections, names, names_end,
923                                        elf_header->e_shnum);
924     const Shdr* strtab_section =
925         FindElfSectionByName<ElfClass>(".strtab", SHT_STRTAB,
926                                        sections, names, names_end,
927                                        elf_header->e_shnum);
928     if (symtab_section && strtab_section) {
929       info->LoadedSection(".symtab");
930 
931       const uint8_t* symtab =
932           GetOffset<ElfClass, uint8_t>(elf_header,
933                                        symtab_section->sh_offset);
934       const uint8_t* strtab =
935           GetOffset<ElfClass, uint8_t>(elf_header,
936                                        strtab_section->sh_offset);
937       bool result =
938           ELFSymbolsToModule(symtab,
939                              symtab_section->sh_size,
940                              strtab,
941                              strtab_section->sh_size,
942                              big_endian,
943                              ElfClass::kAddrSize,
944                              module);
945       found_usable_info = found_usable_info || result;
946     } else {
947       // Look in dynsym only if full symbol table was not available.
948       const Shdr* dynsym_section =
949           FindElfSectionByName<ElfClass>(".dynsym", SHT_DYNSYM,
950                                          sections, names, names_end,
951                                          elf_header->e_shnum);
952       const Shdr* dynstr_section =
953           FindElfSectionByName<ElfClass>(".dynstr", SHT_STRTAB,
954                                          sections, names, names_end,
955                                          elf_header->e_shnum);
956       if (dynsym_section && dynstr_section) {
957         info->LoadedSection(".dynsym");
958 
959         const uint8_t* dynsyms =
960             GetOffset<ElfClass, uint8_t>(elf_header,
961                                          dynsym_section->sh_offset);
962         const uint8_t* dynstrs =
963             GetOffset<ElfClass, uint8_t>(elf_header,
964                                          dynstr_section->sh_offset);
965         bool result =
966             ELFSymbolsToModule(dynsyms,
967                                dynsym_section->sh_size,
968                                dynstrs,
969                                dynstr_section->sh_size,
970                                big_endian,
971                                ElfClass::kAddrSize,
972                                module);
973         found_usable_info = found_usable_info || result;
974       }
975     }
976 
977     // Only Load .debug_info after loading symbol table to avoid duplicate
978     // PUBLIC records.
979     // Look for DWARF debugging information, and load it if present.
980     const Shdr* dwarf_section =
981       FindElfSectionByName<ElfClass>(".debug_info", SHT_PROGBITS,
982                                      sections, names, names_end,
983                                      elf_header->e_shnum);
984 
985     // .debug_info section type is SHT_PROGBITS for mips on pnacl toolchains,
986     // but MIPS_DWARF for regular gnu toolchains, so both need to be checked
987     if (elf_header->e_machine == EM_MIPS && !dwarf_section) {
988       dwarf_section =
989         FindElfSectionByName<ElfClass>(".debug_info", SHT_MIPS_DWARF,
990                                        sections, names, names_end,
991                                        elf_header->e_shnum);
992     }
993 
994     if (dwarf_section) {
995       found_debug_info_section = true;
996       found_usable_info = true;
997       info->LoadedSection(".debug_info");
998       if (!LoadDwarf<ElfClass>(obj_file, elf_header, big_endian,
999                                options.handle_inter_cu_refs,
1000                                options.symbol_data & INLINES, module)) {
1001         fprintf(stderr, "%s: \".debug_info\" section found, but failed to load "
1002                 "DWARF debugging information\n", obj_file.c_str());
1003       }
1004     }
1005   }
1006 
1007   if (options.symbol_data & CFI) {
1008     // Dwarf Call Frame Information (CFI) is actually independent from
1009     // the other DWARF debugging information, and can be used alone.
1010     const Shdr* dwarf_cfi_section =
1011         FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS,
1012                                        sections, names, names_end,
1013                                        elf_header->e_shnum);
1014 
1015     // .debug_frame section type is SHT_PROGBITS for mips on pnacl toolchains,
1016     // but MIPS_DWARF for regular gnu toolchains, so both need to be checked
1017     if (elf_header->e_machine == EM_MIPS && !dwarf_cfi_section) {
1018       dwarf_cfi_section =
1019           FindElfSectionByName<ElfClass>(".debug_frame", SHT_MIPS_DWARF,
1020                                         sections, names, names_end,
1021                                         elf_header->e_shnum);
1022     }
1023 
1024     if (dwarf_cfi_section) {
1025       // Ignore the return value of this function; even without call frame
1026       // information, the other debugging information could be perfectly
1027       // useful.
1028       info->LoadedSection(".debug_frame");
1029       bool result =
1030           LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
1031                                  dwarf_cfi_section, false, 0, 0, big_endian,
1032                                  module);
1033       found_usable_info = found_usable_info || result;
1034     }
1035 
1036     // Linux C++ exception handling information can also provide
1037     // unwinding data.
1038     const Shdr* eh_frame_section =
1039         FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS,
1040                                        sections, names, names_end,
1041                                        elf_header->e_shnum);
1042     if (eh_frame_section) {
1043       // Pointers in .eh_frame data may be relative to the base addresses of
1044       // certain sections. Provide those sections if present.
1045       const Shdr* got_section =
1046           FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS,
1047                                          sections, names, names_end,
1048                                          elf_header->e_shnum);
1049       const Shdr* text_section =
1050           FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS,
1051                                          sections, names, names_end,
1052                                          elf_header->e_shnum);
1053       info->LoadedSection(".eh_frame");
1054       // As above, ignore the return value of this function.
1055       bool result =
1056           LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame",
1057                                  eh_frame_section, true,
1058                                  got_section, text_section, big_endian, module);
1059       found_usable_info = found_usable_info || result;
1060     }
1061   }
1062 
1063   if (!found_debug_info_section) {
1064     fprintf(stderr, "%s: file contains no debugging information"
1065             " (no \".stab\" or \".debug_info\" sections)\n",
1066             obj_file.c_str());
1067 
1068     // Failed, but maybe there's a .gnu_debuglink section?
1069     if (read_gnu_debug_link) {
1070       const Shdr* gnu_debuglink_section
1071           = FindElfSectionByName<ElfClass>(".gnu_debuglink", SHT_PROGBITS,
1072                                            sections, names,
1073                                            names_end, elf_header->e_shnum);
1074       if (gnu_debuglink_section) {
1075         if (!info->debug_dirs().empty()) {
1076           const uint8_t* debuglink_contents =
1077               GetOffset<ElfClass, uint8_t>(elf_header,
1078                                            gnu_debuglink_section->sh_offset);
1079           string debuglink_file =
1080               ReadDebugLink(debuglink_contents,
1081                             gnu_debuglink_section->sh_size,
1082                             big_endian,
1083                             obj_file,
1084                             info->debug_dirs());
1085           info->set_debuglink_file(debuglink_file);
1086         } else {
1087           fprintf(stderr, ".gnu_debuglink section found in '%s', "
1088                   "but no debug path specified.\n", obj_file.c_str());
1089         }
1090       } else {
1091         fprintf(stderr, "%s does not contain a .gnu_debuglink section.\n",
1092                 obj_file.c_str());
1093       }
1094     } else {
1095       // Return true if some usable information was found, since the caller
1096       // doesn't want to use .gnu_debuglink.
1097       return found_usable_info;
1098     }
1099 
1100     // No debug info was found, let the user try again with .gnu_debuglink
1101     // if present.
1102     return false;
1103   }
1104 
1105   return true;
1106 }
1107 
// Translate the e_machine field of |elf_header| into the architecture name
// used in Breakpad symbol files. Returns NULL for any machine type that is
// not in the table below.
template<typename ElfClass>
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
  typedef typename ElfClass::Half Half;

  struct MachineName {
    int machine;
    const char* name;
  };
  static const MachineName kKnownMachines[] = {
    { EM_386,     "x86" },
    { EM_ARM,     "arm" },
    { EM_AARCH64, "arm64" },
    { EM_MIPS,    "mips" },
    { EM_PPC64,   "ppc64" },
    { EM_PPC,     "ppc" },
    { EM_S390,    "s390" },
    { EM_SPARC,   "sparc" },
    { EM_SPARCV9, "sparcv9" },
    { EM_X86_64,  "x86_64" },
    { EM_RISCV,   "riscv" },
  };
  const size_t kKnownMachineCount =
      sizeof(kKnownMachines) / sizeof(kKnownMachines[0]);

  const Half wanted = elf_header->e_machine;
  for (size_t i = 0; i < kKnownMachineCount; ++i) {
    if (kKnownMachines[i].machine == wanted)
      return kKnownMachines[i].name;
  }
  return NULL;
}
1129 
1130 template<typename ElfClass>
SanitizeDebugFile(const typename ElfClass::Ehdr * debug_elf_header,const string & debuglink_file,const string & obj_filename,const char * obj_file_architecture,const bool obj_file_is_big_endian)1131 bool SanitizeDebugFile(const typename ElfClass::Ehdr* debug_elf_header,
1132                        const string& debuglink_file,
1133                        const string& obj_filename,
1134                        const char* obj_file_architecture,
1135                        const bool obj_file_is_big_endian) {
1136   const char* debug_architecture =
1137       ElfArchitecture<ElfClass>(debug_elf_header);
1138   if (!debug_architecture) {
1139     fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
1140             debuglink_file.c_str(), debug_elf_header->e_machine);
1141     return false;
1142   }
1143   if (strcmp(obj_file_architecture, debug_architecture)) {
1144     fprintf(stderr, "%s with ELF machine architecture %s does not match "
1145             "%s with ELF architecture %s\n",
1146             debuglink_file.c_str(), debug_architecture,
1147             obj_filename.c_str(), obj_file_architecture);
1148     return false;
1149   }
1150   bool debug_big_endian;
1151   if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian))
1152     return false;
1153   if (debug_big_endian != obj_file_is_big_endian) {
1154     fprintf(stderr, "%s and %s does not match in endianness\n",
1155             obj_filename.c_str(), debuglink_file.c_str());
1156     return false;
1157   }
1158   return true;
1159 }
1160 
1161 template<typename ElfClass>
InitModuleForElfClass(const typename ElfClass::Ehdr * elf_header,const string & obj_filename,const string & obj_os,scoped_ptr<Module> & module,bool enable_multiple_field)1162 bool InitModuleForElfClass(const typename ElfClass::Ehdr* elf_header,
1163                            const string& obj_filename,
1164                            const string& obj_os,
1165                            scoped_ptr<Module>& module,
1166                            bool enable_multiple_field) {
1167   PageAllocator allocator;
1168   wasteful_vector<uint8_t> identifier(&allocator, kDefaultBuildIdSize);
1169   if (!FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) {
1170     fprintf(stderr, "%s: unable to generate file identifier\n",
1171             obj_filename.c_str());
1172     return false;
1173   }
1174 
1175   const char* architecture = ElfArchitecture<ElfClass>(elf_header);
1176   if (!architecture) {
1177     fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
1178             obj_filename.c_str(), elf_header->e_machine);
1179     return false;
1180   }
1181 
1182   char name_buf[NAME_MAX] = {};
1183   std::string name = google_breakpad::ElfFileSoNameFromMappedFile(
1184                          elf_header, name_buf, sizeof(name_buf))
1185                          ? name_buf
1186                          : google_breakpad::BaseName(obj_filename);
1187 
1188   // Add an extra "0" at the end.  PDB files on Windows have an 'age'
1189   // number appended to the end of the file identifier; this isn't
1190   // really used or necessary on other platforms, but be consistent.
1191   string id = FileID::ConvertIdentifierToUUIDString(identifier) + "0";
1192   // This is just the raw Build ID in hex.
1193   string code_id = FileID::ConvertIdentifierToString(identifier);
1194 
1195   module.reset(new Module(name, obj_os, architecture, id, code_id,
1196                           enable_multiple_field));
1197 
1198   return true;
1199 }
1200 
1201 template<typename ElfClass>
ReadSymbolDataElfClass(const typename ElfClass::Ehdr * elf_header,const string & obj_filename,const string & obj_os,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** out_module)1202 bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
1203                             const string& obj_filename,
1204                             const string& obj_os,
1205                             const std::vector<string>& debug_dirs,
1206                             const DumpOptions& options,
1207                             Module** out_module) {
1208   typedef typename ElfClass::Ehdr Ehdr;
1209 
1210   *out_module = NULL;
1211 
1212   scoped_ptr<Module> module;
1213   if (!InitModuleForElfClass<ElfClass>(elf_header, obj_filename, obj_os, module,
1214                                        options.enable_multiple_field)) {
1215     return false;
1216   }
1217 
1218   // Figure out what endianness this file is.
1219   bool big_endian;
1220   if (!ElfEndianness<ElfClass>(elf_header, &big_endian))
1221     return false;
1222 
1223   LoadSymbolsInfo<ElfClass> info(debug_dirs);
1224   if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
1225                              !debug_dirs.empty(), &info,
1226                              options, module.get())) {
1227     const string debuglink_file = info.debuglink_file();
1228     if (debuglink_file.empty())
1229       return false;
1230 
1231     // Load debuglink ELF file.
1232     fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
1233     MmapWrapper debug_map_wrapper;
1234     Ehdr* debug_elf_header = NULL;
1235     if (!LoadELF(debuglink_file, &debug_map_wrapper,
1236                  reinterpret_cast<void**>(&debug_elf_header)) ||
1237         !SanitizeDebugFile<ElfClass>(debug_elf_header, debuglink_file,
1238                                      obj_filename,
1239                                      module->architecture().c_str(),
1240                                      big_endian)) {
1241       return false;
1242     }
1243 
1244     if (!LoadSymbols<ElfClass>(debuglink_file, big_endian,
1245                                debug_elf_header, false, &info,
1246                                options, module.get())) {
1247       return false;
1248     }
1249   }
1250 
1251   *out_module = module.release();
1252   return true;
1253 }
1254 
1255 }  // namespace
1256 
1257 namespace google_breakpad {
1258 
1259 // Not explicitly exported, but not static so it can be used in unit tests.
ReadSymbolDataInternal(const uint8_t * obj_file,const string & obj_filename,const string & obj_os,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** module)1260 bool ReadSymbolDataInternal(const uint8_t* obj_file,
1261                             const string& obj_filename,
1262                             const string& obj_os,
1263                             const std::vector<string>& debug_dirs,
1264                             const DumpOptions& options,
1265                             Module** module) {
1266   if (!IsValidElf(obj_file)) {
1267     fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
1268     return false;
1269   }
1270 
1271   int elfclass = ElfClass(obj_file);
1272   if (elfclass == ELFCLASS32) {
1273     return ReadSymbolDataElfClass<ElfClass32>(
1274         reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename, obj_os,
1275         debug_dirs, options, module);
1276   }
1277   if (elfclass == ELFCLASS64) {
1278     return ReadSymbolDataElfClass<ElfClass64>(
1279         reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename, obj_os,
1280         debug_dirs, options, module);
1281   }
1282 
1283   return false;
1284 }
1285 
WriteSymbolFile(const string & load_path,const string & obj_file,const string & obj_os,const std::vector<string> & debug_dirs,const DumpOptions & options,std::ostream & sym_stream)1286 bool WriteSymbolFile(const string& load_path,
1287                      const string& obj_file,
1288                      const string& obj_os,
1289                      const std::vector<string>& debug_dirs,
1290                      const DumpOptions& options,
1291                      std::ostream& sym_stream) {
1292   Module* module;
1293   if (!ReadSymbolData(load_path, obj_file, obj_os, debug_dirs, options,
1294                       &module))
1295     return false;
1296 
1297   bool result = module->Write(sym_stream, options.symbol_data);
1298   delete module;
1299   return result;
1300 }
1301 
1302 // Read the selected object file's debugging information, and write out the
1303 // header only to |stream|. Return true on success; if an error occurs, report
1304 // it and return false.
WriteSymbolFileHeader(const string & load_path,const string & obj_file,const string & obj_os,std::ostream & sym_stream)1305 bool WriteSymbolFileHeader(const string& load_path,
1306                            const string& obj_file,
1307                            const string& obj_os,
1308                            std::ostream& sym_stream) {
1309   MmapWrapper map_wrapper;
1310   void* elf_header = NULL;
1311   if (!LoadELF(load_path, &map_wrapper, &elf_header)) {
1312     fprintf(stderr, "Could not load ELF file: %s\n", obj_file.c_str());
1313     return false;
1314   }
1315 
1316   if (!IsValidElf(elf_header)) {
1317     fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
1318     return false;
1319   }
1320 
1321   int elfclass = ElfClass(elf_header);
1322   scoped_ptr<Module> module;
1323   if (elfclass == ELFCLASS32) {
1324     if (!InitModuleForElfClass<ElfClass32>(
1325         reinterpret_cast<const Elf32_Ehdr*>(elf_header), obj_file, obj_os,
1326         module, /*enable_multiple_field=*/false)) {
1327       fprintf(stderr, "Failed to load ELF module: %s\n", obj_file.c_str());
1328       return false;
1329     }
1330   } else if (elfclass == ELFCLASS64) {
1331     if (!InitModuleForElfClass<ElfClass64>(
1332         reinterpret_cast<const Elf64_Ehdr*>(elf_header), obj_file, obj_os,
1333         module, /*enable_multiple_field=*/false)) {
1334       fprintf(stderr, "Failed to load ELF module: %s\n", obj_file.c_str());
1335       return false;
1336     }
1337   } else {
1338     fprintf(stderr, "Unsupported module file: %s\n", obj_file.c_str());
1339     return false;
1340   }
1341 
1342   return module->Write(sym_stream, ALL_SYMBOL_DATA);
1343 }
1344 
ReadSymbolData(const string & load_path,const string & obj_file,const string & obj_os,const std::vector<string> & debug_dirs,const DumpOptions & options,Module ** module)1345 bool ReadSymbolData(const string& load_path,
1346                     const string& obj_file,
1347                     const string& obj_os,
1348                     const std::vector<string>& debug_dirs,
1349                     const DumpOptions& options,
1350                     Module** module) {
1351   MmapWrapper map_wrapper;
1352   void* elf_header = NULL;
1353   if (!LoadELF(load_path, &map_wrapper, &elf_header))
1354     return false;
1355 
1356   return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header),
1357                                 obj_file, obj_os, debug_dirs, options, module);
1358 }
1359 
1360 }  // namespace google_breakpad
1361