xref: /aosp_15_r20/external/google-breakpad/src/common/dwarf/elf_reader.cc (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // Copyright 2005 Google LLC
2 // Author: [email protected] (Andrew Chatham)
3 // Author: [email protected] (Satoru Takabayashi)
4 //
5 // Code for reading in ELF files.
6 //
7 // For information on the ELF format, see
8 // http://www.x86.org/ftp/manuals/tools/elf.pdf
9 //
10 // I also liked:
11 // http://www.caldera.com/developers/gabi/1998-04-29/contents.html
12 //
13 // A note about types: When dealing with the file format, we use types
14 // like Elf32_Word, but in the public interfaces we treat all
15 // addresses as uint64. As a result, we should be able to symbolize
16 // 64-bit binaries from a 32-bit process (which we don't do,
17 // anyway). size_t should therefore be avoided, except where required
18 // by things like mmap().
19 //
20 // Although most of this code can deal with arbitrary ELF files of
21 // either word size, the public ElfReader interface only examines
22 // files loaded into the current address space, which must all match
23 // the machine's native word size. This code cannot handle ELF files
24 // with a non-native byte ordering.
25 //
26 // TODO(chatham): It would be nice if we could accomplish this task
27 // without using malloc(), so we could use it as the process is dying.
28 
29 #ifndef _GNU_SOURCE
30 #define _GNU_SOURCE  // needed for pread()
31 #endif
32 
33 #ifdef HAVE_CONFIG_H
34 #include <config.h>  // Must come first
35 #endif
36 
37 #include <fcntl.h>
38 #include <limits.h>
39 #include <string.h>
40 #include <sys/mman.h>
41 #include <sys/stat.h>
42 #include <sys/types.h>
43 #include <unistd.h>
44 
45 #include <algorithm>
46 #include <map>
47 #include <string>
48 #include <string_view>
49 #include <vector>
50 // TODO(saugustine): Add support for compressed debug.
51 // Also need to add configure tests for zlib.
52 //#include "zlib.h"
53 
54 #include "third_party/musl/include/elf.h"
55 #include "elf_reader.h"
56 #include "common/using_std_string.h"
57 
58 // EM_AARCH64 is not defined by elf.h of GRTE v3 on x86.
59 // TODO(dougkwan): Remove this when v17 is retired.
60 #if !defined(EM_AARCH64)
61 #define EM_AARCH64      183             /* ARM AARCH64 */
62 #endif
63 
64 // Map Linux macros to their Apple equivalents.
65 #if __APPLE__
66 #ifndef __LITTLE_ENDIAN
67 #define __LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__
68 #endif  // __LITTLE_ENDIAN
69 #ifndef __BIG_ENDIAN
70 #define __BIG_ENDIAN __ORDER_BIG_ENDIAN__
71 #endif  // __BIG_ENDIAN
72 #ifndef __BYTE_ORDER
73 #define __BYTE_ORDER __BYTE_ORDER__
74 #endif  // __BYTE_ORDER
75 #endif  // __APPLE__
76 
77 // TODO(dthomson): Can be removed once all Java code is using the Google3
78 // launcher. We need to avoid processing PLT functions as it causes memory
79 // fragmentation in malloc, which is fixed in tcmalloc - and if the Google3
80 // launcher is used the JVM will then use tcmalloc. b/13735638
81 //DEFINE_bool(elfreader_process_dynsyms, true,
82 //            "Activate PLT function processing");
83 
84 using std::vector;
85 
86 namespace {
87 
// Bit position, within an ARM symbol value, of the flag that marks a Thumb
// address. The flag occupies the least significant bit.
const int kARMThumbBitOffset = 0;

// Returns |symbol_table_value| with the Thumb marker bit cleared, which
// yields the true aligned address for the symbol.
template <typename T>
T AdjustARMThumbSymbolValue(const T& symbol_table_value) {
  const int thumb_bit = 1 << kARMThumbBitOffset;
  const int keep_mask = ~thumb_bit;
  return symbol_table_value & keep_mask;
}
96 
// Section names involved in PLT processing.
constexpr char kElfPLTRelSectionName[] = ".rel.plt";    // Entries are Rel.
constexpr char kElfPLTRelaSectionName[] = ".rela.plt";  // Entries are Rela.
constexpr char kElfPLTSectionName[] = ".plt";
constexpr char kElfDynSymSectionName[] = ".dynsym";

// Size in bytes of a single PLT function, per architecture.
constexpr int kX86PLTCodeSize = 0x10;
constexpr int kARMPLTCodeSize = 0xc;
constexpr int kAARCH64PLTCodeSize = 0x10;

// Size in bytes of the special PLT0 entry, per architecture.
constexpr int kX86PLT0Size = 0x10;
constexpr int kARMPLT0Size = 0x14;
constexpr int kAARCH64PLT0Size = 0x20;

// Appended to a symbol name when it must be explicitly identified as a PLT
// function.
constexpr char kPLTFunctionSuffix[] = "@plt";
113 
// Stand-in for std::string_view::starts_with. Switch call sites over to the
// standard member function once this code is built as C++20.
bool StringViewStartsWith(std::string_view sv, std::string_view prefix) {
  if (prefix.size() > sv.size())
    return false;
  return sv.substr(0, prefix.size()) == prefix;
}
119 
120 }  // namespace
121 
122 namespace google_breakpad {
123 
124 template <class ElfArch> class ElfReaderImpl;
125 
126 // 32-bit and 64-bit ELF files are processed exactly the same, except
127 // for various field sizes. Elf32 and Elf64 encompass all of the
128 // differences between the two formats, and all format-specific code
129 // in this file is templated on one of them.
130 class Elf32 {
131  public:
132   typedef Elf32_Ehdr Ehdr;
133   typedef Elf32_Shdr Shdr;
134   typedef Elf32_Phdr Phdr;
135   typedef Elf32_Word Word;
136   typedef Elf32_Sym Sym;
137   typedef Elf32_Rel Rel;
138   typedef Elf32_Rela Rela;
139 
140   // What should be in the EI_CLASS header.
141   static const int kElfClass = ELFCLASS32;
142 
143   // Given a symbol pointer, return the binding type (eg STB_WEAK).
Bind(const Elf32_Sym * sym)144   static char Bind(const Elf32_Sym* sym) {
145     return ELF32_ST_BIND(sym->st_info);
146   }
147   // Given a symbol pointer, return the symbol type (eg STT_FUNC).
Type(const Elf32_Sym * sym)148   static char Type(const Elf32_Sym* sym) {
149     return ELF32_ST_TYPE(sym->st_info);
150   }
151 
152   // Extract the symbol index from the r_info field of a relocation.
r_sym(const Elf32_Word r_info)153   static int r_sym(const Elf32_Word r_info) {
154     return ELF32_R_SYM(r_info);
155   }
156 };
157 
158 
159 class Elf64 {
160  public:
161   typedef Elf64_Ehdr Ehdr;
162   typedef Elf64_Shdr Shdr;
163   typedef Elf64_Phdr Phdr;
164   typedef Elf64_Word Word;
165   typedef Elf64_Sym Sym;
166   typedef Elf64_Rel Rel;
167   typedef Elf64_Rela Rela;
168 
169   // What should be in the EI_CLASS header.
170   static const int kElfClass = ELFCLASS64;
171 
Bind(const Elf64_Sym * sym)172   static char Bind(const Elf64_Sym* sym) {
173     return ELF64_ST_BIND(sym->st_info);
174   }
Type(const Elf64_Sym * sym)175   static char Type(const Elf64_Sym* sym) {
176     return ELF64_ST_TYPE(sym->st_info);
177   }
r_sym(const Elf64_Xword r_info)178   static int r_sym(const Elf64_Xword r_info) {
179     return ELF64_R_SYM(r_info);
180   }
181 };
182 
183 
184 // ElfSectionReader mmaps a section of an ELF file ("section" is ELF
185 // terminology). The ElfReaderImpl object providing the section header
186 // must exist for the lifetime of this object.
187 //
188 // The motivation for mmaping individual sections of the file is that
189 // many Google executables are large enough when unstripped that we
190 // have to worry about running out of virtual address space.
191 //
192 // For compressed sections we have no choice but to allocate memory.
193 template<class ElfArch>
194 class ElfSectionReader {
195  public:
ElfSectionReader(const char * cname,const string & path,int fd,const typename ElfArch::Shdr & section_header)196   ElfSectionReader(const char* cname, const string& path, int fd,
197                    const typename ElfArch::Shdr& section_header)
198       : contents_aligned_(NULL),
199         contents_(NULL),
200         header_(section_header) {
201     // Back up to the beginning of the page we're interested in.
202     const size_t additional = header_.sh_offset % getpagesize();
203     const size_t offset_aligned = header_.sh_offset - additional;
204     section_size_ = header_.sh_size;
205     size_aligned_ = section_size_ + additional;
206     // If the section has been stripped or is empty, do not attempt
207     // to process its contents.
208     if (header_.sh_type == SHT_NOBITS || header_.sh_size == 0)
209       return;
210     // extra sh_type check for string table.
211     std::string_view name{cname};
212     if ((name == ".strtab" || name == ".shstrtab") &&
213         header_.sh_type != SHT_STRTAB) {
214       fprintf(stderr,
215               "Invalid sh_type for string table section: expected "
216               "SHT_STRTAB or SHT_DYNSYM, but got %d\n",
217               header_.sh_type);
218       return;
219     }
220 
221     contents_aligned_ = mmap(NULL, size_aligned_, PROT_READ, MAP_SHARED,
222                              fd, offset_aligned);
223     // Set where the offset really should begin.
224     contents_ = reinterpret_cast<char*>(contents_aligned_) +
225                 (header_.sh_offset - offset_aligned);
226 
227     // Check for and handle any compressed contents.
228     //if (StringViewStartsWith(name, ".zdebug_"))
229     //  DecompressZlibContents();
230     // TODO(saugustine): Add support for proposed elf-section flag
231     // "SHF_COMPRESS".
232   }
233 
~ElfSectionReader()234   ~ElfSectionReader() {
235     if (contents_aligned_ != NULL)
236       munmap(contents_aligned_, size_aligned_);
237     else
238       delete[] contents_;
239   }
240 
241   // Return the section header for this section.
header() const242   typename ElfArch::Shdr const& header() const { return header_; }
243 
244   // Return memory at the given offset within this section.
GetOffset(typename ElfArch::Word bytes) const245   const char* GetOffset(typename ElfArch::Word bytes) const {
246     return contents_ + bytes;
247   }
248 
contents() const249   const char* contents() const { return contents_; }
section_size() const250   size_t section_size() const { return section_size_; }
251 
252  private:
253   // page-aligned file contents
254   void* contents_aligned_;
255   // contents as usable by the client. For non-compressed sections,
256   // pointer within contents_aligned_ to where the section data
257   // begins; for compressed sections, pointer to the decompressed
258   // data.
259   char* contents_;
260   // size of contents_aligned_
261   size_t size_aligned_;
262   // size of contents.
263   size_t section_size_;
264   const typename ElfArch::Shdr header_;
265 };
266 
267 // An iterator over symbols in a given section. It handles walking
268 // through the entries in the specified section and mapping symbol
269 // entries to their names in the appropriate string table (in
270 // another section).
271 template<class ElfArch>
272 class SymbolIterator {
273  public:
SymbolIterator(ElfReaderImpl<ElfArch> * reader,typename ElfArch::Word section_type)274   SymbolIterator(ElfReaderImpl<ElfArch>* reader,
275                  typename ElfArch::Word section_type)
276       : symbol_section_(reader->GetSectionByType(section_type)),
277         string_section_(NULL),
278         num_symbols_in_section_(0),
279         symbol_within_section_(0) {
280 
281     // If this section type doesn't exist, leave
282     // num_symbols_in_section_ as zero, so this iterator is already
283     // done().
284     if (symbol_section_ != NULL) {
285       num_symbols_in_section_ = symbol_section_->header().sh_size /
286                                 symbol_section_->header().sh_entsize;
287 
288       // Symbol sections have sh_link set to the section number of
289       // the string section containing the symbol names.
290       string_section_ = reader->GetSection(symbol_section_->header().sh_link);
291     }
292   }
293 
294   // Return true iff we have passed all symbols in this section.
done() const295   bool done() const {
296     return symbol_within_section_ >= num_symbols_in_section_;
297   }
298 
299   // Advance to the next symbol in this section.
300   // REQUIRES: !done()
Next()301   void Next() { ++symbol_within_section_; }
302 
303   // Return a pointer to the current symbol.
304   // REQUIRES: !done()
GetSymbol() const305   const typename ElfArch::Sym* GetSymbol() const {
306     return reinterpret_cast<const typename ElfArch::Sym*>(
307         symbol_section_->GetOffset(symbol_within_section_ *
308                                    symbol_section_->header().sh_entsize));
309   }
310 
311   // Return the name of the current symbol, NULL if it has none.
312   // REQUIRES: !done()
GetSymbolName() const313   const char* GetSymbolName() const {
314     int name_offset = GetSymbol()->st_name;
315     if (name_offset == 0)
316       return NULL;
317     return string_section_->GetOffset(name_offset);
318   }
319 
GetCurrentSymbolIndex() const320   int GetCurrentSymbolIndex() const {
321     return symbol_within_section_;
322   }
323 
324  private:
325   const ElfSectionReader<ElfArch>* const symbol_section_;
326   const ElfSectionReader<ElfArch>* string_section_;
327   int num_symbols_in_section_;
328   int symbol_within_section_;
329 };
330 
331 
// Copied from strings/strutil.h.  Per chatham,
// this library should not depend on strings.

// Returns true iff |str| ends with |suffix|. An empty suffix always matches.
// Lengths are kept as size_t so they are never narrowed to int.
static inline bool MyHasSuffixString(const string& str, const string& suffix) {
  const size_t len = str.length();
  const size_t suflen = suffix.length();
  return (suflen <= len) && (str.compare(len - suflen, suflen, suffix) == 0);
}
340 
341 
342 // ElfReader loads an ELF binary and can provide information about its
343 // contents. It is most useful for matching addresses to function
344 // names. It does not understand debugging formats (eg dwarf2), so it
345 // can't print line numbers. It takes a path to an elf file and a
346 // readable file descriptor for that file, which it does not assume
347 // ownership of.
348 template<class ElfArch>
349 class ElfReaderImpl {
350  public:
ElfReaderImpl(const string & path,int fd)351   explicit ElfReaderImpl(const string& path, int fd)
352       : path_(path),
353         fd_(fd),
354         section_headers_(NULL),
355         program_headers_(NULL),
356         opd_section_(NULL),
357         base_for_text_(0),
358         plts_supported_(false),
359         plt_code_size_(0),
360         plt0_size_(0),
361         visited_relocation_entries_(false) {
362     string error;
363     is_dwp_ = MyHasSuffixString(path, ".dwp");
364     ParseHeaders(fd, path);
365     // Currently we need some extra information for PowerPC64 binaries
366     // including a way to read the .opd section for function descriptors and a
367     // way to find the linked base for function symbols.
368     if (header_.e_machine == EM_PPC64) {
369       // "opd_section_" must always be checked for NULL before use.
370       opd_section_ = GetSectionInfoByName(".opd", &opd_info_);
371       for (unsigned int k = 0u; k < GetNumSections(); ++k) {
372         std::string_view name{GetSectionName(section_headers_[k].sh_name)};
373         if (StringViewStartsWith(name, ".text")) {
374           base_for_text_ =
375               section_headers_[k].sh_addr - section_headers_[k].sh_offset;
376           break;
377         }
378       }
379     }
380     // Turn on PLTs.
381     if (header_.e_machine == EM_386 || header_.e_machine == EM_X86_64) {
382       plt_code_size_ = kX86PLTCodeSize;
383       plt0_size_ = kX86PLT0Size;
384       plts_supported_ = true;
385     } else if (header_.e_machine == EM_ARM) {
386       plt_code_size_ = kARMPLTCodeSize;
387       plt0_size_ = kARMPLT0Size;
388       plts_supported_ = true;
389     } else if (header_.e_machine == EM_AARCH64) {
390       plt_code_size_ = kAARCH64PLTCodeSize;
391       plt0_size_ = kAARCH64PLT0Size;
392       plts_supported_ = true;
393     }
394   }
395 
~ElfReaderImpl()396   ~ElfReaderImpl() {
397     for (unsigned int i = 0u; i < sections_.size(); ++i)
398       delete sections_[i];
399     delete [] section_headers_;
400     delete [] program_headers_;
401   }
402 
403   // Examine the headers of the file and return whether the file looks
404   // like an ELF file for this architecture. Takes an already-open
405   // file descriptor for the candidate file, reading in the prologue
406   // to see if the ELF file appears to match the current
407   // architecture. If error is non-NULL, it will be set with a reason
408   // in case of failure.
IsArchElfFile(int fd,string * error)409   static bool IsArchElfFile(int fd, string* error) {
410     unsigned char header[EI_NIDENT];
411     if (pread(fd, header, sizeof(header), 0) != sizeof(header)) {
412       if (error != NULL) *error = "Could not read header";
413       return false;
414     }
415 
416     if (memcmp(header, ELFMAG, SELFMAG) != 0) {
417       if (error != NULL) *error = "Missing ELF magic";
418       return false;
419     }
420 
421     if (header[EI_CLASS] != ElfArch::kElfClass) {
422       if (error != NULL) *error = "Different word size";
423       return false;
424     }
425 
426     int endian = 0;
427     if (header[EI_DATA] == ELFDATA2LSB)
428       endian = __LITTLE_ENDIAN;
429     else if (header[EI_DATA] == ELFDATA2MSB)
430       endian = __BIG_ENDIAN;
431     if (endian != __BYTE_ORDER) {
432       if (error != NULL) *error = "Different byte order";
433       return false;
434     }
435 
436     return true;
437   }
438 
439   // Return true if we can use this symbol in Address-to-Symbol map.
CanUseSymbol(const char * name,const typename ElfArch::Sym * sym)440   bool CanUseSymbol(const char* name, const typename ElfArch::Sym* sym) {
441     // For now we only save FUNC and NOTYPE symbols. For now we just
442     // care about functions, but some functions written in assembler
443     // don't have a proper ELF type attached to them, so we store
444     // NOTYPE symbols as well. The remaining significant type is
445     // OBJECT (eg global variables), which represent about 25% of
446     // the symbols in a typical google3 binary.
447     if (ElfArch::Type(sym) != STT_FUNC &&
448         ElfArch::Type(sym) != STT_NOTYPE) {
449       return false;
450     }
451 
452     // Target specific filtering.
453     switch (header_.e_machine) {
454     case EM_AARCH64:
455     case EM_ARM:
456       // Filter out '$x' special local symbols used by tools
457       return name[0] != '$' || ElfArch::Bind(sym) != STB_LOCAL;
458     case EM_X86_64:
459       // Filter out read-only constants like .LC123.
460       return name[0] != '.' || ElfArch::Bind(sym) != STB_LOCAL;
461     default:
462       return true;
463     }
464   }
465 
466   // Iterate over the symbols in a section, either SHT_DYNSYM or
467   // SHT_SYMTAB. Add all symbols to the given SymbolMap.
468   /*
469   void GetSymbolPositions(SymbolMap* symbols,
470                           typename ElfArch::Word section_type,
471                           uint64_t mem_offset,
472                           uint64_t file_offset) {
473     // This map is used to filter out "nested" functions.
474     // See comment below.
475     AddrToSymMap addr_to_sym_map;
476     for (SymbolIterator<ElfArch> it(this, section_type);
477          !it.done(); it.Next()) {
478       const char* name = it.GetSymbolName();
479       if (name == NULL)
480         continue;
481       const typename ElfArch::Sym* sym = it.GetSymbol();
482       if (CanUseSymbol(name, sym)) {
483         const int sec = sym->st_shndx;
484 
485         // We don't support special section indices. The most common
486         // is SHN_ABS, for absolute symbols used deep in the bowels of
487         // glibc. Also ignore any undefined symbols.
488         if (sec == SHN_UNDEF ||
489             (sec >= SHN_LORESERVE && sec <= SHN_HIRESERVE)) {
490           continue;
491         }
492 
493         const typename ElfArch::Shdr& hdr = section_headers_[sec];
494 
495         // Adjust for difference between where we expected to mmap
496         // this section, and where it was actually mmapped.
497         const int64_t expected_base = hdr.sh_addr - hdr.sh_offset;
498         const int64_t real_base = mem_offset - file_offset;
499         const int64_t adjust = real_base - expected_base;
500 
501         uint64_t start = sym->st_value + adjust;
502 
503         // Adjust function symbols for PowerPC64 by dereferencing and adjusting
504         // the function descriptor to get the function address.
505         if (header_.e_machine == EM_PPC64 && ElfArch::Type(sym) == STT_FUNC) {
506           const uint64_t opd_addr =
507               AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value);
508           // Only adjust the returned value if the function address was found.
509           if (opd_addr != sym->st_value) {
510             const int64_t adjust_function_symbols =
511                 real_base - base_for_text_;
512             start = opd_addr + adjust_function_symbols;
513           }
514         }
515 
516         addr_to_sym_map.push_back(std::make_pair(start, sym));
517       }
518     }
519     std::sort(addr_to_sym_map.begin(), addr_to_sym_map.end(), &AddrToSymSorter);
520     addr_to_sym_map.erase(std::unique(addr_to_sym_map.begin(),
521                                       addr_to_sym_map.end(), &AddrToSymEquals),
522                           addr_to_sym_map.end());
523 
524     // Squeeze out any "nested functions".
525     // Nested functions are not allowed in C, but libc plays tricks.
526     //
527     // For example, here is disassembly of /lib64/tls/libc-2.3.5.so:
528     //   0x00000000000aa380 <read+0>:             cmpl   $0x0,0x2781b9(%rip)
529     //   0x00000000000aa387 <read+7>:             jne    0xaa39b <read+27>
530     //   0x00000000000aa389 <__read_nocancel+0>:  mov    $0x0,%rax
531     //   0x00000000000aa390 <__read_nocancel+7>:  syscall
532     //   0x00000000000aa392 <__read_nocancel+9>:  cmp $0xfffffffffffff001,%rax
533     //   0x00000000000aa398 <__read_nocancel+15>: jae    0xaa3ef <read+111>
534     //   0x00000000000aa39a <__read_nocancel+17>: retq
535     //   0x00000000000aa39b <read+27>:            sub    $0x28,%rsp
536     //   0x00000000000aa39f <read+31>:            mov    %rdi,0x8(%rsp)
537     //   ...
538     // Without removing __read_nocancel, symbolizer will return NULL
539     // given e.g. 0xaa39f (because the lower bound is __read_nocancel,
    // but 0xaa39f is beyond its end).
541     if (addr_to_sym_map.empty()) {
542       return;
543     }
544     const ElfSectionReader<ElfArch>* const symbol_section =
545         this->GetSectionByType(section_type);
546     const ElfSectionReader<ElfArch>* const string_section =
547         this->GetSection(symbol_section->header().sh_link);
548 
549     typename AddrToSymMap::iterator curr = addr_to_sym_map.begin();
550     // Always insert the first symbol.
551     symbols->AddSymbol(string_section->GetOffset(curr->second->st_name),
552                        curr->first, curr->second->st_size);
553     typename AddrToSymMap::iterator prev = curr++;
554     for (; curr != addr_to_sym_map.end(); ++curr) {
555       const uint64_t prev_addr = prev->first;
556       const uint64_t curr_addr = curr->first;
557       const typename ElfArch::Sym* const prev_sym = prev->second;
558       const typename ElfArch::Sym* const curr_sym = curr->second;
559       if (prev_addr + prev_sym->st_size <= curr_addr ||
560           // The next condition is true if two symbols overlap like this:
561           //
562           //   Previous symbol  |----------------------------|
563           //   Current symbol     |-------------------------------|
564           //
565           // These symbols are not found in google3 codebase, but in
566           // jdk1.6.0_01_gg1/jre/lib/i386/server/libjvm.so.
567           //
568           // 0619e040 00000046 t CardTableModRefBS::write_region_work()
569           // 0619e070 00000046 t CardTableModRefBS::write_ref_array_work()
570           //
571           // We allow overlapped symbols rather than ignore these.
572           // Due to the way SymbolMap::GetSymbolAtPosition() works,
573           // lookup for any address in [curr_addr, curr_addr + its size)
574           // (e.g. 0619e071) will produce the current symbol,
575           // which is the desired outcome.
576           prev_addr + prev_sym->st_size < curr_addr + curr_sym->st_size) {
577         const char* name = string_section->GetOffset(curr_sym->st_name);
578         symbols->AddSymbol(name, curr_addr, curr_sym->st_size);
579         prev = curr;
580       } else {
581         // Current symbol is "nested" inside previous one like this:
582         //
583         //   Previous symbol  |----------------------------|
584         //   Current symbol     |---------------------|
585         //
586         // This happens within glibc, e.g. __read_nocancel is nested
587         // "inside" __read. Ignore "inner" symbol.
588         //DCHECK_LE(curr_addr + curr_sym->st_size,
589         //          prev_addr + prev_sym->st_size);
590         ;
591       }
592     }
593   }
594 */
595 
VisitSymbols(typename ElfArch::Word section_type,ElfReader::SymbolSink * sink)596   void VisitSymbols(typename ElfArch::Word section_type,
597                     ElfReader::SymbolSink* sink) {
598     VisitSymbols(section_type, sink, -1, -1, false);
599   }
600 
VisitSymbols(typename ElfArch::Word section_type,ElfReader::SymbolSink * sink,int symbol_binding,int symbol_type,bool get_raw_symbol_values)601   void VisitSymbols(typename ElfArch::Word section_type,
602                     ElfReader::SymbolSink* sink,
603                     int symbol_binding,
604                     int symbol_type,
605                     bool get_raw_symbol_values) {
606     for (SymbolIterator<ElfArch> it(this, section_type);
607          !it.done(); it.Next()) {
608       const char* name = it.GetSymbolName();
609       if (!name) continue;
610       const typename ElfArch::Sym* sym = it.GetSymbol();
611       if ((symbol_binding < 0 || ElfArch::Bind(sym) == symbol_binding) &&
612           (symbol_type < 0 || ElfArch::Type(sym) == symbol_type)) {
613         typename ElfArch::Sym symbol = *sym;
614         // Add a PLT symbol in addition to the main undefined symbol.
615         // Only do this for SHT_DYNSYM, because PLT symbols are dynamic.
616         int symbol_index = it.GetCurrentSymbolIndex();
617         // TODO(dthomson): Can be removed once all Java code is using the
618         // Google3 launcher.
619         if (section_type == SHT_DYNSYM &&
620             static_cast<unsigned int>(symbol_index) < symbols_plt_offsets_.size() &&
621             symbols_plt_offsets_[symbol_index] != 0) {
622           string plt_name = string(name) + kPLTFunctionSuffix;
623           if (plt_function_names_[symbol_index].empty()) {
624             plt_function_names_[symbol_index] = plt_name;
625           } else if (plt_function_names_[symbol_index] != plt_name) {
626 		;
627           }
628           sink->AddSymbol(plt_function_names_[symbol_index].c_str(),
629                           symbols_plt_offsets_[it.GetCurrentSymbolIndex()],
630                           plt_code_size_);
631         }
632         if (!get_raw_symbol_values)
633           AdjustSymbolValue(&symbol);
634         sink->AddSymbol(name, symbol.st_value, symbol.st_size);
635       }
636     }
637   }
638 
VisitRelocationEntries()639   void VisitRelocationEntries() {
640     if (visited_relocation_entries_) {
641       return;
642     }
643     visited_relocation_entries_ = true;
644 
645     if (!plts_supported_) {
646       return;
647     }
648     // First determine if PLTs exist. If not, then there is nothing to do.
649     ElfReader::SectionInfo plt_section_info;
650     const char* plt_section =
651         GetSectionInfoByName(kElfPLTSectionName, &plt_section_info);
652     if (!plt_section) {
653       return;
654     }
655     if (plt_section_info.size == 0) {
656       return;
657     }
658 
659     // The PLTs could be referenced by either a Rel or Rela (Rel with Addend)
660     // section.
661     ElfReader::SectionInfo rel_section_info;
662     ElfReader::SectionInfo rela_section_info;
663     const char* rel_section =
664         GetSectionInfoByName(kElfPLTRelSectionName, &rel_section_info);
665     const char* rela_section =
666         GetSectionInfoByName(kElfPLTRelaSectionName, &rela_section_info);
667 
668     const typename ElfArch::Rel* rel =
669         reinterpret_cast<const typename ElfArch::Rel*>(rel_section);
670     const typename ElfArch::Rela* rela =
671         reinterpret_cast<const typename ElfArch::Rela*>(rela_section);
672 
673     if (!rel_section && !rela_section) {
674       return;
675     }
676 
677     // Use either Rel or Rela section, depending on which one exists.
678     size_t section_size = rel_section ? rel_section_info.size
679                                       : rela_section_info.size;
680     size_t entry_size = rel_section ? sizeof(typename ElfArch::Rel)
681                                     : sizeof(typename ElfArch::Rela);
682 
683     // Determine the number of entries in the dynamic symbol table.
684     ElfReader::SectionInfo dynsym_section_info;
685     const char* dynsym_section =
686         GetSectionInfoByName(kElfDynSymSectionName, &dynsym_section_info);
687     // The dynsym section might not exist, or it might be empty. In either case
688     // there is nothing to be done so return.
689     if (!dynsym_section || dynsym_section_info.size == 0) {
690       return;
691     }
692     size_t num_dynamic_symbols =
693         dynsym_section_info.size / dynsym_section_info.entsize;
694     symbols_plt_offsets_.resize(num_dynamic_symbols, 0);
695 
696     // TODO(dthomson): Can be removed once all Java code is using the
697     // Google3 launcher.
698     // Make storage room for PLT function name strings.
699     plt_function_names_.resize(num_dynamic_symbols);
700 
701     for (size_t i = 0; i < section_size / entry_size; ++i) {
702       // Determine symbol index from the |r_info| field.
703       int sym_index = ElfArch::r_sym(rel_section ? rel[i].r_info
704                                                  : rela[i].r_info);
705       if (static_cast<unsigned int>(sym_index) >= symbols_plt_offsets_.size()) {
706         continue;
707       }
708       symbols_plt_offsets_[sym_index] =
709           plt_section_info.addr + plt0_size_ + i * plt_code_size_;
710     }
711   }
712 
713   // Return an ElfSectionReader for the first section of the given
714   // type by iterating through all section headers. Returns NULL if
715   // the section type is not found.
GetSectionByType(typename ElfArch::Word section_type)716   const ElfSectionReader<ElfArch>* GetSectionByType(
717       typename ElfArch::Word section_type) {
718     for (unsigned int k = 0u; k < GetNumSections(); ++k) {
719       if (section_headers_[k].sh_type == section_type) {
720         return GetSection(k);
721       }
722     }
723     return NULL;
724   }
725 
726   // Return the name of section "shndx".  Returns NULL if the section
727   // is not found.
GetSectionNameByIndex(int shndx)728   const char* GetSectionNameByIndex(int shndx) {
729     return GetSectionName(section_headers_[shndx].sh_name);
730   }
731 
732   // Return a pointer to section "shndx", and store the size in
733   // "size".  Returns NULL if the section is not found.
GetSectionContentsByIndex(int shndx,size_t * size)734   const char* GetSectionContentsByIndex(int shndx, size_t* size) {
735     const ElfSectionReader<ElfArch>* section = GetSection(shndx);
736     if (section != NULL) {
737       *size = section->section_size();
738       return section->contents();
739     }
740     return NULL;
741   }
742 
743   // Return a pointer to the first section of the given name by
744   // iterating through all section headers, and store the size in
745   // "size".  Returns NULL if the section name is not found.
GetSectionContentsByName(const string & section_name,size_t * size)746   const char* GetSectionContentsByName(const string& section_name,
747                                        size_t* size) {
748     for (unsigned int k = 0u; k < GetNumSections(); ++k) {
749       // When searching for sections in a .dwp file, the sections
750       // we're looking for will always be at the end of the section
751       // table, so reverse the direction of iteration.
752       int shndx = is_dwp_ ? GetNumSections() - k - 1 : k;
753       const char* name = GetSectionName(section_headers_[shndx].sh_name);
754       if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) {
755         const ElfSectionReader<ElfArch>* section = GetSection(shndx);
756         if (section == NULL) {
757           return NULL;
758         } else {
759           *size = section->section_size();
760           return section->contents();
761         }
762       }
763     }
764     return NULL;
765   }
766 
767   // This is like GetSectionContentsByName() but it returns a lot of extra
768   // information about the section.
GetSectionInfoByName(const string & section_name,ElfReader::SectionInfo * info)769   const char* GetSectionInfoByName(const string& section_name,
770                                    ElfReader::SectionInfo* info) {
771     for (unsigned int k = 0u; k < GetNumSections(); ++k) {
772       // When searching for sections in a .dwp file, the sections
773       // we're looking for will always be at the end of the section
774       // table, so reverse the direction of iteration.
775       int shndx = is_dwp_ ? GetNumSections() - k - 1 : k;
776       const char* name = GetSectionName(section_headers_[shndx].sh_name);
777       if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) {
778         const ElfSectionReader<ElfArch>* section = GetSection(shndx);
779         if (section == NULL) {
780           return NULL;
781         } else {
782           info->type = section->header().sh_type;
783           info->flags = section->header().sh_flags;
784           info->addr = section->header().sh_addr;
785           info->offset = section->header().sh_offset;
786           info->size = section->header().sh_size;
787           info->link = section->header().sh_link;
788           info->info = section->header().sh_info;
789           info->addralign = section->header().sh_addralign;
790           info->entsize = section->header().sh_entsize;
791           return section->contents();
792         }
793       }
794     }
795     return NULL;
796   }
797 
798   // p_vaddr of the first PT_LOAD segment (if any), or 0 if no PT_LOAD
799   // segments are present. This is the address an ELF image was linked
800   // (by static linker) to be loaded at. Usually (but not always) 0 for
801   // shared libraries and position-independent executables.
VaddrOfFirstLoadSegment() const802   uint64_t VaddrOfFirstLoadSegment() const {
803     // Relocatable objects (of type ET_REL) do not have LOAD segments.
804     if (header_.e_type == ET_REL) {
805       return 0;
806     }
807     for (int i = 0; i < GetNumProgramHeaders(); ++i) {
808       if (program_headers_[i].p_type == PT_LOAD) {
809         return program_headers_[i].p_vaddr;
810       }
811     }
812     return 0;
813   }
814 
815   // According to the LSB ("ELF special sections"), sections with debug
816   // info are prefixed by ".debug".  The names are not specified, but they
817   // look like ".debug_line", ".debug_info", etc.
HasDebugSections()818   bool HasDebugSections() {
819     // Debug sections are likely to be near the end, so reverse the
820     // direction of iteration.
821     for (int k = GetNumSections() - 1; k >= 0; --k) {
822       std::string_view name{GetSectionName(section_headers_[k].sh_name)};
823       if (StringViewStartsWith(name, ".debug") ||
824           StringViewStartsWith(name, ".zdebug")) {
825         return true;
826       }
827     }
828     return false;
829   }
830 
IsDynamicSharedObject() const831   bool IsDynamicSharedObject() const {
832     return header_.e_type == ET_DYN;
833   }
834 
835   // Return the number of sections.
GetNumSections() const836   uint64_t GetNumSections() const {
837     if (HasManySections())
838       return first_section_header_.sh_size;
839     return header_.e_shnum;
840   }
841 
842  private:
843   typedef vector<pair<uint64_t, const typename ElfArch::Sym*> > AddrToSymMap;
844 
AddrToSymSorter(const typename AddrToSymMap::value_type & lhs,const typename AddrToSymMap::value_type & rhs)845   static bool AddrToSymSorter(const typename AddrToSymMap::value_type& lhs,
846                               const typename AddrToSymMap::value_type& rhs) {
847     return lhs.first < rhs.first;
848   }
849 
AddrToSymEquals(const typename AddrToSymMap::value_type & lhs,const typename AddrToSymMap::value_type & rhs)850   static bool AddrToSymEquals(const typename AddrToSymMap::value_type& lhs,
851                               const typename AddrToSymMap::value_type& rhs) {
852     return lhs.first == rhs.first;
853   }
854 
855   // Does this ELF file have too many sections to fit in the program header?
HasManySections() const856   bool HasManySections() const {
857     return header_.e_shnum == SHN_UNDEF;
858   }
859 
860   // Return the number of program headers.
GetNumProgramHeaders() const861   int GetNumProgramHeaders() const {
862     if (HasManySections() && header_.e_phnum == 0xffff &&
863         first_section_header_.sh_info != 0)
864       return first_section_header_.sh_info;
865     return header_.e_phnum;
866   }
867 
868   // Return the index of the string table.
GetStringTableIndex() const869   int GetStringTableIndex() const {
870     if (HasManySections()) {
871       if (header_.e_shstrndx == 0xffff)
872         return first_section_header_.sh_link;
873       else if (header_.e_shstrndx >= GetNumSections())
874         return 0;
875     }
876     return header_.e_shstrndx;
877   }
878 
879   // Given an offset into the section header string table, return the
880   // section name.
GetSectionName(typename ElfArch::Word sh_name)881   const char* GetSectionName(typename ElfArch::Word sh_name) {
882     const ElfSectionReader<ElfArch>* shstrtab =
883         GetSection(GetStringTableIndex());
884     if (shstrtab != NULL) {
885       return shstrtab->GetOffset(sh_name);
886     }
887     return NULL;
888   }
889 
890   // Return an ElfSectionReader for the given section. The reader will
891   // be freed when this object is destroyed.
GetSection(int num)892   const ElfSectionReader<ElfArch>* GetSection(int num) {
893     const char* name;
894     // Hard-coding the name for the section-name string table prevents
895     // infinite recursion.
896     if (num == GetStringTableIndex())
897       name = ".shstrtab";
898     else
899       name = GetSectionNameByIndex(num);
900     ElfSectionReader<ElfArch>*& reader = sections_[num];
901     if (reader == NULL)
902       reader = new ElfSectionReader<ElfArch>(name, path_, fd_,
903                                              section_headers_[num]);
904     return reader->contents() ? reader : nullptr;
905   }
906 
907   // Parse out the overall header information from the file and assert
908   // that it looks sane. This contains information like the magic
909   // number and target architecture.
ParseHeaders(int fd,const string & path)910   bool ParseHeaders(int fd, const string& path) {
911     // Read in the global ELF header.
912     if (pread(fd, &header_, sizeof(header_), 0) != sizeof(header_)) {
913       return false;
914     }
915 
916     // Must be an executable, dynamic shared object or relocatable object
917     if (header_.e_type != ET_EXEC &&
918         header_.e_type != ET_DYN &&
919         header_.e_type != ET_REL) {
920       return false;
921     }
922     // Need a section header.
923     if (header_.e_shoff == 0) {
924       return false;
925     }
926 
927     if (header_.e_shnum == SHN_UNDEF) {
928       // The number of sections in the program header is only a 16-bit value. In
929       // the event of overflow (greater than SHN_LORESERVE sections), e_shnum
930       // will read SHN_UNDEF and the true number of section header table entries
931       // is found in the sh_size field of the first section header.
932       // See: http://www.sco.com/developers/gabi/2003-12-17/ch4.sheader.html
933       if (pread(fd, &first_section_header_, sizeof(first_section_header_),
934                 header_.e_shoff) != sizeof(first_section_header_)) {
935         return false;
936       }
937     }
938 
939     // Dynamically allocate enough space to store the section headers
940     // and read them out of the file.
941     const int section_headers_size =
942         GetNumSections() * sizeof(*section_headers_);
943     section_headers_ = new typename ElfArch::Shdr[section_headers_size];
944     if (pread(fd, section_headers_, section_headers_size, header_.e_shoff) !=
945         section_headers_size) {
946       return false;
947     }
948 
949     // Dynamically allocate enough space to store the program headers
950     // and read them out of the file.
951     //const int program_headers_size =
952     //    GetNumProgramHeaders() * sizeof(*program_headers_);
953     program_headers_ = new typename ElfArch::Phdr[GetNumProgramHeaders()];
954 
955     // Presize the sections array for efficiency.
956     sections_.resize(GetNumSections(), NULL);
957     return true;
958   }
959 
960   // Given the "value" of a function descriptor return the address of the
961   // function (i.e. the dereferenced value). Otherwise return "value".
AdjustPPC64FunctionDescriptorSymbolValue(uint64_t value)962   uint64_t AdjustPPC64FunctionDescriptorSymbolValue(uint64_t value) {
963     if (opd_section_ != NULL &&
964         opd_info_.addr <= value &&
965         value < opd_info_.addr + opd_info_.size) {
966       uint64_t offset = value - opd_info_.addr;
967       return (*reinterpret_cast<const uint64_t*>(opd_section_ + offset));
968     }
969     return value;
970   }
971 
AdjustSymbolValue(typename ElfArch::Sym * sym)972   void AdjustSymbolValue(typename ElfArch::Sym* sym) {
973     switch (header_.e_machine) {
974     case EM_ARM:
975       // For ARM architecture, if the LSB of the function symbol offset is set,
976       // it indicates a Thumb function.  This bit should not be taken literally.
977       // Clear it.
978       if (ElfArch::Type(sym) == STT_FUNC)
979         sym->st_value = AdjustARMThumbSymbolValue(sym->st_value);
980       break;
981     case EM_386:
982       // No adjustment needed for Intel x86 architecture.  However, explicitly
983       // define this case as we use it quite often.
984       break;
985     case EM_PPC64:
986       // PowerPC64 currently has function descriptors as part of the ABI.
987       // Function symbols need to be adjusted accordingly.
988       if (ElfArch::Type(sym) == STT_FUNC)
989         sym->st_value = AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value);
990       break;
991     default:
992       break;
993     }
994   }
995 
996   friend class SymbolIterator<ElfArch>;
997 
998   // The file we're reading.
999   const string path_;
1000   // Open file descriptor for path_. Not owned by this object.
1001   const int fd_;
1002 
1003   // The global header of the ELF file.
1004   typename ElfArch::Ehdr header_;
1005 
1006   // The header of the first section. This may be used to supplement the ELF
1007   // file header.
1008   typename ElfArch::Shdr first_section_header_;
1009 
1010   // Array of GetNumSections() section headers, allocated when we read
1011   // in the global header.
1012   typename ElfArch::Shdr* section_headers_;
1013 
1014   // Array of GetNumProgramHeaders() program headers, allocated when we read
1015   // in the global header.
1016   typename ElfArch::Phdr* program_headers_;
1017 
1018   // An array of pointers to ElfSectionReaders. Sections are
1019   // mmaped as they're needed and not released until this object is
1020   // destroyed.
1021   vector<ElfSectionReader<ElfArch>*> sections_;
1022 
  // For PowerPC64 we need to keep track of function descriptors when looking up
  // the values of function symbols. Function descriptors are kept in the
  // .opd section and are dereferenced to find the function address.
1026   ElfReader::SectionInfo opd_info_;
1027   const char* opd_section_;  // Must be checked for NULL before use.
1028   int64_t base_for_text_;
1029 
1030   // Read PLT-related sections for the current architecture.
1031   bool plts_supported_;
1032   // Code size of each PLT function for the current architecture.
1033   size_t plt_code_size_;
1034   // Size of the special first entry in the .plt section that calls the runtime
1035   // loader resolution routine, and that all other entries jump to when doing
1036   // lazy symbol binding.
1037   size_t plt0_size_;
1038 
1039   // Maps a dynamic symbol index to a PLT offset.
1040   // The vector entry index is the dynamic symbol index.
1041   std::vector<uint64_t> symbols_plt_offsets_;
1042 
1043   // Container for PLT function name strings. These strings are passed by
1044   // reference to SymbolSink::AddSymbol() so they need to be stored somewhere.
1045   std::vector<string> plt_function_names_;
1046 
1047   bool visited_relocation_entries_;
1048 
1049   // True if this is a .dwp file.
1050   bool is_dwp_;
1051 };
1052 
ElfReader(const string & path)1053 ElfReader::ElfReader(const string& path)
1054     : path_(path), fd_(-1), impl32_(NULL), impl64_(NULL) {
1055   // linux 2.6.XX kernel can show deleted files like this:
1056   //   /var/run/nscd/dbYLJYaE (deleted)
1057   // and the kernel-supplied vdso and vsyscall mappings like this:
1058   //   [vdso]
1059   //   [vsyscall]
1060   if (MyHasSuffixString(path, " (deleted)"))
1061     return;
1062   if (path == "[vdso]")
1063     return;
1064   if (path == "[vsyscall]")
1065     return;
1066 
1067   fd_ = open(path.c_str(), O_RDONLY);
1068 }
1069 
~ElfReader()1070 ElfReader::~ElfReader() {
1071   if (fd_ != -1)
1072     close(fd_);
1073   if (impl32_ != NULL)
1074     delete impl32_;
1075   if (impl64_ != NULL)
1076     delete impl64_;
1077 }
1078 
1079 
// IsNativeElfFile() is the only part of this file that depends on the
// host word size: NATIVE_ELF_ARCH names the ElfArch whose width matches
// unsigned long.
#if ULONG_MAX == 0xffffffffffffffff
#define NATIVE_ELF_ARCH Elf64
#elif ULONG_MAX == 0xffffffff
#define NATIVE_ELF_ARCH Elf32
#else
#error "Invalid word size"
#endif
1088 
1089 template <typename ElfArch>
IsElfFile(const int fd,const string & path)1090 static bool IsElfFile(const int fd, const string& path) {
1091   if (fd < 0)
1092     return false;
1093   if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) {
1094     // No error message here.  IsElfFile gets called many times.
1095     return false;
1096   }
1097   return true;
1098 }
1099 
IsNativeElfFile() const1100 bool ElfReader::IsNativeElfFile() const {
1101   return IsElfFile<NATIVE_ELF_ARCH>(fd_, path_);
1102 }
1103 
IsElf32File() const1104 bool ElfReader::IsElf32File() const {
1105   return IsElfFile<Elf32>(fd_, path_);
1106 }
1107 
IsElf64File() const1108 bool ElfReader::IsElf64File() const {
1109   return IsElfFile<Elf64>(fd_, path_);
1110 }
1111 
1112 /*
1113 void ElfReader::AddSymbols(SymbolMap* symbols,
1114                            uint64_t mem_offset, uint64_t file_offset,
1115                            uint64_t length) {
1116   if (fd_ < 0)
1117     return;
1118   // TODO(chatham): Actually use the information about file offset and
1119   // the length of the mapped section. On some machines the data
1120   // section gets mapped as executable, and we'll end up reading the
1121   // file twice and getting some of the offsets wrong.
1122   if (IsElf32File()) {
1123     GetImpl32()->GetSymbolPositions(symbols, SHT_SYMTAB,
1124                                     mem_offset, file_offset);
1125     GetImpl32()->GetSymbolPositions(symbols, SHT_DYNSYM,
1126                                     mem_offset, file_offset);
1127   } else if (IsElf64File()) {
1128     GetImpl64()->GetSymbolPositions(symbols, SHT_SYMTAB,
1129                                     mem_offset, file_offset);
1130     GetImpl64()->GetSymbolPositions(symbols, SHT_DYNSYM,
1131                                     mem_offset, file_offset);
1132   }
1133 }
1134 */
1135 
VisitSymbols(ElfReader::SymbolSink * sink)1136 void ElfReader::VisitSymbols(ElfReader::SymbolSink* sink) {
1137   VisitSymbols(sink, -1, -1);
1138 }
1139 
VisitSymbols(ElfReader::SymbolSink * sink,int symbol_binding,int symbol_type)1140 void ElfReader::VisitSymbols(ElfReader::SymbolSink* sink,
1141                              int symbol_binding,
1142                              int symbol_type) {
1143   VisitSymbols(sink, symbol_binding, symbol_type, false);
1144 }
1145 
VisitSymbols(ElfReader::SymbolSink * sink,int symbol_binding,int symbol_type,bool get_raw_symbol_values)1146 void ElfReader::VisitSymbols(ElfReader::SymbolSink* sink,
1147                              int symbol_binding,
1148                              int symbol_type,
1149                              bool get_raw_symbol_values) {
1150   if (IsElf32File()) {
1151     GetImpl32()->VisitRelocationEntries();
1152     GetImpl32()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type,
1153                               get_raw_symbol_values);
1154     GetImpl32()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type,
1155                               get_raw_symbol_values);
1156   } else if (IsElf64File()) {
1157     GetImpl64()->VisitRelocationEntries();
1158     GetImpl64()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type,
1159                               get_raw_symbol_values);
1160     GetImpl64()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type,
1161                               get_raw_symbol_values);
1162   }
1163 }
1164 
VaddrOfFirstLoadSegment()1165 uint64_t ElfReader::VaddrOfFirstLoadSegment() {
1166   if (IsElf32File()) {
1167     return GetImpl32()->VaddrOfFirstLoadSegment();
1168   } else if (IsElf64File()) {
1169     return GetImpl64()->VaddrOfFirstLoadSegment();
1170   } else {
1171     return 0;
1172   }
1173 }
1174 
GetSectionName(int shndx)1175 const char* ElfReader::GetSectionName(int shndx) {
1176   if (shndx < 0 || static_cast<unsigned int>(shndx) >= GetNumSections()) return NULL;
1177   if (IsElf32File()) {
1178     return GetImpl32()->GetSectionNameByIndex(shndx);
1179   } else if (IsElf64File()) {
1180     return GetImpl64()->GetSectionNameByIndex(shndx);
1181   } else {
1182     return NULL;
1183   }
1184 }
1185 
GetNumSections()1186 uint64_t ElfReader::GetNumSections() {
1187   if (IsElf32File()) {
1188     return GetImpl32()->GetNumSections();
1189   } else if (IsElf64File()) {
1190     return GetImpl64()->GetNumSections();
1191   } else {
1192     return 0;
1193   }
1194 }
1195 
GetSectionByIndex(int shndx,size_t * size)1196 const char* ElfReader::GetSectionByIndex(int shndx, size_t* size) {
1197   if (IsElf32File()) {
1198     return GetImpl32()->GetSectionContentsByIndex(shndx, size);
1199   } else if (IsElf64File()) {
1200     return GetImpl64()->GetSectionContentsByIndex(shndx, size);
1201   } else {
1202     return NULL;
1203   }
1204 }
1205 
GetSectionByName(const string & section_name,size_t * size)1206 const char* ElfReader::GetSectionByName(const string& section_name,
1207                                         size_t* size) {
1208   if (IsElf32File()) {
1209     return GetImpl32()->GetSectionContentsByName(section_name, size);
1210   } else if (IsElf64File()) {
1211     return GetImpl64()->GetSectionContentsByName(section_name, size);
1212   } else {
1213     return NULL;
1214   }
1215 }
1216 
GetSectionInfoByName(const string & section_name,SectionInfo * info)1217 const char* ElfReader::GetSectionInfoByName(const string& section_name,
1218                                             SectionInfo* info) {
1219   if (IsElf32File()) {
1220     return GetImpl32()->GetSectionInfoByName(section_name, info);
1221   } else if (IsElf64File()) {
1222     return GetImpl64()->GetSectionInfoByName(section_name, info);
1223   } else {
1224     return NULL;
1225   }
1226 }
1227 
SectionNamesMatch(std::string_view name,std::string_view sh_name)1228 bool ElfReader::SectionNamesMatch(std::string_view name,
1229                                   std::string_view sh_name) {
1230   std::string_view debug_prefix{".debug_"};
1231   std::string_view zdebug_prefix{".zdebug_"};
1232   if (StringViewStartsWith(name, debug_prefix) &&
1233       StringViewStartsWith(sh_name, zdebug_prefix)) {
1234     name.remove_prefix(debug_prefix.length());
1235     sh_name.remove_prefix(zdebug_prefix.length());
1236     return name == sh_name;
1237   }
1238   return name == sh_name;
1239 }
1240 
IsDynamicSharedObject()1241 bool ElfReader::IsDynamicSharedObject() {
1242   if (IsElf32File()) {
1243     return GetImpl32()->IsDynamicSharedObject();
1244   } else if (IsElf64File()) {
1245     return GetImpl64()->IsDynamicSharedObject();
1246   } else {
1247     return false;
1248   }
1249 }
1250 
GetImpl32()1251 ElfReaderImpl<Elf32>* ElfReader::GetImpl32() {
1252   if (impl32_ == NULL) {
1253     impl32_ = new ElfReaderImpl<Elf32>(path_, fd_);
1254   }
1255   return impl32_;
1256 }
1257 
GetImpl64()1258 ElfReaderImpl<Elf64>* ElfReader::GetImpl64() {
1259   if (impl64_ == NULL) {
1260     impl64_ = new ElfReaderImpl<Elf64>(path_, fd_);
1261   }
1262   return impl64_;
1263 }
1264 
1265 // Return true if file is an ELF binary of ElfArch, with unstripped
1266 // debug info (debug_only=true) or symbol table (debug_only=false).
1267 // Otherwise, return false.
1268 template <typename ElfArch>
IsNonStrippedELFBinaryImpl(const string & path,const int fd,bool debug_only)1269 static bool IsNonStrippedELFBinaryImpl(const string& path, const int fd,
1270                                        bool debug_only) {
1271   if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) return false;
1272   ElfReaderImpl<ElfArch> elf_reader(path, fd);
1273   return debug_only ?
1274       elf_reader.HasDebugSections()
1275       : (elf_reader.GetSectionByType(SHT_SYMTAB) != NULL);
1276 }
1277 
1278 // Helper for the IsNon[Debug]StrippedELFBinary functions.
IsNonStrippedELFBinaryHelper(const string & path,bool debug_only)1279 static bool IsNonStrippedELFBinaryHelper(const string& path,
1280                                          bool debug_only) {
1281   const int fd = open(path.c_str(), O_RDONLY);
1282   if (fd == -1) {
1283     return false;
1284   }
1285 
1286   if (IsNonStrippedELFBinaryImpl<Elf32>(path, fd, debug_only) ||
1287       IsNonStrippedELFBinaryImpl<Elf64>(path, fd, debug_only)) {
1288     close(fd);
1289     return true;
1290   }
1291   close(fd);
1292   return false;
1293 }
1294 
IsNonStrippedELFBinary(const string & path)1295 bool ElfReader::IsNonStrippedELFBinary(const string& path) {
1296   return IsNonStrippedELFBinaryHelper(path, false);
1297 }
1298 
IsNonDebugStrippedELFBinary(const string & path)1299 bool ElfReader::IsNonDebugStrippedELFBinary(const string& path) {
1300   return IsNonStrippedELFBinaryHelper(path, true);
1301 }
1302 }  // namespace google_breakpad
1303