xref: /aosp_15_r20/external/google-breakpad/src/common/dwarf/elf_reader.h (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
// Copyright 2005 Google LLC
// Author: [email protected] (Andrew Chatham)
// Author: [email protected] (Satoru Takabayashi)
//
// ElfReader handles reading in ELF files. It can extract symbols from
// the current process, which may be used to symbolize stack traces
// without having to make a potentially dangerous call to fork().
//
// ElfReader dynamically allocates memory, so it is not appropriate to
// use once the address space might be corrupted, such as during
// process death.
//
// ElfReader supports both 32-bit and 64-bit ELF binaries.
14*9712c20fSFrederick Mayle 
15*9712c20fSFrederick Mayle #ifndef COMMON_DWARF_ELF_READER_H__
16*9712c20fSFrederick Mayle #define COMMON_DWARF_ELF_READER_H__
17*9712c20fSFrederick Mayle 
18*9712c20fSFrederick Mayle #include <string>
19*9712c20fSFrederick Mayle #include <string_view>
20*9712c20fSFrederick Mayle #include <vector>
21*9712c20fSFrederick Mayle 
22*9712c20fSFrederick Mayle #include "common/dwarf/types.h"
23*9712c20fSFrederick Mayle #include "common/using_std_string.h"
24*9712c20fSFrederick Mayle 
25*9712c20fSFrederick Mayle using std::vector;
26*9712c20fSFrederick Mayle using std::pair;
27*9712c20fSFrederick Mayle 
namespace google_breakpad {

// Forward declarations only; this header needs these types just as
// pointer targets and template arguments.
class SymbolMap;
class Elf32;
class Elf64;
template<typename ElfArch>
class ElfReaderImpl;

// Reader for a single ELF file identified by its path.  Supports both
// 32-bit and 64-bit ELF binaries.
class ElfReader {
 public:
  // Creates a reader for the ELF file at "path".  The file descriptor
  // kept by the reader may be -1 if there was an error during open
  // (see fd_ below).
  explicit ElfReader(const string& path);
  ~ElfReader();

  // ElfReader holds a file descriptor and raw pointers to lazily
  // created implementation objects and declares its own destructor.
  // A memberwise copy would leave two readers sharing those handles,
  // so copying is disabled.  (Copy assignment was already unavailable
  // because path_ is const; deleting it here makes that explicit.)
  ElfReader(const ElfReader&) = delete;
  ElfReader& operator=(const ElfReader&) = delete;

  // Parse the ELF prologue of this file and return whether it was
  // successfully parsed and matches the word size and byte order of
  // the current process.
  bool IsNativeElfFile() const;

  // Similar to IsNativeElfFile but checks if it's a 32-bit ELF file.
  bool IsElf32File() const;

  // Similar to IsNativeElfFile but checks if it's a 64-bit ELF file.
  bool IsElf64File() const;

  // Checks if it's an ELF file of type ET_DYN (shared object file).
  bool IsDynamicSharedObject();

  // Add symbols in the given ELF file into the provided SymbolMap,
  // assuming that the file has been loaded into the specified
  // offset.
  //
  // The remaining arguments are typically taken from a
  // ProcMapsIterator (base/sysinfo.h) and describe which portions of
  // the ELF file are mapped into which parts of memory:
  //
  // mem_offset - position at which the segment is mapped into memory
  // file_offset - offset in the file where the mapping begins
  // length - length of the mapped segment
  void AddSymbols(SymbolMap* symbols,
                  uint64_t mem_offset, uint64_t file_offset,
                  uint64_t length);

  // Interface handed to VisitSymbols().  Implementations receive
  // symbols through AddSymbol() as a name, an address and a size.
  class SymbolSink {
   public:
    // Virtual so that implementations can be destroyed through a
    // SymbolSink pointer.
    virtual ~SymbolSink() = default;
    virtual void AddSymbol(const char* name, uint64_t address,
                           uint64_t size) = 0;
  };

  // Like AddSymbols above, but with no address correction.
  // Processes any SHT_SYMTAB section, followed by any SHT_DYNSYM section.
  void VisitSymbols(SymbolSink* sink);

  // Like VisitSymbols above, but for a specific symbol binding/type.
  // A negative value for the binding and type parameters means any
  // binding or type.
  void VisitSymbols(SymbolSink* sink, int symbol_binding, int symbol_type);

  // Like VisitSymbols above but can optionally export raw symbol values
  // instead of adjusted ones.
  void VisitSymbols(SymbolSink* sink, int symbol_binding, int symbol_type,
                    bool get_raw_symbol_values);

  // p_vaddr of the first PT_LOAD segment (if any), or 0 if no PT_LOAD
  // segments are present. This is the address an ELF image was linked
  // (by static linker) to be loaded at. Usually (but not always) 0 for
  // shared libraries and position-independent executables.
  uint64_t VaddrOfFirstLoadSegment();

  // Return the name of section "shndx".  Returns NULL if the section
  // is not found.
  const char* GetSectionName(int shndx);

  // Return the number of sections in the given ELF file.
  uint64_t GetNumSections();

  // Get section "shndx" from the given ELF file.  On success, return
  // the pointer to the section and store the size in "size".
  // On error, return NULL.  The returned section data is only valid
  // until the ElfReader gets destroyed.
  const char* GetSectionByIndex(int shndx, size_t* size);

  // Get section with "section_name" (ex. ".text", ".symtab") in the
  // given ELF file.  On success, return the pointer to the section
  // and store the size in "size".  On error, return NULL.  The
  // returned section data is only valid until the ElfReader gets
  // destroyed.
  const char* GetSectionByName(const string& section_name, size_t* size);

  // Description of one section, filled in by GetSectionInfoByName().
  // The structure is almost identical to the typedef struct Elf64_Shdr
  // defined in <elf.h>, but is redefined here so that the many short
  // macro names in <elf.h> don't have to be added to our already
  // cluttered namespace.
  struct SectionInfo {
    uint32_t type;              // Section type (SHT_xxx constant from elf.h).
    uint64_t flags;             // Section flags (SHF_xxx constants from elf.h).
    uint64_t addr;              // Section virtual address at execution.
    uint64_t offset;            // Section file offset.
    uint64_t size;              // Section size in bytes.
    uint32_t link;              // Link to another section.
    uint32_t info;              // Additional section information.
    uint64_t addralign;         // Section alignment.
    uint64_t entsize;           // Entry size if section holds a table.
  };

  // This is like GetSectionByName() but it returns a lot of extra
  // information about the section through "info" instead of only its
  // size.
  const char* GetSectionInfoByName(const string& section_name,
                                   SectionInfo* info);

  // Check if "path" is an ELF binary that has not been stripped of symbol
  // tables.  This function supports both 32-bit and 64-bit ELF binaries.
  static bool IsNonStrippedELFBinary(const string& path);

  // Check if "path" is an ELF binary that has not been stripped of debug
  // info. Unlike IsNonStrippedELFBinary, this function will return
  // false for binaries passed through "strip -S".
  static bool IsNonDebugStrippedELFBinary(const string& path);

  // Match a requested section name with the section name as it
  // appears in the elf-file, adjusting for compressed debug section
  // names.  For example, returns true if name == ".debug_abbrev" and
  // sh_name == ".zdebug_abbrev"
  static bool SectionNamesMatch(std::string_view name,
                                std::string_view sh_name);

 private:
  // Lazily initialize impl32_ and return it.
  ElfReaderImpl<Elf32>* GetImpl32();
  // Ditto for impl64_.
  ElfReaderImpl<Elf64>* GetImpl64();

  // Path of the file we're reading.
  const string path_;
  // Read-only file descriptor for the file. May be -1 if there was an
  // error during open.
  int fd_;
  // Implementation objects for 32-bit and 64-bit files, created on
  // demand by GetImpl32() / GetImpl64().
  ElfReaderImpl<Elf32>* impl32_;
  ElfReaderImpl<Elf64>* impl64_;
};

}  // namespace google_breakpad
168*9712c20fSFrederick Mayle 
169*9712c20fSFrederick Mayle #endif  // COMMON_DWARF_ELF_READER_H__
170