1*9712c20fSFrederick Mayle // Copyright 2005 Google LLC
2*9712c20fSFrederick Mayle // Author: [email protected] (Andrew Chatham)
3*9712c20fSFrederick Mayle // Author: [email protected] (Satoru Takabayashi)
4*9712c20fSFrederick Mayle //
5*9712c20fSFrederick Mayle // Code for reading in ELF files.
6*9712c20fSFrederick Mayle //
7*9712c20fSFrederick Mayle // For information on the ELF format, see
8*9712c20fSFrederick Mayle // http://www.x86.org/ftp/manuals/tools/elf.pdf
9*9712c20fSFrederick Mayle //
10*9712c20fSFrederick Mayle // I also liked:
11*9712c20fSFrederick Mayle // http://www.caldera.com/developers/gabi/1998-04-29/contents.html
12*9712c20fSFrederick Mayle //
13*9712c20fSFrederick Mayle // A note about types: When dealing with the file format, we use types
14*9712c20fSFrederick Mayle // like Elf32_Word, but in the public interfaces we treat all
15*9712c20fSFrederick Mayle // addresses as uint64. As a result, we should be able to symbolize
16*9712c20fSFrederick Mayle // 64-bit binaries from a 32-bit process (which we don't do,
17*9712c20fSFrederick Mayle // anyway). size_t should therefore be avoided, except where required
18*9712c20fSFrederick Mayle // by things like mmap().
19*9712c20fSFrederick Mayle //
20*9712c20fSFrederick Mayle // Although most of this code can deal with arbitrary ELF files of
21*9712c20fSFrederick Mayle // either word size, the public ElfReader interface only examines
22*9712c20fSFrederick Mayle // files loaded into the current address space, which must all match
23*9712c20fSFrederick Mayle // the machine's native word size. This code cannot handle ELF files
24*9712c20fSFrederick Mayle // with a non-native byte ordering.
25*9712c20fSFrederick Mayle //
26*9712c20fSFrederick Mayle // TODO(chatham): It would be nice if we could accomplish this task
27*9712c20fSFrederick Mayle // without using malloc(), so we could use it as the process is dying.
28*9712c20fSFrederick Mayle
29*9712c20fSFrederick Mayle #ifndef _GNU_SOURCE
30*9712c20fSFrederick Mayle #define _GNU_SOURCE // needed for pread()
31*9712c20fSFrederick Mayle #endif
32*9712c20fSFrederick Mayle
33*9712c20fSFrederick Mayle #ifdef HAVE_CONFIG_H
34*9712c20fSFrederick Mayle #include <config.h> // Must come first
35*9712c20fSFrederick Mayle #endif
36*9712c20fSFrederick Mayle
37*9712c20fSFrederick Mayle #include <fcntl.h>
38*9712c20fSFrederick Mayle #include <limits.h>
39*9712c20fSFrederick Mayle #include <string.h>
40*9712c20fSFrederick Mayle #include <sys/mman.h>
41*9712c20fSFrederick Mayle #include <sys/stat.h>
42*9712c20fSFrederick Mayle #include <sys/types.h>
43*9712c20fSFrederick Mayle #include <unistd.h>
44*9712c20fSFrederick Mayle
45*9712c20fSFrederick Mayle #include <algorithm>
46*9712c20fSFrederick Mayle #include <map>
47*9712c20fSFrederick Mayle #include <string>
48*9712c20fSFrederick Mayle #include <string_view>
49*9712c20fSFrederick Mayle #include <vector>
50*9712c20fSFrederick Mayle // TODO(saugustine): Add support for compressed debug.
51*9712c20fSFrederick Mayle // Also need to add configure tests for zlib.
52*9712c20fSFrederick Mayle //#include "zlib.h"
53*9712c20fSFrederick Mayle
54*9712c20fSFrederick Mayle #include "third_party/musl/include/elf.h"
55*9712c20fSFrederick Mayle #include "elf_reader.h"
56*9712c20fSFrederick Mayle #include "common/using_std_string.h"
57*9712c20fSFrederick Mayle
58*9712c20fSFrederick Mayle // EM_AARCH64 is not defined by elf.h of GRTE v3 on x86.
59*9712c20fSFrederick Mayle // TODO(dougkwan): Remove this when v17 is retired.
60*9712c20fSFrederick Mayle #if !defined(EM_AARCH64)
61*9712c20fSFrederick Mayle #define EM_AARCH64 183 /* ARM AARCH64 */
62*9712c20fSFrederick Mayle #endif
63*9712c20fSFrederick Mayle
64*9712c20fSFrederick Mayle // Map Linux macros to their Apple equivalents.
65*9712c20fSFrederick Mayle #if __APPLE__
66*9712c20fSFrederick Mayle #ifndef __LITTLE_ENDIAN
67*9712c20fSFrederick Mayle #define __LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__
68*9712c20fSFrederick Mayle #endif // __LITTLE_ENDIAN
69*9712c20fSFrederick Mayle #ifndef __BIG_ENDIAN
70*9712c20fSFrederick Mayle #define __BIG_ENDIAN __ORDER_BIG_ENDIAN__
71*9712c20fSFrederick Mayle #endif // __BIG_ENDIAN
72*9712c20fSFrederick Mayle #ifndef __BYTE_ORDER
73*9712c20fSFrederick Mayle #define __BYTE_ORDER __BYTE_ORDER__
74*9712c20fSFrederick Mayle #endif // __BYTE_ORDER
75*9712c20fSFrederick Mayle #endif // __APPLE__
76*9712c20fSFrederick Mayle
77*9712c20fSFrederick Mayle // TODO(dthomson): Can be removed once all Java code is using the Google3
78*9712c20fSFrederick Mayle // launcher. We need to avoid processing PLT functions as it causes memory
79*9712c20fSFrederick Mayle // fragmentation in malloc, which is fixed in tcmalloc - and if the Google3
80*9712c20fSFrederick Mayle // launcher is used the JVM will then use tcmalloc. b/13735638
81*9712c20fSFrederick Mayle //DEFINE_bool(elfreader_process_dynsyms, true,
82*9712c20fSFrederick Mayle // "Activate PLT function processing");
83*9712c20fSFrederick Mayle
84*9712c20fSFrederick Mayle using std::vector;
85*9712c20fSFrederick Mayle
86*9712c20fSFrederick Mayle namespace {
87*9712c20fSFrederick Mayle
// Position of the bit that flags a Thumb address in an ARM symbol value; it
// is the lowest bit of the value.
const int kARMThumbBitOffset = 0;

// Returns |symbol_table_value| with the Thumb marker bit cleared, which gives
// the true aligned address of the symbol.
template <typename T>
T AdjustARMThumbSymbolValue(const T& symbol_table_value) {
  const T thumb_bit = static_cast<T>(1) << kARMThumbBitOffset;
  return symbol_table_value & ~thumb_bit;
}
96*9712c20fSFrederick Mayle
// Section names involved in PLT handling.
constexpr char kElfPLTRelSectionName[] = ".rel.plt";    // Holds Rel structs.
constexpr char kElfPLTRelaSectionName[] = ".rela.plt";  // Holds Rela structs.
constexpr char kElfPLTSectionName[] = ".plt";
constexpr char kElfDynSymSectionName[] = ".dynsym";

// Per-architecture PLT layout, in bytes: the size of one PLT function and the
// size of the special PLT0 entry.
constexpr int kX86PLTCodeSize = 0x10;
constexpr int kX86PLT0Size = 0x10;

constexpr int kARMPLTCodeSize = 0xc;
constexpr int kARMPLT0Size = 0x14;

constexpr int kAARCH64PLTCodeSize = 0x10;
constexpr int kAARCH64PLT0Size = 0x20;

// Appended to the name of a PLT function when it has to be explicitly
// identified as one.
constexpr char kPLTFunctionSuffix[] = "@plt";
113*9712c20fSFrederick Mayle
// Reports whether |sv| begins with |prefix|. This is a stand-in for
// std::string_view::starts_with; switch the callsites over once C++20 has
// been adopted.
bool StringViewStartsWith(std::string_view sv, std::string_view prefix) {
  if (prefix.size() > sv.size())
    return false;
  return sv.substr(0, prefix.size()) == prefix;
}
119*9712c20fSFrederick Mayle
120*9712c20fSFrederick Mayle } // namespace
121*9712c20fSFrederick Mayle
122*9712c20fSFrederick Mayle namespace google_breakpad {
123*9712c20fSFrederick Mayle
124*9712c20fSFrederick Mayle template <class ElfArch> class ElfReaderImpl;
125*9712c20fSFrederick Mayle
// 32-bit and 64-bit ELF files are handled by exactly the same logic and
// differ only in the sizes of various fields. Elf32 and Elf64 capture all of
// the differences between the two formats, and every piece of format-specific
// code in this file is templated on one of them.
class Elf32 {
 public:
  using Ehdr = Elf32_Ehdr;
  using Shdr = Elf32_Shdr;
  using Phdr = Elf32_Phdr;
  using Word = Elf32_Word;
  using Sym = Elf32_Sym;
  using Rel = Elf32_Rel;
  using Rela = Elf32_Rela;

  // Value expected in the EI_CLASS header.
  static const int kElfClass = ELFCLASS32;

  // Returns the binding type (e.g. STB_WEAK) of the symbol |sym| points at.
  static char Bind(const Elf32_Sym* sym) {
    const unsigned char info = sym->st_info;
    return ELF32_ST_BIND(info);
  }

  // Returns the symbol type (e.g. STT_FUNC) of the symbol |sym| points at.
  static char Type(const Elf32_Sym* sym) {
    const unsigned char info = sym->st_info;
    return ELF32_ST_TYPE(info);
  }

  // Returns the symbol index stored in the r_info field of a relocation.
  static int r_sym(const Elf32_Word r_info) {
    return ELF32_R_SYM(r_info);
  }
};
157*9712c20fSFrederick Mayle
158*9712c20fSFrederick Mayle
// Type aliases and field accessors for the 64-bit ELF format. Format-specific
// code in this file is templated on this class or on its 32-bit counterpart.
class Elf64 {
 public:
  using Ehdr = Elf64_Ehdr;
  using Shdr = Elf64_Shdr;
  using Phdr = Elf64_Phdr;
  using Word = Elf64_Word;
  using Sym = Elf64_Sym;
  using Rel = Elf64_Rel;
  using Rela = Elf64_Rela;

  // Value expected in the EI_CLASS header.
  static const int kElfClass = ELFCLASS64;

  // Returns the binding type (e.g. STB_WEAK) of the symbol |sym| points at.
  static char Bind(const Elf64_Sym* sym) {
    const unsigned char info = sym->st_info;
    return ELF64_ST_BIND(info);
  }

  // Returns the symbol type (e.g. STT_FUNC) of the symbol |sym| points at.
  static char Type(const Elf64_Sym* sym) {
    const unsigned char info = sym->st_info;
    return ELF64_ST_TYPE(info);
  }

  // Returns the symbol index stored in the r_info field of a relocation.
  static int r_sym(const Elf64_Xword r_info) {
    return ELF64_R_SYM(r_info);
  }
};
182*9712c20fSFrederick Mayle
183*9712c20fSFrederick Mayle
// ElfSectionReader mmaps a single section of an ELF file ("section" is ELF
// terminology). The ElfReaderImpl object providing the section header
// must exist for the lifetime of this object.
//
// The motivation for mmaping individual sections of the file is that
// many Google executables are large enough when unstripped that we
// have to worry about running out of virtual address space.
//
// For compressed sections we have no choice but to allocate memory.
template<class ElfArch>
class ElfSectionReader {
 public:
  // Maps the section described by |section_header| out of |fd|.
  //
  // |cname| is the section's name; it is used to sanity-check the sh_type of
  // the ".strtab"/".shstrtab" string tables and in diagnostics. |path| is
  // used in diagnostics only. |fd| is handed to mmap() and is neither closed
  // nor stored.
  //
  // contents() is NULL after construction whenever there is nothing usable:
  // the section is SHT_NOBITS or empty, a string table carries the wrong
  // sh_type, or the mapping itself failed.
  ElfSectionReader(const char* cname, const std::string& path, int fd,
                   const typename ElfArch::Shdr& section_header)
      : contents_aligned_(NULL),
        contents_(NULL),
        header_(section_header) {
    // Back up to the beginning of the page holding the start of the section;
    // |additional| is the distance from that page boundary to the section.
    const size_t additional = header_.sh_offset % getpagesize();
    const size_t offset_aligned = header_.sh_offset - additional;
    section_size_ = header_.sh_size;
    size_aligned_ = section_size_ + additional;
    // If the section has been stripped or is empty, do not attempt
    // to process its contents.
    if (header_.sh_type == SHT_NOBITS || header_.sh_size == 0)
      return;
    // Extra sh_type check for the string tables.
    std::string_view name{cname};
    if ((name == ".strtab" || name == ".shstrtab") &&
        header_.sh_type != SHT_STRTAB) {
      fprintf(stderr,
              "Invalid sh_type for string table section: expected "
              "SHT_STRTAB, but got %u\n",
              static_cast<unsigned int>(header_.sh_type));
      return;
    }

    // mmap() reports failure with MAP_FAILED rather than NULL. Only publish
    // the mapping once it is known to be valid, so that a failure leaves
    // contents() NULL and the destructor never unmaps a bogus address.
    void* const mapping = mmap(NULL, size_aligned_, PROT_READ, MAP_SHARED,
                               fd, offset_aligned);
    if (mapping == MAP_FAILED) {
      fprintf(stderr, "Failed to mmap section %s of %s\n",
              cname, path.c_str());
      return;
    }
    contents_aligned_ = mapping;
    // Set where the section data really begins inside the mapping.
    contents_ = reinterpret_cast<char*>(contents_aligned_) + additional;

    // Check for and handle any compressed contents.
    //if (StringViewStartsWith(name, ".zdebug_"))
    //  DecompressZlibContents();
    // TODO(saugustine): Add support for proposed elf-section flag
    // "SHF_COMPRESS".
  }

  // This object owns its mapping, so a copy would unmap the same region twice.
  ElfSectionReader(const ElfSectionReader&) = delete;
  ElfSectionReader& operator=(const ElfSectionReader&) = delete;

  ~ElfSectionReader() {
    if (contents_aligned_ != NULL)
      munmap(contents_aligned_, size_aligned_);
    else
      // Without a mapping, contents_ is either NULL (a no-op here) or,
      // presumably, a heap buffer of decompressed data; no visible code
      // allocates one while decompression is disabled.
      delete[] contents_;
  }

  // Return the section header for this section.
  typename ElfArch::Shdr const& header() const { return header_; }

  // Return memory at the given offset within this section. No bounds check is
  // performed, and the result is not meaningful when contents() is NULL.
  const char* GetOffset(typename ElfArch::Word bytes) const {
    return contents_ + bytes;
  }

  // Start of the section data, or NULL if nothing was mapped.
  const char* contents() const { return contents_; }
  // Size of the section as recorded in its header (sh_size).
  size_t section_size() const { return section_size_; }

 private:
  // page-aligned file contents
  void* contents_aligned_;
  // contents as usable by the client. For non-compressed sections,
  // pointer within contents_aligned_ to where the section data
  // begins; for compressed sections, pointer to the decompressed
  // data.
  char* contents_;
  // size of contents_aligned_
  size_t size_aligned_;
  // size of contents.
  size_t section_size_;
  const typename ElfArch::Shdr header_;
};
266*9712c20fSFrederick Mayle
267*9712c20fSFrederick Mayle // An iterator over symbols in a given section. It handles walking
268*9712c20fSFrederick Mayle // through the entries in the specified section and mapping symbol
269*9712c20fSFrederick Mayle // entries to their names in the appropriate string table (in
270*9712c20fSFrederick Mayle // another section).
271*9712c20fSFrederick Mayle template<class ElfArch>
272*9712c20fSFrederick Mayle class SymbolIterator {
273*9712c20fSFrederick Mayle public:
SymbolIterator(ElfReaderImpl<ElfArch> * reader,typename ElfArch::Word section_type)274*9712c20fSFrederick Mayle SymbolIterator(ElfReaderImpl<ElfArch>* reader,
275*9712c20fSFrederick Mayle typename ElfArch::Word section_type)
276*9712c20fSFrederick Mayle : symbol_section_(reader->GetSectionByType(section_type)),
277*9712c20fSFrederick Mayle string_section_(NULL),
278*9712c20fSFrederick Mayle num_symbols_in_section_(0),
279*9712c20fSFrederick Mayle symbol_within_section_(0) {
280*9712c20fSFrederick Mayle
281*9712c20fSFrederick Mayle // If this section type doesn't exist, leave
282*9712c20fSFrederick Mayle // num_symbols_in_section_ as zero, so this iterator is already
283*9712c20fSFrederick Mayle // done().
284*9712c20fSFrederick Mayle if (symbol_section_ != NULL) {
285*9712c20fSFrederick Mayle num_symbols_in_section_ = symbol_section_->header().sh_size /
286*9712c20fSFrederick Mayle symbol_section_->header().sh_entsize;
287*9712c20fSFrederick Mayle
288*9712c20fSFrederick Mayle // Symbol sections have sh_link set to the section number of
289*9712c20fSFrederick Mayle // the string section containing the symbol names.
290*9712c20fSFrederick Mayle string_section_ = reader->GetSection(symbol_section_->header().sh_link);
291*9712c20fSFrederick Mayle }
292*9712c20fSFrederick Mayle }
293*9712c20fSFrederick Mayle
294*9712c20fSFrederick Mayle // Return true iff we have passed all symbols in this section.
done() const295*9712c20fSFrederick Mayle bool done() const {
296*9712c20fSFrederick Mayle return symbol_within_section_ >= num_symbols_in_section_;
297*9712c20fSFrederick Mayle }
298*9712c20fSFrederick Mayle
299*9712c20fSFrederick Mayle // Advance to the next symbol in this section.
300*9712c20fSFrederick Mayle // REQUIRES: !done()
Next()301*9712c20fSFrederick Mayle void Next() { ++symbol_within_section_; }
302*9712c20fSFrederick Mayle
303*9712c20fSFrederick Mayle // Return a pointer to the current symbol.
304*9712c20fSFrederick Mayle // REQUIRES: !done()
GetSymbol() const305*9712c20fSFrederick Mayle const typename ElfArch::Sym* GetSymbol() const {
306*9712c20fSFrederick Mayle return reinterpret_cast<const typename ElfArch::Sym*>(
307*9712c20fSFrederick Mayle symbol_section_->GetOffset(symbol_within_section_ *
308*9712c20fSFrederick Mayle symbol_section_->header().sh_entsize));
309*9712c20fSFrederick Mayle }
310*9712c20fSFrederick Mayle
311*9712c20fSFrederick Mayle // Return the name of the current symbol, NULL if it has none.
312*9712c20fSFrederick Mayle // REQUIRES: !done()
GetSymbolName() const313*9712c20fSFrederick Mayle const char* GetSymbolName() const {
314*9712c20fSFrederick Mayle int name_offset = GetSymbol()->st_name;
315*9712c20fSFrederick Mayle if (name_offset == 0)
316*9712c20fSFrederick Mayle return NULL;
317*9712c20fSFrederick Mayle return string_section_->GetOffset(name_offset);
318*9712c20fSFrederick Mayle }
319*9712c20fSFrederick Mayle
GetCurrentSymbolIndex() const320*9712c20fSFrederick Mayle int GetCurrentSymbolIndex() const {
321*9712c20fSFrederick Mayle return symbol_within_section_;
322*9712c20fSFrederick Mayle }
323*9712c20fSFrederick Mayle
324*9712c20fSFrederick Mayle private:
325*9712c20fSFrederick Mayle const ElfSectionReader<ElfArch>* const symbol_section_;
326*9712c20fSFrederick Mayle const ElfSectionReader<ElfArch>* string_section_;
327*9712c20fSFrederick Mayle int num_symbols_in_section_;
328*9712c20fSFrederick Mayle int symbol_within_section_;
329*9712c20fSFrederick Mayle };
330*9712c20fSFrederick Mayle
331*9712c20fSFrederick Mayle
// Copied from strings/strutil.h. Per chatham, this library should not
// depend on strings.
//
// Returns true iff |str| ends with |suffix|.
static inline bool MyHasSuffixString(const std::string& str,
                                     const std::string& suffix) {
  if (suffix.size() > str.size())
    return false;
  return std::equal(suffix.rbegin(), suffix.rend(), str.rbegin());
}
340*9712c20fSFrederick Mayle
341*9712c20fSFrederick Mayle
342*9712c20fSFrederick Mayle // ElfReader loads an ELF binary and can provide information about its
343*9712c20fSFrederick Mayle // contents. It is most useful for matching addresses to function
344*9712c20fSFrederick Mayle // names. It does not understand debugging formats (eg dwarf2), so it
345*9712c20fSFrederick Mayle // can't print line numbers. It takes a path to an elf file and a
346*9712c20fSFrederick Mayle // readable file descriptor for that file, which it does not assume
347*9712c20fSFrederick Mayle // ownership of.
348*9712c20fSFrederick Mayle template<class ElfArch>
349*9712c20fSFrederick Mayle class ElfReaderImpl {
350*9712c20fSFrederick Mayle public:
ElfReaderImpl(const string & path,int fd)351*9712c20fSFrederick Mayle explicit ElfReaderImpl(const string& path, int fd)
352*9712c20fSFrederick Mayle : path_(path),
353*9712c20fSFrederick Mayle fd_(fd),
354*9712c20fSFrederick Mayle section_headers_(NULL),
355*9712c20fSFrederick Mayle program_headers_(NULL),
356*9712c20fSFrederick Mayle opd_section_(NULL),
357*9712c20fSFrederick Mayle base_for_text_(0),
358*9712c20fSFrederick Mayle plts_supported_(false),
359*9712c20fSFrederick Mayle plt_code_size_(0),
360*9712c20fSFrederick Mayle plt0_size_(0),
361*9712c20fSFrederick Mayle visited_relocation_entries_(false) {
362*9712c20fSFrederick Mayle string error;
363*9712c20fSFrederick Mayle is_dwp_ = MyHasSuffixString(path, ".dwp");
364*9712c20fSFrederick Mayle ParseHeaders(fd, path);
365*9712c20fSFrederick Mayle // Currently we need some extra information for PowerPC64 binaries
366*9712c20fSFrederick Mayle // including a way to read the .opd section for function descriptors and a
367*9712c20fSFrederick Mayle // way to find the linked base for function symbols.
368*9712c20fSFrederick Mayle if (header_.e_machine == EM_PPC64) {
369*9712c20fSFrederick Mayle // "opd_section_" must always be checked for NULL before use.
370*9712c20fSFrederick Mayle opd_section_ = GetSectionInfoByName(".opd", &opd_info_);
371*9712c20fSFrederick Mayle for (unsigned int k = 0u; k < GetNumSections(); ++k) {
372*9712c20fSFrederick Mayle std::string_view name{GetSectionName(section_headers_[k].sh_name)};
373*9712c20fSFrederick Mayle if (StringViewStartsWith(name, ".text")) {
374*9712c20fSFrederick Mayle base_for_text_ =
375*9712c20fSFrederick Mayle section_headers_[k].sh_addr - section_headers_[k].sh_offset;
376*9712c20fSFrederick Mayle break;
377*9712c20fSFrederick Mayle }
378*9712c20fSFrederick Mayle }
379*9712c20fSFrederick Mayle }
380*9712c20fSFrederick Mayle // Turn on PLTs.
381*9712c20fSFrederick Mayle if (header_.e_machine == EM_386 || header_.e_machine == EM_X86_64) {
382*9712c20fSFrederick Mayle plt_code_size_ = kX86PLTCodeSize;
383*9712c20fSFrederick Mayle plt0_size_ = kX86PLT0Size;
384*9712c20fSFrederick Mayle plts_supported_ = true;
385*9712c20fSFrederick Mayle } else if (header_.e_machine == EM_ARM) {
386*9712c20fSFrederick Mayle plt_code_size_ = kARMPLTCodeSize;
387*9712c20fSFrederick Mayle plt0_size_ = kARMPLT0Size;
388*9712c20fSFrederick Mayle plts_supported_ = true;
389*9712c20fSFrederick Mayle } else if (header_.e_machine == EM_AARCH64) {
390*9712c20fSFrederick Mayle plt_code_size_ = kAARCH64PLTCodeSize;
391*9712c20fSFrederick Mayle plt0_size_ = kAARCH64PLT0Size;
392*9712c20fSFrederick Mayle plts_supported_ = true;
393*9712c20fSFrederick Mayle }
394*9712c20fSFrederick Mayle }
395*9712c20fSFrederick Mayle
~ElfReaderImpl()396*9712c20fSFrederick Mayle ~ElfReaderImpl() {
397*9712c20fSFrederick Mayle for (unsigned int i = 0u; i < sections_.size(); ++i)
398*9712c20fSFrederick Mayle delete sections_[i];
399*9712c20fSFrederick Mayle delete [] section_headers_;
400*9712c20fSFrederick Mayle delete [] program_headers_;
401*9712c20fSFrederick Mayle }
402*9712c20fSFrederick Mayle
403*9712c20fSFrederick Mayle // Examine the headers of the file and return whether the file looks
404*9712c20fSFrederick Mayle // like an ELF file for this architecture. Takes an already-open
405*9712c20fSFrederick Mayle // file descriptor for the candidate file, reading in the prologue
406*9712c20fSFrederick Mayle // to see if the ELF file appears to match the current
407*9712c20fSFrederick Mayle // architecture. If error is non-NULL, it will be set with a reason
408*9712c20fSFrederick Mayle // in case of failure.
IsArchElfFile(int fd,string * error)409*9712c20fSFrederick Mayle static bool IsArchElfFile(int fd, string* error) {
410*9712c20fSFrederick Mayle unsigned char header[EI_NIDENT];
411*9712c20fSFrederick Mayle if (pread(fd, header, sizeof(header), 0) != sizeof(header)) {
412*9712c20fSFrederick Mayle if (error != NULL) *error = "Could not read header";
413*9712c20fSFrederick Mayle return false;
414*9712c20fSFrederick Mayle }
415*9712c20fSFrederick Mayle
416*9712c20fSFrederick Mayle if (memcmp(header, ELFMAG, SELFMAG) != 0) {
417*9712c20fSFrederick Mayle if (error != NULL) *error = "Missing ELF magic";
418*9712c20fSFrederick Mayle return false;
419*9712c20fSFrederick Mayle }
420*9712c20fSFrederick Mayle
421*9712c20fSFrederick Mayle if (header[EI_CLASS] != ElfArch::kElfClass) {
422*9712c20fSFrederick Mayle if (error != NULL) *error = "Different word size";
423*9712c20fSFrederick Mayle return false;
424*9712c20fSFrederick Mayle }
425*9712c20fSFrederick Mayle
426*9712c20fSFrederick Mayle int endian = 0;
427*9712c20fSFrederick Mayle if (header[EI_DATA] == ELFDATA2LSB)
428*9712c20fSFrederick Mayle endian = __LITTLE_ENDIAN;
429*9712c20fSFrederick Mayle else if (header[EI_DATA] == ELFDATA2MSB)
430*9712c20fSFrederick Mayle endian = __BIG_ENDIAN;
431*9712c20fSFrederick Mayle if (endian != __BYTE_ORDER) {
432*9712c20fSFrederick Mayle if (error != NULL) *error = "Different byte order";
433*9712c20fSFrederick Mayle return false;
434*9712c20fSFrederick Mayle }
435*9712c20fSFrederick Mayle
436*9712c20fSFrederick Mayle return true;
437*9712c20fSFrederick Mayle }
438*9712c20fSFrederick Mayle
439*9712c20fSFrederick Mayle // Return true if we can use this symbol in Address-to-Symbol map.
CanUseSymbol(const char * name,const typename ElfArch::Sym * sym)440*9712c20fSFrederick Mayle bool CanUseSymbol(const char* name, const typename ElfArch::Sym* sym) {
441*9712c20fSFrederick Mayle // For now we only save FUNC and NOTYPE symbols. For now we just
442*9712c20fSFrederick Mayle // care about functions, but some functions written in assembler
443*9712c20fSFrederick Mayle // don't have a proper ELF type attached to them, so we store
444*9712c20fSFrederick Mayle // NOTYPE symbols as well. The remaining significant type is
445*9712c20fSFrederick Mayle // OBJECT (eg global variables), which represent about 25% of
446*9712c20fSFrederick Mayle // the symbols in a typical google3 binary.
447*9712c20fSFrederick Mayle if (ElfArch::Type(sym) != STT_FUNC &&
448*9712c20fSFrederick Mayle ElfArch::Type(sym) != STT_NOTYPE) {
449*9712c20fSFrederick Mayle return false;
450*9712c20fSFrederick Mayle }
451*9712c20fSFrederick Mayle
452*9712c20fSFrederick Mayle // Target specific filtering.
453*9712c20fSFrederick Mayle switch (header_.e_machine) {
454*9712c20fSFrederick Mayle case EM_AARCH64:
455*9712c20fSFrederick Mayle case EM_ARM:
456*9712c20fSFrederick Mayle // Filter out '$x' special local symbols used by tools
457*9712c20fSFrederick Mayle return name[0] != '$' || ElfArch::Bind(sym) != STB_LOCAL;
458*9712c20fSFrederick Mayle case EM_X86_64:
459*9712c20fSFrederick Mayle // Filter out read-only constants like .LC123.
460*9712c20fSFrederick Mayle return name[0] != '.' || ElfArch::Bind(sym) != STB_LOCAL;
461*9712c20fSFrederick Mayle default:
462*9712c20fSFrederick Mayle return true;
463*9712c20fSFrederick Mayle }
464*9712c20fSFrederick Mayle }
465*9712c20fSFrederick Mayle
466*9712c20fSFrederick Mayle // Iterate over the symbols in a section, either SHT_DYNSYM or
467*9712c20fSFrederick Mayle // SHT_SYMTAB. Add all symbols to the given SymbolMap.
468*9712c20fSFrederick Mayle /*
469*9712c20fSFrederick Mayle void GetSymbolPositions(SymbolMap* symbols,
470*9712c20fSFrederick Mayle typename ElfArch::Word section_type,
471*9712c20fSFrederick Mayle uint64_t mem_offset,
472*9712c20fSFrederick Mayle uint64_t file_offset) {
473*9712c20fSFrederick Mayle // This map is used to filter out "nested" functions.
474*9712c20fSFrederick Mayle // See comment below.
475*9712c20fSFrederick Mayle AddrToSymMap addr_to_sym_map;
476*9712c20fSFrederick Mayle for (SymbolIterator<ElfArch> it(this, section_type);
477*9712c20fSFrederick Mayle !it.done(); it.Next()) {
478*9712c20fSFrederick Mayle const char* name = it.GetSymbolName();
479*9712c20fSFrederick Mayle if (name == NULL)
480*9712c20fSFrederick Mayle continue;
481*9712c20fSFrederick Mayle const typename ElfArch::Sym* sym = it.GetSymbol();
482*9712c20fSFrederick Mayle if (CanUseSymbol(name, sym)) {
483*9712c20fSFrederick Mayle const int sec = sym->st_shndx;
484*9712c20fSFrederick Mayle
485*9712c20fSFrederick Mayle // We don't support special section indices. The most common
486*9712c20fSFrederick Mayle // is SHN_ABS, for absolute symbols used deep in the bowels of
487*9712c20fSFrederick Mayle // glibc. Also ignore any undefined symbols.
488*9712c20fSFrederick Mayle if (sec == SHN_UNDEF ||
489*9712c20fSFrederick Mayle (sec >= SHN_LORESERVE && sec <= SHN_HIRESERVE)) {
490*9712c20fSFrederick Mayle continue;
491*9712c20fSFrederick Mayle }
492*9712c20fSFrederick Mayle
493*9712c20fSFrederick Mayle const typename ElfArch::Shdr& hdr = section_headers_[sec];
494*9712c20fSFrederick Mayle
495*9712c20fSFrederick Mayle // Adjust for difference between where we expected to mmap
496*9712c20fSFrederick Mayle // this section, and where it was actually mmapped.
497*9712c20fSFrederick Mayle const int64_t expected_base = hdr.sh_addr - hdr.sh_offset;
498*9712c20fSFrederick Mayle const int64_t real_base = mem_offset - file_offset;
499*9712c20fSFrederick Mayle const int64_t adjust = real_base - expected_base;
500*9712c20fSFrederick Mayle
501*9712c20fSFrederick Mayle uint64_t start = sym->st_value + adjust;
502*9712c20fSFrederick Mayle
503*9712c20fSFrederick Mayle // Adjust function symbols for PowerPC64 by dereferencing and adjusting
504*9712c20fSFrederick Mayle // the function descriptor to get the function address.
505*9712c20fSFrederick Mayle if (header_.e_machine == EM_PPC64 && ElfArch::Type(sym) == STT_FUNC) {
506*9712c20fSFrederick Mayle const uint64_t opd_addr =
507*9712c20fSFrederick Mayle AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value);
508*9712c20fSFrederick Mayle // Only adjust the returned value if the function address was found.
509*9712c20fSFrederick Mayle if (opd_addr != sym->st_value) {
510*9712c20fSFrederick Mayle const int64_t adjust_function_symbols =
511*9712c20fSFrederick Mayle real_base - base_for_text_;
512*9712c20fSFrederick Mayle start = opd_addr + adjust_function_symbols;
513*9712c20fSFrederick Mayle }
514*9712c20fSFrederick Mayle }
515*9712c20fSFrederick Mayle
516*9712c20fSFrederick Mayle addr_to_sym_map.push_back(std::make_pair(start, sym));
517*9712c20fSFrederick Mayle }
518*9712c20fSFrederick Mayle }
519*9712c20fSFrederick Mayle std::sort(addr_to_sym_map.begin(), addr_to_sym_map.end(), &AddrToSymSorter);
520*9712c20fSFrederick Mayle addr_to_sym_map.erase(std::unique(addr_to_sym_map.begin(),
521*9712c20fSFrederick Mayle addr_to_sym_map.end(), &AddrToSymEquals),
522*9712c20fSFrederick Mayle addr_to_sym_map.end());
523*9712c20fSFrederick Mayle
524*9712c20fSFrederick Mayle // Squeeze out any "nested functions".
525*9712c20fSFrederick Mayle // Nested functions are not allowed in C, but libc plays tricks.
526*9712c20fSFrederick Mayle //
527*9712c20fSFrederick Mayle // For example, here is disassembly of /lib64/tls/libc-2.3.5.so:
528*9712c20fSFrederick Mayle // 0x00000000000aa380 <read+0>: cmpl $0x0,0x2781b9(%rip)
529*9712c20fSFrederick Mayle // 0x00000000000aa387 <read+7>: jne 0xaa39b <read+27>
530*9712c20fSFrederick Mayle // 0x00000000000aa389 <__read_nocancel+0>: mov $0x0,%rax
531*9712c20fSFrederick Mayle // 0x00000000000aa390 <__read_nocancel+7>: syscall
532*9712c20fSFrederick Mayle // 0x00000000000aa392 <__read_nocancel+9>: cmp $0xfffffffffffff001,%rax
533*9712c20fSFrederick Mayle // 0x00000000000aa398 <__read_nocancel+15>: jae 0xaa3ef <read+111>
534*9712c20fSFrederick Mayle // 0x00000000000aa39a <__read_nocancel+17>: retq
535*9712c20fSFrederick Mayle // 0x00000000000aa39b <read+27>: sub $0x28,%rsp
536*9712c20fSFrederick Mayle // 0x00000000000aa39f <read+31>: mov %rdi,0x8(%rsp)
537*9712c20fSFrederick Mayle // ...
538*9712c20fSFrederick Mayle // Without removing __read_nocancel, symbolizer will return NULL
539*9712c20fSFrederick Mayle // given e.g. 0xaa39f (because the lower bound is __read_nocancel,
540*9712c20fSFrederick Mayle // but 0xaa39f is beyond its end.
541*9712c20fSFrederick Mayle if (addr_to_sym_map.empty()) {
542*9712c20fSFrederick Mayle return;
543*9712c20fSFrederick Mayle }
544*9712c20fSFrederick Mayle const ElfSectionReader<ElfArch>* const symbol_section =
545*9712c20fSFrederick Mayle this->GetSectionByType(section_type);
546*9712c20fSFrederick Mayle const ElfSectionReader<ElfArch>* const string_section =
547*9712c20fSFrederick Mayle this->GetSection(symbol_section->header().sh_link);
548*9712c20fSFrederick Mayle
549*9712c20fSFrederick Mayle typename AddrToSymMap::iterator curr = addr_to_sym_map.begin();
550*9712c20fSFrederick Mayle // Always insert the first symbol.
551*9712c20fSFrederick Mayle symbols->AddSymbol(string_section->GetOffset(curr->second->st_name),
552*9712c20fSFrederick Mayle curr->first, curr->second->st_size);
553*9712c20fSFrederick Mayle typename AddrToSymMap::iterator prev = curr++;
554*9712c20fSFrederick Mayle for (; curr != addr_to_sym_map.end(); ++curr) {
555*9712c20fSFrederick Mayle const uint64_t prev_addr = prev->first;
556*9712c20fSFrederick Mayle const uint64_t curr_addr = curr->first;
557*9712c20fSFrederick Mayle const typename ElfArch::Sym* const prev_sym = prev->second;
558*9712c20fSFrederick Mayle const typename ElfArch::Sym* const curr_sym = curr->second;
559*9712c20fSFrederick Mayle if (prev_addr + prev_sym->st_size <= curr_addr ||
560*9712c20fSFrederick Mayle // The next condition is true if two symbols overlap like this:
561*9712c20fSFrederick Mayle //
562*9712c20fSFrederick Mayle // Previous symbol |----------------------------|
563*9712c20fSFrederick Mayle // Current symbol |-------------------------------|
564*9712c20fSFrederick Mayle //
565*9712c20fSFrederick Mayle // These symbols are not found in google3 codebase, but in
566*9712c20fSFrederick Mayle // jdk1.6.0_01_gg1/jre/lib/i386/server/libjvm.so.
567*9712c20fSFrederick Mayle //
568*9712c20fSFrederick Mayle // 0619e040 00000046 t CardTableModRefBS::write_region_work()
569*9712c20fSFrederick Mayle // 0619e070 00000046 t CardTableModRefBS::write_ref_array_work()
570*9712c20fSFrederick Mayle //
571*9712c20fSFrederick Mayle // We allow overlapped symbols rather than ignore these.
572*9712c20fSFrederick Mayle // Due to the way SymbolMap::GetSymbolAtPosition() works,
573*9712c20fSFrederick Mayle // lookup for any address in [curr_addr, curr_addr + its size)
574*9712c20fSFrederick Mayle // (e.g. 0619e071) will produce the current symbol,
575*9712c20fSFrederick Mayle // which is the desired outcome.
576*9712c20fSFrederick Mayle prev_addr + prev_sym->st_size < curr_addr + curr_sym->st_size) {
577*9712c20fSFrederick Mayle const char* name = string_section->GetOffset(curr_sym->st_name);
578*9712c20fSFrederick Mayle symbols->AddSymbol(name, curr_addr, curr_sym->st_size);
579*9712c20fSFrederick Mayle prev = curr;
580*9712c20fSFrederick Mayle } else {
581*9712c20fSFrederick Mayle // Current symbol is "nested" inside previous one like this:
582*9712c20fSFrederick Mayle //
583*9712c20fSFrederick Mayle // Previous symbol |----------------------------|
584*9712c20fSFrederick Mayle // Current symbol |---------------------|
585*9712c20fSFrederick Mayle //
586*9712c20fSFrederick Mayle // This happens within glibc, e.g. __read_nocancel is nested
587*9712c20fSFrederick Mayle // "inside" __read. Ignore "inner" symbol.
588*9712c20fSFrederick Mayle //DCHECK_LE(curr_addr + curr_sym->st_size,
589*9712c20fSFrederick Mayle // prev_addr + prev_sym->st_size);
590*9712c20fSFrederick Mayle ;
591*9712c20fSFrederick Mayle }
592*9712c20fSFrederick Mayle }
593*9712c20fSFrederick Mayle }
594*9712c20fSFrederick Mayle */
595*9712c20fSFrederick Mayle
VisitSymbols(typename ElfArch::Word section_type,ElfReader::SymbolSink * sink)596*9712c20fSFrederick Mayle void VisitSymbols(typename ElfArch::Word section_type,
597*9712c20fSFrederick Mayle ElfReader::SymbolSink* sink) {
598*9712c20fSFrederick Mayle VisitSymbols(section_type, sink, -1, -1, false);
599*9712c20fSFrederick Mayle }
600*9712c20fSFrederick Mayle
VisitSymbols(typename ElfArch::Word section_type,ElfReader::SymbolSink * sink,int symbol_binding,int symbol_type,bool get_raw_symbol_values)601*9712c20fSFrederick Mayle void VisitSymbols(typename ElfArch::Word section_type,
602*9712c20fSFrederick Mayle ElfReader::SymbolSink* sink,
603*9712c20fSFrederick Mayle int symbol_binding,
604*9712c20fSFrederick Mayle int symbol_type,
605*9712c20fSFrederick Mayle bool get_raw_symbol_values) {
606*9712c20fSFrederick Mayle for (SymbolIterator<ElfArch> it(this, section_type);
607*9712c20fSFrederick Mayle !it.done(); it.Next()) {
608*9712c20fSFrederick Mayle const char* name = it.GetSymbolName();
609*9712c20fSFrederick Mayle if (!name) continue;
610*9712c20fSFrederick Mayle const typename ElfArch::Sym* sym = it.GetSymbol();
611*9712c20fSFrederick Mayle if ((symbol_binding < 0 || ElfArch::Bind(sym) == symbol_binding) &&
612*9712c20fSFrederick Mayle (symbol_type < 0 || ElfArch::Type(sym) == symbol_type)) {
613*9712c20fSFrederick Mayle typename ElfArch::Sym symbol = *sym;
614*9712c20fSFrederick Mayle // Add a PLT symbol in addition to the main undefined symbol.
615*9712c20fSFrederick Mayle // Only do this for SHT_DYNSYM, because PLT symbols are dynamic.
616*9712c20fSFrederick Mayle int symbol_index = it.GetCurrentSymbolIndex();
617*9712c20fSFrederick Mayle // TODO(dthomson): Can be removed once all Java code is using the
618*9712c20fSFrederick Mayle // Google3 launcher.
619*9712c20fSFrederick Mayle if (section_type == SHT_DYNSYM &&
620*9712c20fSFrederick Mayle static_cast<unsigned int>(symbol_index) < symbols_plt_offsets_.size() &&
621*9712c20fSFrederick Mayle symbols_plt_offsets_[symbol_index] != 0) {
622*9712c20fSFrederick Mayle string plt_name = string(name) + kPLTFunctionSuffix;
623*9712c20fSFrederick Mayle if (plt_function_names_[symbol_index].empty()) {
624*9712c20fSFrederick Mayle plt_function_names_[symbol_index] = plt_name;
625*9712c20fSFrederick Mayle } else if (plt_function_names_[symbol_index] != plt_name) {
626*9712c20fSFrederick Mayle ;
627*9712c20fSFrederick Mayle }
628*9712c20fSFrederick Mayle sink->AddSymbol(plt_function_names_[symbol_index].c_str(),
629*9712c20fSFrederick Mayle symbols_plt_offsets_[it.GetCurrentSymbolIndex()],
630*9712c20fSFrederick Mayle plt_code_size_);
631*9712c20fSFrederick Mayle }
632*9712c20fSFrederick Mayle if (!get_raw_symbol_values)
633*9712c20fSFrederick Mayle AdjustSymbolValue(&symbol);
634*9712c20fSFrederick Mayle sink->AddSymbol(name, symbol.st_value, symbol.st_size);
635*9712c20fSFrederick Mayle }
636*9712c20fSFrederick Mayle }
637*9712c20fSFrederick Mayle }
638*9712c20fSFrederick Mayle
VisitRelocationEntries()639*9712c20fSFrederick Mayle void VisitRelocationEntries() {
640*9712c20fSFrederick Mayle if (visited_relocation_entries_) {
641*9712c20fSFrederick Mayle return;
642*9712c20fSFrederick Mayle }
643*9712c20fSFrederick Mayle visited_relocation_entries_ = true;
644*9712c20fSFrederick Mayle
645*9712c20fSFrederick Mayle if (!plts_supported_) {
646*9712c20fSFrederick Mayle return;
647*9712c20fSFrederick Mayle }
648*9712c20fSFrederick Mayle // First determine if PLTs exist. If not, then there is nothing to do.
649*9712c20fSFrederick Mayle ElfReader::SectionInfo plt_section_info;
650*9712c20fSFrederick Mayle const char* plt_section =
651*9712c20fSFrederick Mayle GetSectionInfoByName(kElfPLTSectionName, &plt_section_info);
652*9712c20fSFrederick Mayle if (!plt_section) {
653*9712c20fSFrederick Mayle return;
654*9712c20fSFrederick Mayle }
655*9712c20fSFrederick Mayle if (plt_section_info.size == 0) {
656*9712c20fSFrederick Mayle return;
657*9712c20fSFrederick Mayle }
658*9712c20fSFrederick Mayle
659*9712c20fSFrederick Mayle // The PLTs could be referenced by either a Rel or Rela (Rel with Addend)
660*9712c20fSFrederick Mayle // section.
661*9712c20fSFrederick Mayle ElfReader::SectionInfo rel_section_info;
662*9712c20fSFrederick Mayle ElfReader::SectionInfo rela_section_info;
663*9712c20fSFrederick Mayle const char* rel_section =
664*9712c20fSFrederick Mayle GetSectionInfoByName(kElfPLTRelSectionName, &rel_section_info);
665*9712c20fSFrederick Mayle const char* rela_section =
666*9712c20fSFrederick Mayle GetSectionInfoByName(kElfPLTRelaSectionName, &rela_section_info);
667*9712c20fSFrederick Mayle
668*9712c20fSFrederick Mayle const typename ElfArch::Rel* rel =
669*9712c20fSFrederick Mayle reinterpret_cast<const typename ElfArch::Rel*>(rel_section);
670*9712c20fSFrederick Mayle const typename ElfArch::Rela* rela =
671*9712c20fSFrederick Mayle reinterpret_cast<const typename ElfArch::Rela*>(rela_section);
672*9712c20fSFrederick Mayle
673*9712c20fSFrederick Mayle if (!rel_section && !rela_section) {
674*9712c20fSFrederick Mayle return;
675*9712c20fSFrederick Mayle }
676*9712c20fSFrederick Mayle
677*9712c20fSFrederick Mayle // Use either Rel or Rela section, depending on which one exists.
678*9712c20fSFrederick Mayle size_t section_size = rel_section ? rel_section_info.size
679*9712c20fSFrederick Mayle : rela_section_info.size;
680*9712c20fSFrederick Mayle size_t entry_size = rel_section ? sizeof(typename ElfArch::Rel)
681*9712c20fSFrederick Mayle : sizeof(typename ElfArch::Rela);
682*9712c20fSFrederick Mayle
683*9712c20fSFrederick Mayle // Determine the number of entries in the dynamic symbol table.
684*9712c20fSFrederick Mayle ElfReader::SectionInfo dynsym_section_info;
685*9712c20fSFrederick Mayle const char* dynsym_section =
686*9712c20fSFrederick Mayle GetSectionInfoByName(kElfDynSymSectionName, &dynsym_section_info);
687*9712c20fSFrederick Mayle // The dynsym section might not exist, or it might be empty. In either case
688*9712c20fSFrederick Mayle // there is nothing to be done so return.
689*9712c20fSFrederick Mayle if (!dynsym_section || dynsym_section_info.size == 0) {
690*9712c20fSFrederick Mayle return;
691*9712c20fSFrederick Mayle }
692*9712c20fSFrederick Mayle size_t num_dynamic_symbols =
693*9712c20fSFrederick Mayle dynsym_section_info.size / dynsym_section_info.entsize;
694*9712c20fSFrederick Mayle symbols_plt_offsets_.resize(num_dynamic_symbols, 0);
695*9712c20fSFrederick Mayle
696*9712c20fSFrederick Mayle // TODO(dthomson): Can be removed once all Java code is using the
697*9712c20fSFrederick Mayle // Google3 launcher.
698*9712c20fSFrederick Mayle // Make storage room for PLT function name strings.
699*9712c20fSFrederick Mayle plt_function_names_.resize(num_dynamic_symbols);
700*9712c20fSFrederick Mayle
701*9712c20fSFrederick Mayle for (size_t i = 0; i < section_size / entry_size; ++i) {
702*9712c20fSFrederick Mayle // Determine symbol index from the |r_info| field.
703*9712c20fSFrederick Mayle int sym_index = ElfArch::r_sym(rel_section ? rel[i].r_info
704*9712c20fSFrederick Mayle : rela[i].r_info);
705*9712c20fSFrederick Mayle if (static_cast<unsigned int>(sym_index) >= symbols_plt_offsets_.size()) {
706*9712c20fSFrederick Mayle continue;
707*9712c20fSFrederick Mayle }
708*9712c20fSFrederick Mayle symbols_plt_offsets_[sym_index] =
709*9712c20fSFrederick Mayle plt_section_info.addr + plt0_size_ + i * plt_code_size_;
710*9712c20fSFrederick Mayle }
711*9712c20fSFrederick Mayle }
712*9712c20fSFrederick Mayle
713*9712c20fSFrederick Mayle // Return an ElfSectionReader for the first section of the given
714*9712c20fSFrederick Mayle // type by iterating through all section headers. Returns NULL if
715*9712c20fSFrederick Mayle // the section type is not found.
GetSectionByType(typename ElfArch::Word section_type)716*9712c20fSFrederick Mayle const ElfSectionReader<ElfArch>* GetSectionByType(
717*9712c20fSFrederick Mayle typename ElfArch::Word section_type) {
718*9712c20fSFrederick Mayle for (unsigned int k = 0u; k < GetNumSections(); ++k) {
719*9712c20fSFrederick Mayle if (section_headers_[k].sh_type == section_type) {
720*9712c20fSFrederick Mayle return GetSection(k);
721*9712c20fSFrederick Mayle }
722*9712c20fSFrederick Mayle }
723*9712c20fSFrederick Mayle return NULL;
724*9712c20fSFrederick Mayle }
725*9712c20fSFrederick Mayle
726*9712c20fSFrederick Mayle // Return the name of section "shndx". Returns NULL if the section
727*9712c20fSFrederick Mayle // is not found.
GetSectionNameByIndex(int shndx)728*9712c20fSFrederick Mayle const char* GetSectionNameByIndex(int shndx) {
729*9712c20fSFrederick Mayle return GetSectionName(section_headers_[shndx].sh_name);
730*9712c20fSFrederick Mayle }
731*9712c20fSFrederick Mayle
732*9712c20fSFrederick Mayle // Return a pointer to section "shndx", and store the size in
733*9712c20fSFrederick Mayle // "size". Returns NULL if the section is not found.
GetSectionContentsByIndex(int shndx,size_t * size)734*9712c20fSFrederick Mayle const char* GetSectionContentsByIndex(int shndx, size_t* size) {
735*9712c20fSFrederick Mayle const ElfSectionReader<ElfArch>* section = GetSection(shndx);
736*9712c20fSFrederick Mayle if (section != NULL) {
737*9712c20fSFrederick Mayle *size = section->section_size();
738*9712c20fSFrederick Mayle return section->contents();
739*9712c20fSFrederick Mayle }
740*9712c20fSFrederick Mayle return NULL;
741*9712c20fSFrederick Mayle }
742*9712c20fSFrederick Mayle
743*9712c20fSFrederick Mayle // Return a pointer to the first section of the given name by
744*9712c20fSFrederick Mayle // iterating through all section headers, and store the size in
745*9712c20fSFrederick Mayle // "size". Returns NULL if the section name is not found.
GetSectionContentsByName(const string & section_name,size_t * size)746*9712c20fSFrederick Mayle const char* GetSectionContentsByName(const string& section_name,
747*9712c20fSFrederick Mayle size_t* size) {
748*9712c20fSFrederick Mayle for (unsigned int k = 0u; k < GetNumSections(); ++k) {
749*9712c20fSFrederick Mayle // When searching for sections in a .dwp file, the sections
750*9712c20fSFrederick Mayle // we're looking for will always be at the end of the section
751*9712c20fSFrederick Mayle // table, so reverse the direction of iteration.
752*9712c20fSFrederick Mayle int shndx = is_dwp_ ? GetNumSections() - k - 1 : k;
753*9712c20fSFrederick Mayle const char* name = GetSectionName(section_headers_[shndx].sh_name);
754*9712c20fSFrederick Mayle if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) {
755*9712c20fSFrederick Mayle const ElfSectionReader<ElfArch>* section = GetSection(shndx);
756*9712c20fSFrederick Mayle if (section == NULL) {
757*9712c20fSFrederick Mayle return NULL;
758*9712c20fSFrederick Mayle } else {
759*9712c20fSFrederick Mayle *size = section->section_size();
760*9712c20fSFrederick Mayle return section->contents();
761*9712c20fSFrederick Mayle }
762*9712c20fSFrederick Mayle }
763*9712c20fSFrederick Mayle }
764*9712c20fSFrederick Mayle return NULL;
765*9712c20fSFrederick Mayle }
766*9712c20fSFrederick Mayle
767*9712c20fSFrederick Mayle // This is like GetSectionContentsByName() but it returns a lot of extra
768*9712c20fSFrederick Mayle // information about the section.
GetSectionInfoByName(const string & section_name,ElfReader::SectionInfo * info)769*9712c20fSFrederick Mayle const char* GetSectionInfoByName(const string& section_name,
770*9712c20fSFrederick Mayle ElfReader::SectionInfo* info) {
771*9712c20fSFrederick Mayle for (unsigned int k = 0u; k < GetNumSections(); ++k) {
772*9712c20fSFrederick Mayle // When searching for sections in a .dwp file, the sections
773*9712c20fSFrederick Mayle // we're looking for will always be at the end of the section
774*9712c20fSFrederick Mayle // table, so reverse the direction of iteration.
775*9712c20fSFrederick Mayle int shndx = is_dwp_ ? GetNumSections() - k - 1 : k;
776*9712c20fSFrederick Mayle const char* name = GetSectionName(section_headers_[shndx].sh_name);
777*9712c20fSFrederick Mayle if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) {
778*9712c20fSFrederick Mayle const ElfSectionReader<ElfArch>* section = GetSection(shndx);
779*9712c20fSFrederick Mayle if (section == NULL) {
780*9712c20fSFrederick Mayle return NULL;
781*9712c20fSFrederick Mayle } else {
782*9712c20fSFrederick Mayle info->type = section->header().sh_type;
783*9712c20fSFrederick Mayle info->flags = section->header().sh_flags;
784*9712c20fSFrederick Mayle info->addr = section->header().sh_addr;
785*9712c20fSFrederick Mayle info->offset = section->header().sh_offset;
786*9712c20fSFrederick Mayle info->size = section->header().sh_size;
787*9712c20fSFrederick Mayle info->link = section->header().sh_link;
788*9712c20fSFrederick Mayle info->info = section->header().sh_info;
789*9712c20fSFrederick Mayle info->addralign = section->header().sh_addralign;
790*9712c20fSFrederick Mayle info->entsize = section->header().sh_entsize;
791*9712c20fSFrederick Mayle return section->contents();
792*9712c20fSFrederick Mayle }
793*9712c20fSFrederick Mayle }
794*9712c20fSFrederick Mayle }
795*9712c20fSFrederick Mayle return NULL;
796*9712c20fSFrederick Mayle }
797*9712c20fSFrederick Mayle
798*9712c20fSFrederick Mayle // p_vaddr of the first PT_LOAD segment (if any), or 0 if no PT_LOAD
799*9712c20fSFrederick Mayle // segments are present. This is the address an ELF image was linked
800*9712c20fSFrederick Mayle // (by static linker) to be loaded at. Usually (but not always) 0 for
801*9712c20fSFrederick Mayle // shared libraries and position-independent executables.
VaddrOfFirstLoadSegment() const802*9712c20fSFrederick Mayle uint64_t VaddrOfFirstLoadSegment() const {
803*9712c20fSFrederick Mayle // Relocatable objects (of type ET_REL) do not have LOAD segments.
804*9712c20fSFrederick Mayle if (header_.e_type == ET_REL) {
805*9712c20fSFrederick Mayle return 0;
806*9712c20fSFrederick Mayle }
807*9712c20fSFrederick Mayle for (int i = 0; i < GetNumProgramHeaders(); ++i) {
808*9712c20fSFrederick Mayle if (program_headers_[i].p_type == PT_LOAD) {
809*9712c20fSFrederick Mayle return program_headers_[i].p_vaddr;
810*9712c20fSFrederick Mayle }
811*9712c20fSFrederick Mayle }
812*9712c20fSFrederick Mayle return 0;
813*9712c20fSFrederick Mayle }
814*9712c20fSFrederick Mayle
815*9712c20fSFrederick Mayle // According to the LSB ("ELF special sections"), sections with debug
816*9712c20fSFrederick Mayle // info are prefixed by ".debug". The names are not specified, but they
817*9712c20fSFrederick Mayle // look like ".debug_line", ".debug_info", etc.
HasDebugSections()818*9712c20fSFrederick Mayle bool HasDebugSections() {
819*9712c20fSFrederick Mayle // Debug sections are likely to be near the end, so reverse the
820*9712c20fSFrederick Mayle // direction of iteration.
821*9712c20fSFrederick Mayle for (int k = GetNumSections() - 1; k >= 0; --k) {
822*9712c20fSFrederick Mayle std::string_view name{GetSectionName(section_headers_[k].sh_name)};
823*9712c20fSFrederick Mayle if (StringViewStartsWith(name, ".debug") ||
824*9712c20fSFrederick Mayle StringViewStartsWith(name, ".zdebug")) {
825*9712c20fSFrederick Mayle return true;
826*9712c20fSFrederick Mayle }
827*9712c20fSFrederick Mayle }
828*9712c20fSFrederick Mayle return false;
829*9712c20fSFrederick Mayle }
830*9712c20fSFrederick Mayle
IsDynamicSharedObject() const831*9712c20fSFrederick Mayle bool IsDynamicSharedObject() const {
832*9712c20fSFrederick Mayle return header_.e_type == ET_DYN;
833*9712c20fSFrederick Mayle }
834*9712c20fSFrederick Mayle
835*9712c20fSFrederick Mayle // Return the number of sections.
GetNumSections() const836*9712c20fSFrederick Mayle uint64_t GetNumSections() const {
837*9712c20fSFrederick Mayle if (HasManySections())
838*9712c20fSFrederick Mayle return first_section_header_.sh_size;
839*9712c20fSFrederick Mayle return header_.e_shnum;
840*9712c20fSFrederick Mayle }
841*9712c20fSFrederick Mayle
842*9712c20fSFrederick Mayle private:
843*9712c20fSFrederick Mayle typedef vector<pair<uint64_t, const typename ElfArch::Sym*> > AddrToSymMap;
844*9712c20fSFrederick Mayle
AddrToSymSorter(const typename AddrToSymMap::value_type & lhs,const typename AddrToSymMap::value_type & rhs)845*9712c20fSFrederick Mayle static bool AddrToSymSorter(const typename AddrToSymMap::value_type& lhs,
846*9712c20fSFrederick Mayle const typename AddrToSymMap::value_type& rhs) {
847*9712c20fSFrederick Mayle return lhs.first < rhs.first;
848*9712c20fSFrederick Mayle }
849*9712c20fSFrederick Mayle
AddrToSymEquals(const typename AddrToSymMap::value_type & lhs,const typename AddrToSymMap::value_type & rhs)850*9712c20fSFrederick Mayle static bool AddrToSymEquals(const typename AddrToSymMap::value_type& lhs,
851*9712c20fSFrederick Mayle const typename AddrToSymMap::value_type& rhs) {
852*9712c20fSFrederick Mayle return lhs.first == rhs.first;
853*9712c20fSFrederick Mayle }
854*9712c20fSFrederick Mayle
// Does this ELF file have too many sections for the count to fit in the
// e_shnum field of the ELF header?
HasManySections() const856*9712c20fSFrederick Mayle bool HasManySections() const {
857*9712c20fSFrederick Mayle return header_.e_shnum == SHN_UNDEF;
858*9712c20fSFrederick Mayle }
859*9712c20fSFrederick Mayle
860*9712c20fSFrederick Mayle // Return the number of program headers.
GetNumProgramHeaders() const861*9712c20fSFrederick Mayle int GetNumProgramHeaders() const {
862*9712c20fSFrederick Mayle if (HasManySections() && header_.e_phnum == 0xffff &&
863*9712c20fSFrederick Mayle first_section_header_.sh_info != 0)
864*9712c20fSFrederick Mayle return first_section_header_.sh_info;
865*9712c20fSFrederick Mayle return header_.e_phnum;
866*9712c20fSFrederick Mayle }
867*9712c20fSFrederick Mayle
868*9712c20fSFrederick Mayle // Return the index of the string table.
GetStringTableIndex() const869*9712c20fSFrederick Mayle int GetStringTableIndex() const {
870*9712c20fSFrederick Mayle if (HasManySections()) {
871*9712c20fSFrederick Mayle if (header_.e_shstrndx == 0xffff)
872*9712c20fSFrederick Mayle return first_section_header_.sh_link;
873*9712c20fSFrederick Mayle else if (header_.e_shstrndx >= GetNumSections())
874*9712c20fSFrederick Mayle return 0;
875*9712c20fSFrederick Mayle }
876*9712c20fSFrederick Mayle return header_.e_shstrndx;
877*9712c20fSFrederick Mayle }
878*9712c20fSFrederick Mayle
879*9712c20fSFrederick Mayle // Given an offset into the section header string table, return the
880*9712c20fSFrederick Mayle // section name.
GetSectionName(typename ElfArch::Word sh_name)881*9712c20fSFrederick Mayle const char* GetSectionName(typename ElfArch::Word sh_name) {
882*9712c20fSFrederick Mayle const ElfSectionReader<ElfArch>* shstrtab =
883*9712c20fSFrederick Mayle GetSection(GetStringTableIndex());
884*9712c20fSFrederick Mayle if (shstrtab != NULL) {
885*9712c20fSFrederick Mayle return shstrtab->GetOffset(sh_name);
886*9712c20fSFrederick Mayle }
887*9712c20fSFrederick Mayle return NULL;
888*9712c20fSFrederick Mayle }
889*9712c20fSFrederick Mayle
890*9712c20fSFrederick Mayle // Return an ElfSectionReader for the given section. The reader will
891*9712c20fSFrederick Mayle // be freed when this object is destroyed.
GetSection(int num)892*9712c20fSFrederick Mayle const ElfSectionReader<ElfArch>* GetSection(int num) {
893*9712c20fSFrederick Mayle const char* name;
894*9712c20fSFrederick Mayle // Hard-coding the name for the section-name string table prevents
895*9712c20fSFrederick Mayle // infinite recursion.
896*9712c20fSFrederick Mayle if (num == GetStringTableIndex())
897*9712c20fSFrederick Mayle name = ".shstrtab";
898*9712c20fSFrederick Mayle else
899*9712c20fSFrederick Mayle name = GetSectionNameByIndex(num);
900*9712c20fSFrederick Mayle ElfSectionReader<ElfArch>*& reader = sections_[num];
901*9712c20fSFrederick Mayle if (reader == NULL)
902*9712c20fSFrederick Mayle reader = new ElfSectionReader<ElfArch>(name, path_, fd_,
903*9712c20fSFrederick Mayle section_headers_[num]);
904*9712c20fSFrederick Mayle return reader->contents() ? reader : nullptr;
905*9712c20fSFrederick Mayle }
906*9712c20fSFrederick Mayle
907*9712c20fSFrederick Mayle // Parse out the overall header information from the file and assert
908*9712c20fSFrederick Mayle // that it looks sane. This contains information like the magic
909*9712c20fSFrederick Mayle // number and target architecture.
ParseHeaders(int fd,const string & path)910*9712c20fSFrederick Mayle bool ParseHeaders(int fd, const string& path) {
911*9712c20fSFrederick Mayle // Read in the global ELF header.
912*9712c20fSFrederick Mayle if (pread(fd, &header_, sizeof(header_), 0) != sizeof(header_)) {
913*9712c20fSFrederick Mayle return false;
914*9712c20fSFrederick Mayle }
915*9712c20fSFrederick Mayle
916*9712c20fSFrederick Mayle // Must be an executable, dynamic shared object or relocatable object
917*9712c20fSFrederick Mayle if (header_.e_type != ET_EXEC &&
918*9712c20fSFrederick Mayle header_.e_type != ET_DYN &&
919*9712c20fSFrederick Mayle header_.e_type != ET_REL) {
920*9712c20fSFrederick Mayle return false;
921*9712c20fSFrederick Mayle }
922*9712c20fSFrederick Mayle // Need a section header.
923*9712c20fSFrederick Mayle if (header_.e_shoff == 0) {
924*9712c20fSFrederick Mayle return false;
925*9712c20fSFrederick Mayle }
926*9712c20fSFrederick Mayle
927*9712c20fSFrederick Mayle if (header_.e_shnum == SHN_UNDEF) {
928*9712c20fSFrederick Mayle // The number of sections in the program header is only a 16-bit value. In
929*9712c20fSFrederick Mayle // the event of overflow (greater than SHN_LORESERVE sections), e_shnum
930*9712c20fSFrederick Mayle // will read SHN_UNDEF and the true number of section header table entries
931*9712c20fSFrederick Mayle // is found in the sh_size field of the first section header.
932*9712c20fSFrederick Mayle // See: http://www.sco.com/developers/gabi/2003-12-17/ch4.sheader.html
933*9712c20fSFrederick Mayle if (pread(fd, &first_section_header_, sizeof(first_section_header_),
934*9712c20fSFrederick Mayle header_.e_shoff) != sizeof(first_section_header_)) {
935*9712c20fSFrederick Mayle return false;
936*9712c20fSFrederick Mayle }
937*9712c20fSFrederick Mayle }
938*9712c20fSFrederick Mayle
939*9712c20fSFrederick Mayle // Dynamically allocate enough space to store the section headers
940*9712c20fSFrederick Mayle // and read them out of the file.
941*9712c20fSFrederick Mayle const int section_headers_size =
942*9712c20fSFrederick Mayle GetNumSections() * sizeof(*section_headers_);
943*9712c20fSFrederick Mayle section_headers_ = new typename ElfArch::Shdr[section_headers_size];
944*9712c20fSFrederick Mayle if (pread(fd, section_headers_, section_headers_size, header_.e_shoff) !=
945*9712c20fSFrederick Mayle section_headers_size) {
946*9712c20fSFrederick Mayle return false;
947*9712c20fSFrederick Mayle }
948*9712c20fSFrederick Mayle
949*9712c20fSFrederick Mayle // Dynamically allocate enough space to store the program headers
950*9712c20fSFrederick Mayle // and read them out of the file.
951*9712c20fSFrederick Mayle //const int program_headers_size =
952*9712c20fSFrederick Mayle // GetNumProgramHeaders() * sizeof(*program_headers_);
953*9712c20fSFrederick Mayle program_headers_ = new typename ElfArch::Phdr[GetNumProgramHeaders()];
954*9712c20fSFrederick Mayle
955*9712c20fSFrederick Mayle // Presize the sections array for efficiency.
956*9712c20fSFrederick Mayle sections_.resize(GetNumSections(), NULL);
957*9712c20fSFrederick Mayle return true;
958*9712c20fSFrederick Mayle }
959*9712c20fSFrederick Mayle
960*9712c20fSFrederick Mayle // Given the "value" of a function descriptor return the address of the
961*9712c20fSFrederick Mayle // function (i.e. the dereferenced value). Otherwise return "value".
AdjustPPC64FunctionDescriptorSymbolValue(uint64_t value)962*9712c20fSFrederick Mayle uint64_t AdjustPPC64FunctionDescriptorSymbolValue(uint64_t value) {
963*9712c20fSFrederick Mayle if (opd_section_ != NULL &&
964*9712c20fSFrederick Mayle opd_info_.addr <= value &&
965*9712c20fSFrederick Mayle value < opd_info_.addr + opd_info_.size) {
966*9712c20fSFrederick Mayle uint64_t offset = value - opd_info_.addr;
967*9712c20fSFrederick Mayle return (*reinterpret_cast<const uint64_t*>(opd_section_ + offset));
968*9712c20fSFrederick Mayle }
969*9712c20fSFrederick Mayle return value;
970*9712c20fSFrederick Mayle }
971*9712c20fSFrederick Mayle
AdjustSymbolValue(typename ElfArch::Sym * sym)972*9712c20fSFrederick Mayle void AdjustSymbolValue(typename ElfArch::Sym* sym) {
973*9712c20fSFrederick Mayle switch (header_.e_machine) {
974*9712c20fSFrederick Mayle case EM_ARM:
975*9712c20fSFrederick Mayle // For ARM architecture, if the LSB of the function symbol offset is set,
976*9712c20fSFrederick Mayle // it indicates a Thumb function. This bit should not be taken literally.
977*9712c20fSFrederick Mayle // Clear it.
978*9712c20fSFrederick Mayle if (ElfArch::Type(sym) == STT_FUNC)
979*9712c20fSFrederick Mayle sym->st_value = AdjustARMThumbSymbolValue(sym->st_value);
980*9712c20fSFrederick Mayle break;
981*9712c20fSFrederick Mayle case EM_386:
982*9712c20fSFrederick Mayle // No adjustment needed for Intel x86 architecture. However, explicitly
983*9712c20fSFrederick Mayle // define this case as we use it quite often.
984*9712c20fSFrederick Mayle break;
985*9712c20fSFrederick Mayle case EM_PPC64:
986*9712c20fSFrederick Mayle // PowerPC64 currently has function descriptors as part of the ABI.
987*9712c20fSFrederick Mayle // Function symbols need to be adjusted accordingly.
988*9712c20fSFrederick Mayle if (ElfArch::Type(sym) == STT_FUNC)
989*9712c20fSFrederick Mayle sym->st_value = AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value);
990*9712c20fSFrederick Mayle break;
991*9712c20fSFrederick Mayle default:
992*9712c20fSFrederick Mayle break;
993*9712c20fSFrederick Mayle }
994*9712c20fSFrederick Mayle }
995*9712c20fSFrederick Mayle
996*9712c20fSFrederick Mayle friend class SymbolIterator<ElfArch>;
997*9712c20fSFrederick Mayle
998*9712c20fSFrederick Mayle // The file we're reading.
999*9712c20fSFrederick Mayle const string path_;
1000*9712c20fSFrederick Mayle // Open file descriptor for path_. Not owned by this object.
1001*9712c20fSFrederick Mayle const int fd_;
1002*9712c20fSFrederick Mayle
1003*9712c20fSFrederick Mayle // The global header of the ELF file.
1004*9712c20fSFrederick Mayle typename ElfArch::Ehdr header_;
1005*9712c20fSFrederick Mayle
1006*9712c20fSFrederick Mayle // The header of the first section. This may be used to supplement the ELF
1007*9712c20fSFrederick Mayle // file header.
1008*9712c20fSFrederick Mayle typename ElfArch::Shdr first_section_header_;
1009*9712c20fSFrederick Mayle
1010*9712c20fSFrederick Mayle // Array of GetNumSections() section headers, allocated when we read
1011*9712c20fSFrederick Mayle // in the global header.
1012*9712c20fSFrederick Mayle typename ElfArch::Shdr* section_headers_;
1013*9712c20fSFrederick Mayle
1014*9712c20fSFrederick Mayle // Array of GetNumProgramHeaders() program headers, allocated when we read
1015*9712c20fSFrederick Mayle // in the global header.
1016*9712c20fSFrederick Mayle typename ElfArch::Phdr* program_headers_;
1017*9712c20fSFrederick Mayle
1018*9712c20fSFrederick Mayle // An array of pointers to ElfSectionReaders. Sections are
1019*9712c20fSFrederick Mayle // mmaped as they're needed and not released until this object is
1020*9712c20fSFrederick Mayle // destroyed.
1021*9712c20fSFrederick Mayle vector<ElfSectionReader<ElfArch>*> sections_;
1022*9712c20fSFrederick Mayle
  // For PowerPC64 we need to keep track of function descriptors when looking
  // up the values of function symbols. Function descriptors are kept in the
  // .opd section and are dereferenced to find the function address.
1026*9712c20fSFrederick Mayle ElfReader::SectionInfo opd_info_;
1027*9712c20fSFrederick Mayle const char* opd_section_; // Must be checked for NULL before use.
1028*9712c20fSFrederick Mayle int64_t base_for_text_;
1029*9712c20fSFrederick Mayle
1030*9712c20fSFrederick Mayle // Read PLT-related sections for the current architecture.
1031*9712c20fSFrederick Mayle bool plts_supported_;
1032*9712c20fSFrederick Mayle // Code size of each PLT function for the current architecture.
1033*9712c20fSFrederick Mayle size_t plt_code_size_;
1034*9712c20fSFrederick Mayle // Size of the special first entry in the .plt section that calls the runtime
1035*9712c20fSFrederick Mayle // loader resolution routine, and that all other entries jump to when doing
1036*9712c20fSFrederick Mayle // lazy symbol binding.
1037*9712c20fSFrederick Mayle size_t plt0_size_;
1038*9712c20fSFrederick Mayle
1039*9712c20fSFrederick Mayle // Maps a dynamic symbol index to a PLT offset.
1040*9712c20fSFrederick Mayle // The vector entry index is the dynamic symbol index.
1041*9712c20fSFrederick Mayle std::vector<uint64_t> symbols_plt_offsets_;
1042*9712c20fSFrederick Mayle
1043*9712c20fSFrederick Mayle // Container for PLT function name strings. These strings are passed by
1044*9712c20fSFrederick Mayle // reference to SymbolSink::AddSymbol() so they need to be stored somewhere.
1045*9712c20fSFrederick Mayle std::vector<string> plt_function_names_;
1046*9712c20fSFrederick Mayle
1047*9712c20fSFrederick Mayle bool visited_relocation_entries_;
1048*9712c20fSFrederick Mayle
1049*9712c20fSFrederick Mayle // True if this is a .dwp file.
1050*9712c20fSFrederick Mayle bool is_dwp_;
1051*9712c20fSFrederick Mayle };
1052*9712c20fSFrederick Mayle
ElfReader(const string & path)1053*9712c20fSFrederick Mayle ElfReader::ElfReader(const string& path)
1054*9712c20fSFrederick Mayle : path_(path), fd_(-1), impl32_(NULL), impl64_(NULL) {
1055*9712c20fSFrederick Mayle // linux 2.6.XX kernel can show deleted files like this:
1056*9712c20fSFrederick Mayle // /var/run/nscd/dbYLJYaE (deleted)
1057*9712c20fSFrederick Mayle // and the kernel-supplied vdso and vsyscall mappings like this:
1058*9712c20fSFrederick Mayle // [vdso]
1059*9712c20fSFrederick Mayle // [vsyscall]
1060*9712c20fSFrederick Mayle if (MyHasSuffixString(path, " (deleted)"))
1061*9712c20fSFrederick Mayle return;
1062*9712c20fSFrederick Mayle if (path == "[vdso]")
1063*9712c20fSFrederick Mayle return;
1064*9712c20fSFrederick Mayle if (path == "[vsyscall]")
1065*9712c20fSFrederick Mayle return;
1066*9712c20fSFrederick Mayle
1067*9712c20fSFrederick Mayle fd_ = open(path.c_str(), O_RDONLY);
1068*9712c20fSFrederick Mayle }
1069*9712c20fSFrederick Mayle
~ElfReader()1070*9712c20fSFrederick Mayle ElfReader::~ElfReader() {
1071*9712c20fSFrederick Mayle if (fd_ != -1)
1072*9712c20fSFrederick Mayle close(fd_);
1073*9712c20fSFrederick Mayle if (impl32_ != NULL)
1074*9712c20fSFrederick Mayle delete impl32_;
1075*9712c20fSFrederick Mayle if (impl64_ != NULL)
1076*9712c20fSFrederick Mayle delete impl64_;
1077*9712c20fSFrederick Mayle }
1078*9712c20fSFrederick Mayle
1079*9712c20fSFrederick Mayle
// NATIVE_ELF_ARCH names the ELF class whose word size equals the width of
// unsigned long on the build target. IsNativeElfFile() is the only
// word-size-specific part of this file.
#if ULONG_MAX == 0xffffffffffffffff
#define NATIVE_ELF_ARCH Elf64
#elif ULONG_MAX == 0xffffffff
#define NATIVE_ELF_ARCH Elf32
#else
#error "Invalid word size"
#endif
1088*9712c20fSFrederick Mayle
1089*9712c20fSFrederick Mayle template <typename ElfArch>
IsElfFile(const int fd,const string & path)1090*9712c20fSFrederick Mayle static bool IsElfFile(const int fd, const string& path) {
1091*9712c20fSFrederick Mayle if (fd < 0)
1092*9712c20fSFrederick Mayle return false;
1093*9712c20fSFrederick Mayle if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) {
1094*9712c20fSFrederick Mayle // No error message here. IsElfFile gets called many times.
1095*9712c20fSFrederick Mayle return false;
1096*9712c20fSFrederick Mayle }
1097*9712c20fSFrederick Mayle return true;
1098*9712c20fSFrederick Mayle }
1099*9712c20fSFrederick Mayle
IsNativeElfFile() const1100*9712c20fSFrederick Mayle bool ElfReader::IsNativeElfFile() const {
1101*9712c20fSFrederick Mayle return IsElfFile<NATIVE_ELF_ARCH>(fd_, path_);
1102*9712c20fSFrederick Mayle }
1103*9712c20fSFrederick Mayle
IsElf32File() const1104*9712c20fSFrederick Mayle bool ElfReader::IsElf32File() const {
1105*9712c20fSFrederick Mayle return IsElfFile<Elf32>(fd_, path_);
1106*9712c20fSFrederick Mayle }
1107*9712c20fSFrederick Mayle
IsElf64File() const1108*9712c20fSFrederick Mayle bool ElfReader::IsElf64File() const {
1109*9712c20fSFrederick Mayle return IsElfFile<Elf64>(fd_, path_);
1110*9712c20fSFrederick Mayle }
1111*9712c20fSFrederick Mayle
1112*9712c20fSFrederick Mayle /*
1113*9712c20fSFrederick Mayle void ElfReader::AddSymbols(SymbolMap* symbols,
1114*9712c20fSFrederick Mayle uint64_t mem_offset, uint64_t file_offset,
1115*9712c20fSFrederick Mayle uint64_t length) {
1116*9712c20fSFrederick Mayle if (fd_ < 0)
1117*9712c20fSFrederick Mayle return;
1118*9712c20fSFrederick Mayle // TODO(chatham): Actually use the information about file offset and
1119*9712c20fSFrederick Mayle // the length of the mapped section. On some machines the data
1120*9712c20fSFrederick Mayle // section gets mapped as executable, and we'll end up reading the
1121*9712c20fSFrederick Mayle // file twice and getting some of the offsets wrong.
1122*9712c20fSFrederick Mayle if (IsElf32File()) {
1123*9712c20fSFrederick Mayle GetImpl32()->GetSymbolPositions(symbols, SHT_SYMTAB,
1124*9712c20fSFrederick Mayle mem_offset, file_offset);
1125*9712c20fSFrederick Mayle GetImpl32()->GetSymbolPositions(symbols, SHT_DYNSYM,
1126*9712c20fSFrederick Mayle mem_offset, file_offset);
1127*9712c20fSFrederick Mayle } else if (IsElf64File()) {
1128*9712c20fSFrederick Mayle GetImpl64()->GetSymbolPositions(symbols, SHT_SYMTAB,
1129*9712c20fSFrederick Mayle mem_offset, file_offset);
1130*9712c20fSFrederick Mayle GetImpl64()->GetSymbolPositions(symbols, SHT_DYNSYM,
1131*9712c20fSFrederick Mayle mem_offset, file_offset);
1132*9712c20fSFrederick Mayle }
1133*9712c20fSFrederick Mayle }
1134*9712c20fSFrederick Mayle */
1135*9712c20fSFrederick Mayle
VisitSymbols(ElfReader::SymbolSink * sink)1136*9712c20fSFrederick Mayle void ElfReader::VisitSymbols(ElfReader::SymbolSink* sink) {
1137*9712c20fSFrederick Mayle VisitSymbols(sink, -1, -1);
1138*9712c20fSFrederick Mayle }
1139*9712c20fSFrederick Mayle
VisitSymbols(ElfReader::SymbolSink * sink,int symbol_binding,int symbol_type)1140*9712c20fSFrederick Mayle void ElfReader::VisitSymbols(ElfReader::SymbolSink* sink,
1141*9712c20fSFrederick Mayle int symbol_binding,
1142*9712c20fSFrederick Mayle int symbol_type) {
1143*9712c20fSFrederick Mayle VisitSymbols(sink, symbol_binding, symbol_type, false);
1144*9712c20fSFrederick Mayle }
1145*9712c20fSFrederick Mayle
VisitSymbols(ElfReader::SymbolSink * sink,int symbol_binding,int symbol_type,bool get_raw_symbol_values)1146*9712c20fSFrederick Mayle void ElfReader::VisitSymbols(ElfReader::SymbolSink* sink,
1147*9712c20fSFrederick Mayle int symbol_binding,
1148*9712c20fSFrederick Mayle int symbol_type,
1149*9712c20fSFrederick Mayle bool get_raw_symbol_values) {
1150*9712c20fSFrederick Mayle if (IsElf32File()) {
1151*9712c20fSFrederick Mayle GetImpl32()->VisitRelocationEntries();
1152*9712c20fSFrederick Mayle GetImpl32()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type,
1153*9712c20fSFrederick Mayle get_raw_symbol_values);
1154*9712c20fSFrederick Mayle GetImpl32()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type,
1155*9712c20fSFrederick Mayle get_raw_symbol_values);
1156*9712c20fSFrederick Mayle } else if (IsElf64File()) {
1157*9712c20fSFrederick Mayle GetImpl64()->VisitRelocationEntries();
1158*9712c20fSFrederick Mayle GetImpl64()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type,
1159*9712c20fSFrederick Mayle get_raw_symbol_values);
1160*9712c20fSFrederick Mayle GetImpl64()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type,
1161*9712c20fSFrederick Mayle get_raw_symbol_values);
1162*9712c20fSFrederick Mayle }
1163*9712c20fSFrederick Mayle }
1164*9712c20fSFrederick Mayle
VaddrOfFirstLoadSegment()1165*9712c20fSFrederick Mayle uint64_t ElfReader::VaddrOfFirstLoadSegment() {
1166*9712c20fSFrederick Mayle if (IsElf32File()) {
1167*9712c20fSFrederick Mayle return GetImpl32()->VaddrOfFirstLoadSegment();
1168*9712c20fSFrederick Mayle } else if (IsElf64File()) {
1169*9712c20fSFrederick Mayle return GetImpl64()->VaddrOfFirstLoadSegment();
1170*9712c20fSFrederick Mayle } else {
1171*9712c20fSFrederick Mayle return 0;
1172*9712c20fSFrederick Mayle }
1173*9712c20fSFrederick Mayle }
1174*9712c20fSFrederick Mayle
GetSectionName(int shndx)1175*9712c20fSFrederick Mayle const char* ElfReader::GetSectionName(int shndx) {
1176*9712c20fSFrederick Mayle if (shndx < 0 || static_cast<unsigned int>(shndx) >= GetNumSections()) return NULL;
1177*9712c20fSFrederick Mayle if (IsElf32File()) {
1178*9712c20fSFrederick Mayle return GetImpl32()->GetSectionNameByIndex(shndx);
1179*9712c20fSFrederick Mayle } else if (IsElf64File()) {
1180*9712c20fSFrederick Mayle return GetImpl64()->GetSectionNameByIndex(shndx);
1181*9712c20fSFrederick Mayle } else {
1182*9712c20fSFrederick Mayle return NULL;
1183*9712c20fSFrederick Mayle }
1184*9712c20fSFrederick Mayle }
1185*9712c20fSFrederick Mayle
GetNumSections()1186*9712c20fSFrederick Mayle uint64_t ElfReader::GetNumSections() {
1187*9712c20fSFrederick Mayle if (IsElf32File()) {
1188*9712c20fSFrederick Mayle return GetImpl32()->GetNumSections();
1189*9712c20fSFrederick Mayle } else if (IsElf64File()) {
1190*9712c20fSFrederick Mayle return GetImpl64()->GetNumSections();
1191*9712c20fSFrederick Mayle } else {
1192*9712c20fSFrederick Mayle return 0;
1193*9712c20fSFrederick Mayle }
1194*9712c20fSFrederick Mayle }
1195*9712c20fSFrederick Mayle
GetSectionByIndex(int shndx,size_t * size)1196*9712c20fSFrederick Mayle const char* ElfReader::GetSectionByIndex(int shndx, size_t* size) {
1197*9712c20fSFrederick Mayle if (IsElf32File()) {
1198*9712c20fSFrederick Mayle return GetImpl32()->GetSectionContentsByIndex(shndx, size);
1199*9712c20fSFrederick Mayle } else if (IsElf64File()) {
1200*9712c20fSFrederick Mayle return GetImpl64()->GetSectionContentsByIndex(shndx, size);
1201*9712c20fSFrederick Mayle } else {
1202*9712c20fSFrederick Mayle return NULL;
1203*9712c20fSFrederick Mayle }
1204*9712c20fSFrederick Mayle }
1205*9712c20fSFrederick Mayle
GetSectionByName(const string & section_name,size_t * size)1206*9712c20fSFrederick Mayle const char* ElfReader::GetSectionByName(const string& section_name,
1207*9712c20fSFrederick Mayle size_t* size) {
1208*9712c20fSFrederick Mayle if (IsElf32File()) {
1209*9712c20fSFrederick Mayle return GetImpl32()->GetSectionContentsByName(section_name, size);
1210*9712c20fSFrederick Mayle } else if (IsElf64File()) {
1211*9712c20fSFrederick Mayle return GetImpl64()->GetSectionContentsByName(section_name, size);
1212*9712c20fSFrederick Mayle } else {
1213*9712c20fSFrederick Mayle return NULL;
1214*9712c20fSFrederick Mayle }
1215*9712c20fSFrederick Mayle }
1216*9712c20fSFrederick Mayle
GetSectionInfoByName(const string & section_name,SectionInfo * info)1217*9712c20fSFrederick Mayle const char* ElfReader::GetSectionInfoByName(const string& section_name,
1218*9712c20fSFrederick Mayle SectionInfo* info) {
1219*9712c20fSFrederick Mayle if (IsElf32File()) {
1220*9712c20fSFrederick Mayle return GetImpl32()->GetSectionInfoByName(section_name, info);
1221*9712c20fSFrederick Mayle } else if (IsElf64File()) {
1222*9712c20fSFrederick Mayle return GetImpl64()->GetSectionInfoByName(section_name, info);
1223*9712c20fSFrederick Mayle } else {
1224*9712c20fSFrederick Mayle return NULL;
1225*9712c20fSFrederick Mayle }
1226*9712c20fSFrederick Mayle }
1227*9712c20fSFrederick Mayle
SectionNamesMatch(std::string_view name,std::string_view sh_name)1228*9712c20fSFrederick Mayle bool ElfReader::SectionNamesMatch(std::string_view name,
1229*9712c20fSFrederick Mayle std::string_view sh_name) {
1230*9712c20fSFrederick Mayle std::string_view debug_prefix{".debug_"};
1231*9712c20fSFrederick Mayle std::string_view zdebug_prefix{".zdebug_"};
1232*9712c20fSFrederick Mayle if (StringViewStartsWith(name, debug_prefix) &&
1233*9712c20fSFrederick Mayle StringViewStartsWith(sh_name, zdebug_prefix)) {
1234*9712c20fSFrederick Mayle name.remove_prefix(debug_prefix.length());
1235*9712c20fSFrederick Mayle sh_name.remove_prefix(zdebug_prefix.length());
1236*9712c20fSFrederick Mayle return name == sh_name;
1237*9712c20fSFrederick Mayle }
1238*9712c20fSFrederick Mayle return name == sh_name;
1239*9712c20fSFrederick Mayle }
1240*9712c20fSFrederick Mayle
IsDynamicSharedObject()1241*9712c20fSFrederick Mayle bool ElfReader::IsDynamicSharedObject() {
1242*9712c20fSFrederick Mayle if (IsElf32File()) {
1243*9712c20fSFrederick Mayle return GetImpl32()->IsDynamicSharedObject();
1244*9712c20fSFrederick Mayle } else if (IsElf64File()) {
1245*9712c20fSFrederick Mayle return GetImpl64()->IsDynamicSharedObject();
1246*9712c20fSFrederick Mayle } else {
1247*9712c20fSFrederick Mayle return false;
1248*9712c20fSFrederick Mayle }
1249*9712c20fSFrederick Mayle }
1250*9712c20fSFrederick Mayle
GetImpl32()1251*9712c20fSFrederick Mayle ElfReaderImpl<Elf32>* ElfReader::GetImpl32() {
1252*9712c20fSFrederick Mayle if (impl32_ == NULL) {
1253*9712c20fSFrederick Mayle impl32_ = new ElfReaderImpl<Elf32>(path_, fd_);
1254*9712c20fSFrederick Mayle }
1255*9712c20fSFrederick Mayle return impl32_;
1256*9712c20fSFrederick Mayle }
1257*9712c20fSFrederick Mayle
GetImpl64()1258*9712c20fSFrederick Mayle ElfReaderImpl<Elf64>* ElfReader::GetImpl64() {
1259*9712c20fSFrederick Mayle if (impl64_ == NULL) {
1260*9712c20fSFrederick Mayle impl64_ = new ElfReaderImpl<Elf64>(path_, fd_);
1261*9712c20fSFrederick Mayle }
1262*9712c20fSFrederick Mayle return impl64_;
1263*9712c20fSFrederick Mayle }
1264*9712c20fSFrederick Mayle
1265*9712c20fSFrederick Mayle // Return true if file is an ELF binary of ElfArch, with unstripped
1266*9712c20fSFrederick Mayle // debug info (debug_only=true) or symbol table (debug_only=false).
1267*9712c20fSFrederick Mayle // Otherwise, return false.
1268*9712c20fSFrederick Mayle template <typename ElfArch>
IsNonStrippedELFBinaryImpl(const string & path,const int fd,bool debug_only)1269*9712c20fSFrederick Mayle static bool IsNonStrippedELFBinaryImpl(const string& path, const int fd,
1270*9712c20fSFrederick Mayle bool debug_only) {
1271*9712c20fSFrederick Mayle if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) return false;
1272*9712c20fSFrederick Mayle ElfReaderImpl<ElfArch> elf_reader(path, fd);
1273*9712c20fSFrederick Mayle return debug_only ?
1274*9712c20fSFrederick Mayle elf_reader.HasDebugSections()
1275*9712c20fSFrederick Mayle : (elf_reader.GetSectionByType(SHT_SYMTAB) != NULL);
1276*9712c20fSFrederick Mayle }
1277*9712c20fSFrederick Mayle
1278*9712c20fSFrederick Mayle // Helper for the IsNon[Debug]StrippedELFBinary functions.
IsNonStrippedELFBinaryHelper(const string & path,bool debug_only)1279*9712c20fSFrederick Mayle static bool IsNonStrippedELFBinaryHelper(const string& path,
1280*9712c20fSFrederick Mayle bool debug_only) {
1281*9712c20fSFrederick Mayle const int fd = open(path.c_str(), O_RDONLY);
1282*9712c20fSFrederick Mayle if (fd == -1) {
1283*9712c20fSFrederick Mayle return false;
1284*9712c20fSFrederick Mayle }
1285*9712c20fSFrederick Mayle
1286*9712c20fSFrederick Mayle if (IsNonStrippedELFBinaryImpl<Elf32>(path, fd, debug_only) ||
1287*9712c20fSFrederick Mayle IsNonStrippedELFBinaryImpl<Elf64>(path, fd, debug_only)) {
1288*9712c20fSFrederick Mayle close(fd);
1289*9712c20fSFrederick Mayle return true;
1290*9712c20fSFrederick Mayle }
1291*9712c20fSFrederick Mayle close(fd);
1292*9712c20fSFrederick Mayle return false;
1293*9712c20fSFrederick Mayle }
1294*9712c20fSFrederick Mayle
IsNonStrippedELFBinary(const string & path)1295*9712c20fSFrederick Mayle bool ElfReader::IsNonStrippedELFBinary(const string& path) {
1296*9712c20fSFrederick Mayle return IsNonStrippedELFBinaryHelper(path, false);
1297*9712c20fSFrederick Mayle }
1298*9712c20fSFrederick Mayle
IsNonDebugStrippedELFBinary(const string & path)1299*9712c20fSFrederick Mayle bool ElfReader::IsNonDebugStrippedELFBinary(const string& path) {
1300*9712c20fSFrederick Mayle return IsNonStrippedELFBinaryHelper(path, true);
1301*9712c20fSFrederick Mayle }
1302*9712c20fSFrederick Mayle } // namespace google_breakpad
1303