1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Allow dynamic symbol lookup in an in-memory Elf image.
16 //
17 
18 #include "absl/debugging/internal/elf_mem_image.h"
19 
20 #ifdef ABSL_HAVE_ELF_MEM_IMAGE  // defined in elf_mem_image.h
21 
22 #include <string.h>
23 #include <cassert>
24 #include <cstddef>
25 #include "absl/base/config.h"
26 #include "absl/base/internal/raw_logging.h"
27 
28 // From binutils/include/elf/common.h (this doesn't appear to be documented
29 // anywhere else).
30 //
31 //   /* This flag appears in a Versym structure.  It means that the symbol
32 //      is hidden, and is only visible with an explicit version number.
33 //      This is a GNU extension.  */
34 //   #define VERSYM_HIDDEN           0x8000
35 //
36 //   /* This is the mask for the rest of the Versym information.  */
37 //   #define VERSYM_VERSION          0x7fff
38 
39 #define VERSYM_VERSION 0x7fff
40 
41 namespace absl {
42 ABSL_NAMESPACE_BEGIN
43 namespace debugging_internal {
44 
45 namespace {
46 
// Word-size dependent helpers, selected at compile time from the pointer size.
//
//   kElfClass - the EI_CLASS value an image must carry to be accepted by
//               ElfMemImage::Init() (which compares it against the image's
//               e_ident[EI_CLASS] byte).
//   ElfBind   - extracts the binding (STB_*) from a symbol's st_info.
//   ElfType   - extracts the type (STT_*) from a symbol's st_info.
#if __SIZEOF_POINTER__ == 4
const int kElfClass = ELFCLASS32;
int ElfBind(const ElfW(Sym) *symbol) { return ELF32_ST_BIND(symbol->st_info); }
int ElfType(const ElfW(Sym) *symbol) { return ELF32_ST_TYPE(symbol->st_info); }
#elif __SIZEOF_POINTER__ == 8
const int kElfClass = ELFCLASS64;
int ElfBind(const ElfW(Sym) *symbol) { return ELF64_ST_BIND(symbol->st_info); }
int ElfType(const ElfW(Sym) *symbol) { return ELF64_ST_TYPE(symbol->st_info); }
#else
// Neither 4- nor 8-byte pointers: no ELF class can match (-1), and the
// helpers log a FATAL message if they are ever called.
const int kElfClass = -1;
int ElfBind(const ElfW(Sym) *) {
  ABSL_RAW_LOG(FATAL, "Unexpected word size");
  return 0;
}
int ElfType(const ElfW(Sym) *) {
  ABSL_RAW_LOG(FATAL, "Unexpected word size");
  return 0;
}
#endif
66 
67 // Extract an element from one of the ELF tables, cast it to desired type.
68 // This is just a simple arithmetic and a glorified cast.
69 // Callers are responsible for bounds checking.
70 template <typename T>
GetTableElement(const ElfW (Ehdr)* ehdr,ElfW (Off)table_offset,ElfW (Word)element_size,size_t index)71 const T *GetTableElement(const ElfW(Ehdr) * ehdr, ElfW(Off) table_offset,
72                          ElfW(Word) element_size, size_t index) {
73   return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr)
74                                     + table_offset
75                                     + index * element_size);
76 }
77 
78 }  // namespace
79 
// The value of this variable doesn't matter; it's used only for its
// unique address.
// NOTE(review): presumably kInvalidBase (checked in the constructor) is
// defined in the header as the address of this object -- confirm there.
const int ElfMemImage::kInvalidBaseSentinel = 0;
83 
// Constructs an image view over the ELF object mapped at `base`.
//
// `base` must not be kInvalidBase (enforced with ABSL_RAW_CHECK). All parsing
// and validation is delegated to Init(), which also accepts a null `base`.
ElfMemImage::ElfMemImage(const void *base) {
  ABSL_RAW_CHECK(base != kInvalidBase, "bad pointer");
  Init(base);
}
88 
GetNumSymbols() const89 int ElfMemImage::GetNumSymbols() const {
90   if (!hash_) {
91     return 0;
92   }
93   // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash
94   return static_cast<int>(hash_[1]);
95 }
96 
ElfW(Sym)97 const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const {
98   ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
99   return dynsym_ + index;
100 }
101 
ElfW(Versym)102 const ElfW(Versym) *ElfMemImage::GetVersym(int index) const {
103   ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
104   return versym_ + index;
105 }
106 
ElfW(Phdr)107 const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const {
108   ABSL_RAW_CHECK(index >= 0 && index < ehdr_->e_phnum, "index out of range");
109   return GetTableElement<ElfW(Phdr)>(ehdr_, ehdr_->e_phoff, ehdr_->e_phentsize,
110                                      static_cast<size_t>(index));
111 }
112 
GetDynstr(ElfW (Word)offset) const113 const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const {
114   ABSL_RAW_CHECK(offset < strsize_, "offset out of range");
115   return dynstr_ + offset;
116 }
117 
GetSymAddr(const ElfW (Sym)* sym) const118 const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const {
119   if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) {
120     // Symbol corresponds to "special" (e.g. SHN_ABS) section.
121     return reinterpret_cast<const void *>(sym->st_value);
122   }
123   ABSL_RAW_CHECK(link_base_ < sym->st_value, "symbol out of range");
124   return GetTableElement<char>(ehdr_, 0, 1, sym->st_value - link_base_);
125 }
126 
ElfW(Verdef)127 const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const {
128   ABSL_RAW_CHECK(0 <= index && static_cast<size_t>(index) <= verdefnum_,
129                  "index out of range");
130   const ElfW(Verdef) *version_definition = verdef_;
131   while (version_definition->vd_ndx < index && version_definition->vd_next) {
132     const char *const version_definition_as_char =
133         reinterpret_cast<const char *>(version_definition);
134     version_definition =
135         reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char +
136                                                version_definition->vd_next);
137   }
138   return version_definition->vd_ndx == index ? version_definition : nullptr;
139 }
140 
ElfW(Verdaux)141 const ElfW(Verdaux) *ElfMemImage::GetVerdefAux(
142     const ElfW(Verdef) *verdef) const {
143   return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1);
144 }
145 
GetVerstr(ElfW (Word)offset) const146 const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const {
147   ABSL_RAW_CHECK(offset < strsize_, "offset out of range");
148   return dynstr_ + offset;
149 }
150 
// (Re)initializes this object to describe the ELF image mapped at `base`.
//
// All cached pointers and sizes are cleared first, so calling Init(nullptr)
// leaves the object in its "no image" state. For a non-null `base` the
// function:
//   1. validates the ELF magic, class (must equal kElfClass) and data
//      encoding (must match the host byte order);
//   2. scans the program headers for the first PT_LOAD (its p_vaddr becomes
//      link_base_) and for PT_DYNAMIC;
//   3. walks the dynamic section and records the hash, symbol, string and
//      version tables plus their sizes.
// If any step fails the function asserts (debug builds) and returns with the
// image marked as not present.
void ElfMemImage::Init(const void *base) {
  // Start from a clean slate; every early return below relies on this.
  ehdr_      = nullptr;
  dynsym_    = nullptr;
  dynstr_    = nullptr;
  versym_    = nullptr;
  verdef_    = nullptr;
  hash_      = nullptr;
  strsize_   = 0;
  verdefnum_ = 0;
  // Sentinel: PT_LOAD .p_vaddr can't possibly be this.
  link_base_ = ~ElfW(Addr){0};  // NOLINT(readability/braces)
  if (!base) {
    return;
  }
  // Check the four ELF magic bytes at the start of e_ident.
  const char *const base_as_char = reinterpret_cast<const char *>(base);
  if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 ||
      base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) {
    assert(false);
    return;
  }
  // The image's word size must match the one this code was compiled for.
  int elf_class = base_as_char[EI_CLASS];
  if (elf_class != kElfClass) {
    assert(false);
    return;
  }
  // The image's byte order must match the host's.
  switch (base_as_char[EI_DATA]) {
    case ELFDATA2LSB: {
#ifndef ABSL_IS_LITTLE_ENDIAN
      assert(false);
      return;
#endif
      break;
    }
    case ELFDATA2MSB: {
#ifndef ABSL_IS_BIG_ENDIAN
      assert(false);
      return;
#endif
      break;
    }
    default: {
      assert(false);
      return;
    }
  }

  // From here on ehdr_ is set, so failures must go through Init(nullptr) to
  // reset it.
  ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base);
  const ElfW(Phdr) *dynamic_program_header = nullptr;
  for (int i = 0; i < ehdr_->e_phnum; ++i) {
    const ElfW(Phdr) *const program_header = GetPhdr(i);
    switch (program_header->p_type) {
      case PT_LOAD:
        // `!~link_base_` is true only while link_base_ still holds the
        // all-ones sentinel, so only the first PT_LOAD is recorded.
        if (!~link_base_) {
          link_base_ = program_header->p_vaddr;
        }
        break;
      case PT_DYNAMIC:
        dynamic_program_header = program_header;
        break;
    }
  }
  if (!~link_base_ || !dynamic_program_header) {
    assert(false);
    // Mark this image as not present. Can not recur infinitely.
    Init(nullptr);
    return;
  }
  // Distance between where the image was linked to live (link_base_) and
  // where it is actually mapped (base); added to link-time addresses below.
  ptrdiff_t relocation =
      base_as_char - reinterpret_cast<const char *>(link_base_);
  ElfW(Dyn)* dynamic_entry = reinterpret_cast<ElfW(Dyn)*>(
      static_cast<intptr_t>(dynamic_program_header->p_vaddr) + relocation);
  // Walk the dynamic section up to its DT_NULL terminator.
  for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) {
    // Computed for every entry, but only used by the address-valued tags;
    // the size/count tags below read d_un.d_val directly.
    const auto value =
        static_cast<intptr_t>(dynamic_entry->d_un.d_val) + relocation;
    switch (dynamic_entry->d_tag) {
      case DT_HASH:
        hash_ = reinterpret_cast<ElfW(Word) *>(value);
        break;
      case DT_SYMTAB:
        dynsym_ = reinterpret_cast<ElfW(Sym) *>(value);
        break;
      case DT_STRTAB:
        dynstr_ = reinterpret_cast<const char *>(value);
        break;
      case DT_VERSYM:
        versym_ = reinterpret_cast<ElfW(Versym) *>(value);
        break;
      case DT_VERDEF:
        verdef_ = reinterpret_cast<ElfW(Verdef) *>(value);
        break;
      case DT_VERDEFNUM:
        verdefnum_ = static_cast<size_t>(dynamic_entry->d_un.d_val);
        break;
      case DT_STRSZ:
        strsize_ = static_cast<size_t>(dynamic_entry->d_un.d_val);
        break;
      default:
        // Unrecognized entries explicitly ignored.
        break;
    }
  }
  // Every one of these tables/sizes is required; a missing one invalidates
  // the whole image.
  if (!hash_ || !dynsym_ || !dynstr_ || !versym_ ||
      !verdef_ || !verdefnum_ || !strsize_) {
    assert(false);  // invalid VDSO
    // Mark this image as not present. Can not recur infinitely.
    Init(nullptr);
    return;
  }
}
260 
LookupSymbol(const char * name,const char * version,int type,SymbolInfo * info_out) const261 bool ElfMemImage::LookupSymbol(const char *name,
262                                const char *version,
263                                int type,
264                                SymbolInfo *info_out) const {
265   for (const SymbolInfo& info : *this) {
266     if (strcmp(info.name, name) == 0 && strcmp(info.version, version) == 0 &&
267         ElfType(info.symbol) == type) {
268       if (info_out) {
269         *info_out = info;
270       }
271       return true;
272     }
273   }
274   return false;
275 }
276 
LookupSymbolByAddress(const void * address,SymbolInfo * info_out) const277 bool ElfMemImage::LookupSymbolByAddress(const void *address,
278                                         SymbolInfo *info_out) const {
279   for (const SymbolInfo& info : *this) {
280     const char *const symbol_start =
281         reinterpret_cast<const char *>(info.address);
282     const char *const symbol_end = symbol_start + info.symbol->st_size;
283     if (symbol_start <= address && address < symbol_end) {
284       if (info_out) {
285         // Client wants to know details for that symbol (the usual case).
286         if (ElfBind(info.symbol) == STB_GLOBAL) {
287           // Strong symbol; just return it.
288           *info_out = info;
289           return true;
290         } else {
291           // Weak or local. Record it, but keep looking for a strong one.
292           *info_out = info;
293         }
294       } else {
295         // Client only cares if there is an overlapping symbol.
296         return true;
297       }
298     }
299   }
300   return false;
301 }
302 
// Creates an iterator positioned at symbol `index` of `image`.
// Only the position is stored here; info_ is filled in by Update().
ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index)
    : index_(index), image_(image) {
}
306 
// Member access to the cached description (info_) of the current symbol.
const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
  return &info_;
}
310 
// Returns a reference to the cached description (info_) of the current
// symbol. The reference points into the iterator itself.
const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const {
  return info_;
}
314 
operator ==(const SymbolIterator & rhs) const315 bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const {
316   return this->image_ == rhs.image_ && this->index_ == rhs.index_;
317 }
318 
operator !=(const SymbolIterator & rhs) const319 bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const {
320   return !(*this == rhs);
321 }
322 
operator ++()323 ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() {
324   this->Update(1);
325   return *this;
326 }
327 
begin() const328 ElfMemImage::SymbolIterator ElfMemImage::begin() const {
329   SymbolIterator it(this, 0);
330   it.Update(0);
331   return it;
332 }
333 
end() const334 ElfMemImage::SymbolIterator ElfMemImage::end() const {
335   return SymbolIterator(this, GetNumSymbols());
336 }
337 
Update(int increment)338 void ElfMemImage::SymbolIterator::Update(int increment) {
339   const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_);
340   ABSL_RAW_CHECK(image->IsPresent() || increment == 0, "");
341   if (!image->IsPresent()) {
342     return;
343   }
344   index_ += increment;
345   if (index_ >= image->GetNumSymbols()) {
346     index_ = image->GetNumSymbols();
347     return;
348   }
349   const ElfW(Sym)    *symbol = image->GetDynsym(index_);
350   const ElfW(Versym) *version_symbol = image->GetVersym(index_);
351   ABSL_RAW_CHECK(symbol && version_symbol, "");
352   const char *const symbol_name = image->GetDynstr(symbol->st_name);
353 #if defined(__NetBSD__)
354   const int version_index = version_symbol->vs_vers & VERSYM_VERSION;
355 #else
356   const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION;
357 #endif
358   const ElfW(Verdef) *version_definition = nullptr;
359   const char *version_name = "";
360   if (symbol->st_shndx == SHN_UNDEF) {
361     // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and
362     // version_index could well be greater than verdefnum_, so calling
363     // GetVerdef(version_index) may trigger assertion.
364   } else {
365     version_definition = image->GetVerdef(version_index);
366   }
367   if (version_definition) {
368     // I am expecting 1 or 2 auxiliary entries: 1 for the version itself,
369     // optional 2nd if the version has a parent.
370     ABSL_RAW_CHECK(
371         version_definition->vd_cnt == 1 || version_definition->vd_cnt == 2,
372         "wrong number of entries");
373     const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition);
374     version_name = image->GetVerstr(version_aux->vda_name);
375   }
376   info_.name    = symbol_name;
377   info_.version = version_name;
378   info_.address = image->GetSymAddr(symbol);
379   info_.symbol  = symbol;
380 }
381 
382 }  // namespace debugging_internal
383 ABSL_NAMESPACE_END
384 }  // namespace absl
385 
386 #endif  // ABSL_HAVE_ELF_MEM_IMAGE
387