1// Copyright 2018 The Abseil Authors. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// https://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// This library provides Symbolize() function that symbolizes program 16// counters to their corresponding symbol names on linux platforms. 17// This library has a minimal implementation of an ELF symbol table 18// reader (i.e. it doesn't depend on libelf, etc.). 19// 20// The algorithm used in Symbolize() is as follows. 21// 22// 1. Go through a list of maps in /proc/self/maps and find the map 23// containing the program counter. 24// 25// 2. Open the mapped file and find a regular symbol table inside. 26// Iterate over symbols in the symbol table and look for the symbol 27// containing the program counter. If such a symbol is found, 28// obtain the symbol name, and demangle the symbol if possible. 29// If the symbol isn't found in the regular symbol table (binary is 30// stripped), try the same thing with a dynamic symbol table. 31// 32// Note that Symbolize() is originally implemented to be used in 33// signal handlers, hence it doesn't use malloc() and other unsafe 34// operations. It should be both thread-safe and async-signal-safe. 35// 36// Implementation note: 37// 38// We don't use heaps but only use stacks. We want to reduce the 39// stack consumption so that the symbolizer can run on small stacks. 40// 41// Here are some numbers collected with GCC 4.1.0 on x86: 42// - sizeof(Elf32_Sym) = 16 43// - sizeof(Elf32_Shdr) = 40 44// - sizeof(Elf64_Sym) = 24 45// - sizeof(Elf64_Shdr) = 64 46// 47// This implementation is intended to be async-signal-safe but uses some 48// functions which are not guaranteed to be so, such as memchr() and 49// memmove(). We assume they are async-signal-safe. 50 51#include <dlfcn.h> 52#include <elf.h> 53#include <fcntl.h> 54#include <link.h> // For ElfW() macro. 55#include <sys/stat.h> 56#include <sys/types.h> 57#include <unistd.h> 58 59#include <algorithm> 60#include <array> 61#include <atomic> 62#include <cerrno> 63#include <cinttypes> 64#include <climits> 65#include <cstdint> 66#include <cstdio> 67#include <cstdlib> 68#include <cstring> 69 70#include "absl/base/casts.h" 71#include "absl/base/dynamic_annotations.h" 72#include "absl/base/internal/low_level_alloc.h" 73#include "absl/base/internal/raw_logging.h" 74#include "absl/base/internal/spinlock.h" 75#include "absl/base/port.h" 76#include "absl/debugging/internal/demangle.h" 77#include "absl/debugging/internal/vdso_support.h" 78#include "absl/strings/string_view.h" 79 80#if defined(__FreeBSD__) && !defined(ElfW) 81#define ElfW(x) __ElfN(x) 82#endif 83 84namespace absl { 85ABSL_NAMESPACE_BEGIN 86 87// Value of argv[0]. Used by MaybeInitializeObjFile(). 88static char *argv0_value = nullptr; 89 90void InitializeSymbolizer(const char *argv0) { 91#ifdef ABSL_HAVE_VDSO_SUPPORT 92 // We need to make sure VDSOSupport::Init() is called before any setuid or 93 // chroot calls, so InitializeSymbolizer() should be called very early in the 94 // life of a program. 95 absl::debugging_internal::VDSOSupport::Init(); 96#endif 97 if (argv0_value != nullptr) { 98 free(argv0_value); 99 argv0_value = nullptr; 100 } 101 if (argv0 != nullptr && argv0[0] != '\0') { 102 argv0_value = strdup(argv0); 103 } 104} 105 106namespace debugging_internal { 107namespace { 108 109// Re-runs fn until it doesn't cause EINTR. 110#define NO_INTR(fn) \ 111 do { \ 112 } while ((fn) < 0 && errno == EINTR) 113 114// On Linux, ELF_ST_* are defined in <linux/elf.h>. To make this portable 115// we define our own ELF_ST_BIND and ELF_ST_TYPE if not available. 116#ifndef ELF_ST_BIND 117#define ELF_ST_BIND(info) (((unsigned char)(info)) >> 4) 118#endif 119 120#ifndef ELF_ST_TYPE 121#define ELF_ST_TYPE(info) (((unsigned char)(info)) & 0xF) 122#endif 123 124// Some platforms use a special .opd section to store function pointers. 125const char kOpdSectionName[] = ".opd"; 126 127#if (defined(__powerpc__) && !(_CALL_ELF > 1)) || defined(__ia64) 128// Use opd section for function descriptors on these platforms, the function 129// address is the first word of the descriptor. 130enum { kPlatformUsesOPDSections = 1 }; 131#else // not PPC or IA64 132enum { kPlatformUsesOPDSections = 0 }; 133#endif 134 135// This works for PowerPC & IA64 only. A function descriptor consist of two 136// pointers and the first one is the function's entry. 137const size_t kFunctionDescriptorSize = sizeof(void *) * 2; 138 139const int kMaxDecorators = 10; // Seems like a reasonable upper limit. 140 141struct InstalledSymbolDecorator { 142 SymbolDecorator fn; 143 void *arg; 144 int ticket; 145}; 146 147int g_num_decorators; 148InstalledSymbolDecorator g_decorators[kMaxDecorators]; 149 150struct FileMappingHint { 151 const void *start; 152 const void *end; 153 uint64_t offset; 154 const char *filename; 155}; 156 157// Protects g_decorators. 158// We are using SpinLock and not a Mutex here, because we may be called 159// from inside Mutex::Lock itself, and it prohibits recursive calls. 160// This happens in e.g. base/stacktrace_syscall_unittest. 161// Moreover, we are using only TryLock(), if the decorator list 162// is being modified (is busy), we skip all decorators, and possibly 163// loose some info. Sorry, that's the best we could do. 164ABSL_CONST_INIT absl::base_internal::SpinLock g_decorators_mu( 165 absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); 166 167const int kMaxFileMappingHints = 8; 168int g_num_file_mapping_hints; 169FileMappingHint g_file_mapping_hints[kMaxFileMappingHints]; 170// Protects g_file_mapping_hints. 171ABSL_CONST_INIT absl::base_internal::SpinLock g_file_mapping_mu( 172 absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); 173 174// Async-signal-safe function to zero a buffer. 175// memset() is not guaranteed to be async-signal-safe. 176static void SafeMemZero(void* p, size_t size) { 177 unsigned char *c = static_cast<unsigned char *>(p); 178 while (size--) { 179 *c++ = 0; 180 } 181} 182 183struct ObjFile { 184 ObjFile() 185 : filename(nullptr), 186 start_addr(nullptr), 187 end_addr(nullptr), 188 offset(0), 189 fd(-1), 190 elf_type(-1) { 191 SafeMemZero(&elf_header, sizeof(elf_header)); 192 SafeMemZero(&phdr[0], sizeof(phdr)); 193 } 194 195 char *filename; 196 const void *start_addr; 197 const void *end_addr; 198 uint64_t offset; 199 200 // The following fields are initialized on the first access to the 201 // object file. 202 int fd; 203 int elf_type; 204 ElfW(Ehdr) elf_header; 205 206 // PT_LOAD program header describing executable code. 207 // Normally we expect just one, but SWIFT binaries have two. 208 // CUDA binaries have 3 (see cr/473913254 description). 209 std::array<ElfW(Phdr), 4> phdr; 210}; 211 212// Build 4-way associative cache for symbols. Within each cache line, symbols 213// are replaced in LRU order. 214enum { 215 ASSOCIATIVITY = 4, 216}; 217struct SymbolCacheLine { 218 const void *pc[ASSOCIATIVITY]; 219 char *name[ASSOCIATIVITY]; 220 221 // age[i] is incremented when a line is accessed. it's reset to zero if the 222 // i'th entry is read. 223 uint32_t age[ASSOCIATIVITY]; 224}; 225 226// --------------------------------------------------------------- 227// An async-signal-safe arena for LowLevelAlloc 228static std::atomic<base_internal::LowLevelAlloc::Arena *> g_sig_safe_arena; 229 230static base_internal::LowLevelAlloc::Arena *SigSafeArena() { 231 return g_sig_safe_arena.load(std::memory_order_acquire); 232} 233 234static void InitSigSafeArena() { 235 if (SigSafeArena() == nullptr) { 236 base_internal::LowLevelAlloc::Arena *new_arena = 237 base_internal::LowLevelAlloc::NewArena( 238 base_internal::LowLevelAlloc::kAsyncSignalSafe); 239 base_internal::LowLevelAlloc::Arena *old_value = nullptr; 240 if (!g_sig_safe_arena.compare_exchange_strong(old_value, new_arena, 241 std::memory_order_release, 242 std::memory_order_relaxed)) { 243 // We lost a race to allocate an arena; deallocate. 244 base_internal::LowLevelAlloc::DeleteArena(new_arena); 245 } 246 } 247} 248 249// --------------------------------------------------------------- 250// An AddrMap is a vector of ObjFile, using SigSafeArena() for allocation. 251 252class AddrMap { 253 public: 254 AddrMap() : size_(0), allocated_(0), obj_(nullptr) {} 255 ~AddrMap() { base_internal::LowLevelAlloc::Free(obj_); } 256 size_t Size() const { return size_; } 257 ObjFile *At(size_t i) { return &obj_[i]; } 258 ObjFile *Add(); 259 void Clear(); 260 261 private: 262 size_t size_; // count of valid elements (<= allocated_) 263 size_t allocated_; // count of allocated elements 264 ObjFile *obj_; // array of allocated_ elements 265 AddrMap(const AddrMap &) = delete; 266 AddrMap &operator=(const AddrMap &) = delete; 267}; 268 269void AddrMap::Clear() { 270 for (size_t i = 0; i != size_; i++) { 271 At(i)->~ObjFile(); 272 } 273 size_ = 0; 274} 275 276ObjFile *AddrMap::Add() { 277 if (size_ == allocated_) { 278 size_t new_allocated = allocated_ * 2 + 50; 279 ObjFile *new_obj_ = 280 static_cast<ObjFile *>(base_internal::LowLevelAlloc::AllocWithArena( 281 new_allocated * sizeof(*new_obj_), SigSafeArena())); 282 if (obj_) { 283 memcpy(new_obj_, obj_, allocated_ * sizeof(*new_obj_)); 284 base_internal::LowLevelAlloc::Free(obj_); 285 } 286 obj_ = new_obj_; 287 allocated_ = new_allocated; 288 } 289 return new (&obj_[size_++]) ObjFile; 290} 291 292class CachingFile { 293 public: 294 // Setup reader for fd that uses buf[0, buf_size-1] as a cache. 295 CachingFile(int fd, char *buf, size_t buf_size) 296 : fd_(fd), 297 cache_(buf), 298 cache_size_(buf_size), 299 cache_start_(0), 300 cache_limit_(0) {} 301 302 int fd() const { return fd_; } 303 ssize_t ReadFromOffset(void *buf, size_t count, off_t offset); 304 bool ReadFromOffsetExact(void *buf, size_t count, off_t offset); 305 306 private: 307 // Bytes [cache_start_, cache_limit_-1] from fd_ are stored in 308 // a prefix of cache_[0, cache_size_-1]. 309 int fd_; 310 char *cache_; 311 size_t cache_size_; 312 off_t cache_start_; 313 off_t cache_limit_; 314}; 315 316// --------------------------------------------------------------- 317 318enum FindSymbolResult { SYMBOL_NOT_FOUND = 1, SYMBOL_TRUNCATED, SYMBOL_FOUND }; 319 320class Symbolizer { 321 public: 322 Symbolizer(); 323 ~Symbolizer(); 324 const char *GetSymbol(const void *const pc); 325 326 private: 327 char *CopyString(const char *s) { 328 size_t len = strlen(s); 329 char *dst = static_cast<char *>( 330 base_internal::LowLevelAlloc::AllocWithArena(len + 1, SigSafeArena())); 331 ABSL_RAW_CHECK(dst != nullptr, "out of memory"); 332 memcpy(dst, s, len + 1); 333 return dst; 334 } 335 ObjFile *FindObjFile(const void *const start, 336 size_t size) ABSL_ATTRIBUTE_NOINLINE; 337 static bool RegisterObjFile(const char *filename, 338 const void *const start_addr, 339 const void *const end_addr, uint64_t offset, 340 void *arg); 341 SymbolCacheLine *GetCacheLine(const void *const pc); 342 const char *FindSymbolInCache(const void *const pc); 343 const char *InsertSymbolInCache(const void *const pc, const char *name); 344 void AgeSymbols(SymbolCacheLine *line); 345 void ClearAddrMap(); 346 FindSymbolResult GetSymbolFromObjectFile(const ObjFile &obj, 347 const void *const pc, 348 const ptrdiff_t relocation, 349 char *out, size_t out_size, 350 char *tmp_buf, size_t tmp_buf_size); 351 const char *GetUncachedSymbol(const void *pc); 352 353 enum { 354 SYMBOL_BUF_SIZE = 3072, 355 TMP_BUF_SIZE = 1024, 356 SYMBOL_CACHE_LINES = 128, 357 FILE_CACHE_SIZE = 8192, 358 }; 359 360 AddrMap addr_map_; 361 362 bool ok_; 363 bool addr_map_read_; 364 365 char symbol_buf_[SYMBOL_BUF_SIZE]; 366 char file_cache_[FILE_CACHE_SIZE]; 367 368 // tmp_buf_ will be used to store arrays of ElfW(Shdr) and ElfW(Sym) 369 // so we ensure that tmp_buf_ is properly aligned to store either. 370 alignas(16) char tmp_buf_[TMP_BUF_SIZE]; 371 static_assert(alignof(ElfW(Shdr)) <= 16, 372 "alignment of tmp buf too small for Shdr"); 373 static_assert(alignof(ElfW(Sym)) <= 16, 374 "alignment of tmp buf too small for Sym"); 375 376 SymbolCacheLine symbol_cache_[SYMBOL_CACHE_LINES]; 377}; 378 379static std::atomic<Symbolizer *> g_cached_symbolizer; 380 381} // namespace 382 383static size_t SymbolizerSize() { 384#if defined(__wasm__) || defined(__asmjs__) 385 auto pagesize = static_cast<size_t>(getpagesize()); 386#else 387 auto pagesize = static_cast<size_t>(sysconf(_SC_PAGESIZE)); 388#endif 389 return ((sizeof(Symbolizer) - 1) / pagesize + 1) * pagesize; 390} 391 392// Return (and set null) g_cached_symbolized_state if it is not null. 393// Otherwise return a new symbolizer. 394static Symbolizer *AllocateSymbolizer() { 395 InitSigSafeArena(); 396 Symbolizer *symbolizer = 397 g_cached_symbolizer.exchange(nullptr, std::memory_order_acquire); 398 if (symbolizer != nullptr) { 399 return symbolizer; 400 } 401 return new (base_internal::LowLevelAlloc::AllocWithArena( 402 SymbolizerSize(), SigSafeArena())) Symbolizer(); 403} 404 405// Set g_cached_symbolize_state to s if it is null, otherwise 406// delete s. 407static void FreeSymbolizer(Symbolizer *s) { 408 Symbolizer *old_cached_symbolizer = nullptr; 409 if (!g_cached_symbolizer.compare_exchange_strong(old_cached_symbolizer, s, 410 std::memory_order_release, 411 std::memory_order_relaxed)) { 412 s->~Symbolizer(); 413 base_internal::LowLevelAlloc::Free(s); 414 } 415} 416 417Symbolizer::Symbolizer() : ok_(true), addr_map_read_(false) { 418 for (SymbolCacheLine &symbol_cache_line : symbol_cache_) { 419 for (size_t j = 0; j < ABSL_ARRAYSIZE(symbol_cache_line.name); ++j) { 420 symbol_cache_line.pc[j] = nullptr; 421 symbol_cache_line.name[j] = nullptr; 422 symbol_cache_line.age[j] = 0; 423 } 424 } 425} 426 427Symbolizer::~Symbolizer() { 428 for (SymbolCacheLine &symbol_cache_line : symbol_cache_) { 429 for (char *s : symbol_cache_line.name) { 430 base_internal::LowLevelAlloc::Free(s); 431 } 432 } 433 ClearAddrMap(); 434} 435 436// We don't use assert() since it's not guaranteed to be 437// async-signal-safe. Instead we define a minimal assertion 438// macro. So far, we don't need pretty printing for __FILE__, etc. 439#define SAFE_ASSERT(expr) ((expr) ? static_cast<void>(0) : abort()) 440 441// Read up to "count" bytes from file descriptor "fd" into the buffer 442// starting at "buf" while handling short reads and EINTR. On 443// success, return the number of bytes read. Otherwise, return -1. 444static ssize_t ReadPersistent(int fd, void *buf, size_t count) { 445 SAFE_ASSERT(fd >= 0); 446 SAFE_ASSERT(count <= SSIZE_MAX); 447 char *buf0 = reinterpret_cast<char *>(buf); 448 size_t num_bytes = 0; 449 while (num_bytes < count) { 450 ssize_t len; 451 NO_INTR(len = read(fd, buf0 + num_bytes, count - num_bytes)); 452 if (len < 0) { // There was an error other than EINTR. 453 ABSL_RAW_LOG(WARNING, "read failed: errno=%d", errno); 454 return -1; 455 } 456 if (len == 0) { // Reached EOF. 457 break; 458 } 459 num_bytes += static_cast<size_t>(len); 460 } 461 SAFE_ASSERT(num_bytes <= count); 462 return static_cast<ssize_t>(num_bytes); 463} 464 465// Read up to "count" bytes from "offset" into the buffer starting at "buf", 466// while handling short reads and EINTR. On success, return the number of bytes 467// read. Otherwise, return -1. 468ssize_t CachingFile::ReadFromOffset(void *buf, size_t count, off_t offset) { 469 char *dst = static_cast<char *>(buf); 470 size_t read = 0; 471 while (read < count) { 472 // Look in cache first. 473 if (offset >= cache_start_ && offset < cache_limit_) { 474 const char *hit_start = &cache_[offset - cache_start_]; 475 const size_t n = 476 std::min(count - read, static_cast<size_t>(cache_limit_ - offset)); 477 memcpy(dst, hit_start, n); 478 dst += n; 479 read += static_cast<size_t>(n); 480 offset += static_cast<off_t>(n); 481 continue; 482 } 483 484 cache_start_ = 0; 485 cache_limit_ = 0; 486 ssize_t n = pread(fd_, cache_, cache_size_, offset); 487 if (n < 0) { 488 if (errno == EINTR) { 489 continue; 490 } 491 ABSL_RAW_LOG(WARNING, "read failed: errno=%d", errno); 492 return -1; 493 } 494 if (n == 0) { // Reached EOF. 495 break; 496 } 497 498 cache_start_ = offset; 499 cache_limit_ = offset + static_cast<off_t>(n); 500 // Next iteration will copy from cache into dst. 501 } 502 return static_cast<ssize_t>(read); 503} 504 505// Try reading exactly "count" bytes from "offset" bytes into the buffer 506// starting at "buf" while handling short reads and EINTR. On success, return 507// true. Otherwise, return false. 508bool CachingFile::ReadFromOffsetExact(void *buf, size_t count, off_t offset) { 509 ssize_t len = ReadFromOffset(buf, count, offset); 510 return len >= 0 && static_cast<size_t>(len) == count; 511} 512 513// Returns elf_header.e_type if the file pointed by fd is an ELF binary. 514static int FileGetElfType(CachingFile *file) { 515 ElfW(Ehdr) elf_header; 516 if (!file->ReadFromOffsetExact(&elf_header, sizeof(elf_header), 0)) { 517 return -1; 518 } 519 if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) { 520 return -1; 521 } 522 return elf_header.e_type; 523} 524 525// Read the section headers in the given ELF binary, and if a section 526// of the specified type is found, set the output to this section header 527// and return true. Otherwise, return false. 528// To keep stack consumption low, we would like this function to not get 529// inlined. 530static ABSL_ATTRIBUTE_NOINLINE bool GetSectionHeaderByType( 531 CachingFile *file, ElfW(Half) sh_num, const off_t sh_offset, 532 ElfW(Word) type, ElfW(Shdr) * out, char *tmp_buf, size_t tmp_buf_size) { 533 ElfW(Shdr) *buf = reinterpret_cast<ElfW(Shdr) *>(tmp_buf); 534 const size_t buf_entries = tmp_buf_size / sizeof(buf[0]); 535 const size_t buf_bytes = buf_entries * sizeof(buf[0]); 536 537 for (size_t i = 0; static_cast<int>(i) < sh_num;) { 538 const size_t num_bytes_left = 539 (static_cast<size_t>(sh_num) - i) * sizeof(buf[0]); 540 const size_t num_bytes_to_read = 541 (buf_bytes > num_bytes_left) ? num_bytes_left : buf_bytes; 542 const off_t offset = sh_offset + static_cast<off_t>(i * sizeof(buf[0])); 543 const ssize_t len = file->ReadFromOffset(buf, num_bytes_to_read, offset); 544 if (len < 0) { 545 ABSL_RAW_LOG( 546 WARNING, 547 "Reading %zu bytes from offset %ju returned %zd which is negative.", 548 num_bytes_to_read, static_cast<intmax_t>(offset), len); 549 return false; 550 } 551 if (static_cast<size_t>(len) % sizeof(buf[0]) != 0) { 552 ABSL_RAW_LOG( 553 WARNING, 554 "Reading %zu bytes from offset %jd returned %zd which is not a " 555 "multiple of %zu.", 556 num_bytes_to_read, static_cast<intmax_t>(offset), len, 557 sizeof(buf[0])); 558 return false; 559 } 560 const size_t num_headers_in_buf = static_cast<size_t>(len) / sizeof(buf[0]); 561 SAFE_ASSERT(num_headers_in_buf <= buf_entries); 562 for (size_t j = 0; j < num_headers_in_buf; ++j) { 563 if (buf[j].sh_type == type) { 564 *out = buf[j]; 565 return true; 566 } 567 } 568 i += num_headers_in_buf; 569 } 570 return false; 571} 572 573// There is no particular reason to limit section name to 63 characters, 574// but there has (as yet) been no need for anything longer either. 575const int kMaxSectionNameLen = 64; 576 577// Small cache to use for miscellaneous file reads. 578const int kSmallFileCacheSize = 100; 579 580bool ForEachSection(int fd, 581 const std::function<bool(absl::string_view name, 582 const ElfW(Shdr) &)> &callback) { 583 char buf[kSmallFileCacheSize]; 584 CachingFile file(fd, buf, sizeof(buf)); 585 586 ElfW(Ehdr) elf_header; 587 if (!file.ReadFromOffsetExact(&elf_header, sizeof(elf_header), 0)) { 588 return false; 589 } 590 591 // Technically it can be larger, but in practice this never happens. 592 if (elf_header.e_shentsize != sizeof(ElfW(Shdr))) { 593 return false; 594 } 595 596 ElfW(Shdr) shstrtab; 597 off_t shstrtab_offset = static_cast<off_t>(elf_header.e_shoff) + 598 elf_header.e_shentsize * elf_header.e_shstrndx; 599 if (!file.ReadFromOffsetExact(&shstrtab, sizeof(shstrtab), shstrtab_offset)) { 600 return false; 601 } 602 603 for (int i = 0; i < elf_header.e_shnum; ++i) { 604 ElfW(Shdr) out; 605 off_t section_header_offset = 606 static_cast<off_t>(elf_header.e_shoff) + elf_header.e_shentsize * i; 607 if (!file.ReadFromOffsetExact(&out, sizeof(out), section_header_offset)) { 608 return false; 609 } 610 off_t name_offset = static_cast<off_t>(shstrtab.sh_offset) + out.sh_name; 611 char header_name[kMaxSectionNameLen]; 612 ssize_t n_read = 613 file.ReadFromOffset(&header_name, kMaxSectionNameLen, name_offset); 614 if (n_read < 0) { 615 return false; 616 } else if (n_read > kMaxSectionNameLen) { 617 // Long read? 618 return false; 619 } 620 621 absl::string_view name(header_name, 622 strnlen(header_name, static_cast<size_t>(n_read))); 623 if (!callback(name, out)) { 624 break; 625 } 626 } 627 return true; 628} 629 630// name_len should include terminating '\0'. 631bool GetSectionHeaderByName(int fd, const char *name, size_t name_len, 632 ElfW(Shdr) * out) { 633 char header_name[kMaxSectionNameLen]; 634 if (sizeof(header_name) < name_len) { 635 ABSL_RAW_LOG(WARNING, 636 "Section name '%s' is too long (%zu); " 637 "section will not be found (even if present).", 638 name, name_len); 639 // No point in even trying. 640 return false; 641 } 642 643 char buf[kSmallFileCacheSize]; 644 CachingFile file(fd, buf, sizeof(buf)); 645 ElfW(Ehdr) elf_header; 646 if (!file.ReadFromOffsetExact(&elf_header, sizeof(elf_header), 0)) { 647 return false; 648 } 649 650 // Technically it can be larger, but in practice this never happens. 651 if (elf_header.e_shentsize != sizeof(ElfW(Shdr))) { 652 return false; 653 } 654 655 ElfW(Shdr) shstrtab; 656 off_t shstrtab_offset = static_cast<off_t>(elf_header.e_shoff) + 657 elf_header.e_shentsize * elf_header.e_shstrndx; 658 if (!file.ReadFromOffsetExact(&shstrtab, sizeof(shstrtab), shstrtab_offset)) { 659 return false; 660 } 661 662 for (int i = 0; i < elf_header.e_shnum; ++i) { 663 off_t section_header_offset = 664 static_cast<off_t>(elf_header.e_shoff) + elf_header.e_shentsize * i; 665 if (!file.ReadFromOffsetExact(out, sizeof(*out), section_header_offset)) { 666 return false; 667 } 668 off_t name_offset = static_cast<off_t>(shstrtab.sh_offset) + out->sh_name; 669 ssize_t n_read = file.ReadFromOffset(&header_name, name_len, name_offset); 670 if (n_read < 0) { 671 return false; 672 } else if (static_cast<size_t>(n_read) != name_len) { 673 // Short read -- name could be at end of file. 674 continue; 675 } 676 if (memcmp(header_name, name, name_len) == 0) { 677 return true; 678 } 679 } 680 return false; 681} 682 683// Compare symbols at in the same address. 684// Return true if we should pick symbol1. 685static bool ShouldPickFirstSymbol(const ElfW(Sym) & symbol1, 686 const ElfW(Sym) & symbol2) { 687 // If one of the symbols is weak and the other is not, pick the one 688 // this is not a weak symbol. 689 char bind1 = ELF_ST_BIND(symbol1.st_info); 690 char bind2 = ELF_ST_BIND(symbol1.st_info); 691 if (bind1 == STB_WEAK && bind2 != STB_WEAK) return false; 692 if (bind2 == STB_WEAK && bind1 != STB_WEAK) return true; 693 694 // If one of the symbols has zero size and the other is not, pick the 695 // one that has non-zero size. 696 if (symbol1.st_size != 0 && symbol2.st_size == 0) { 697 return true; 698 } 699 if (symbol1.st_size == 0 && symbol2.st_size != 0) { 700 return false; 701 } 702 703 // If one of the symbols has no type and the other is not, pick the 704 // one that has a type. 705 char type1 = ELF_ST_TYPE(symbol1.st_info); 706 char type2 = ELF_ST_TYPE(symbol1.st_info); 707 if (type1 != STT_NOTYPE && type2 == STT_NOTYPE) { 708 return true; 709 } 710 if (type1 == STT_NOTYPE && type2 != STT_NOTYPE) { 711 return false; 712 } 713 714 // Pick the first one, if we still cannot decide. 715 return true; 716} 717 718// Return true if an address is inside a section. 719static bool InSection(const void *address, ptrdiff_t relocation, 720 const ElfW(Shdr) * section) { 721 const char *start = reinterpret_cast<const char *>( 722 section->sh_addr + static_cast<ElfW(Addr)>(relocation)); 723 size_t size = static_cast<size_t>(section->sh_size); 724 return start <= address && address < (start + size); 725} 726 727static const char *ComputeOffset(const char *base, ptrdiff_t offset) { 728 // Note: cast to intptr_t to avoid undefined behavior when base evaluates to 729 // zero and offset is non-zero. 730 return reinterpret_cast<const char *>(reinterpret_cast<intptr_t>(base) + 731 offset); 732} 733 734// Read a symbol table and look for the symbol containing the 735// pc. Iterate over symbols in a symbol table and look for the symbol 736// containing "pc". If the symbol is found, and its name fits in 737// out_size, the name is written into out and SYMBOL_FOUND is returned. 738// If the name does not fit, truncated name is written into out, 739// and SYMBOL_TRUNCATED is returned. Out is NUL-terminated. 740// If the symbol is not found, SYMBOL_NOT_FOUND is returned; 741// To keep stack consumption low, we would like this function to not get 742// inlined. 743static ABSL_ATTRIBUTE_NOINLINE FindSymbolResult FindSymbol( 744 const void *const pc, CachingFile *file, char *out, size_t out_size, 745 ptrdiff_t relocation, const ElfW(Shdr) * strtab, const ElfW(Shdr) * symtab, 746 const ElfW(Shdr) * opd, char *tmp_buf, size_t tmp_buf_size) { 747 if (symtab == nullptr) { 748 return SYMBOL_NOT_FOUND; 749 } 750 751 // Read multiple symbols at once to save read() calls. 752 ElfW(Sym) *buf = reinterpret_cast<ElfW(Sym) *>(tmp_buf); 753 const size_t buf_entries = tmp_buf_size / sizeof(buf[0]); 754 755 const size_t num_symbols = symtab->sh_size / symtab->sh_entsize; 756 757 // On platforms using an .opd section (PowerPC & IA64), a function symbol 758 // has the address of a function descriptor, which contains the real 759 // starting address. However, we do not always want to use the real 760 // starting address because we sometimes want to symbolize a function 761 // pointer into the .opd section, e.g. FindSymbol(&foo,...). 762 const bool pc_in_opd = kPlatformUsesOPDSections && opd != nullptr && 763 InSection(pc, relocation, opd); 764 const bool deref_function_descriptor_pointer = 765 kPlatformUsesOPDSections && opd != nullptr && !pc_in_opd; 766 767 ElfW(Sym) best_match; 768 SafeMemZero(&best_match, sizeof(best_match)); 769 bool found_match = false; 770 for (size_t i = 0; i < num_symbols;) { 771 off_t offset = 772 static_cast<off_t>(symtab->sh_offset + i * symtab->sh_entsize); 773 const size_t num_remaining_symbols = num_symbols - i; 774 const size_t entries_in_chunk = 775 std::min(num_remaining_symbols, buf_entries); 776 const size_t bytes_in_chunk = entries_in_chunk * sizeof(buf[0]); 777 const ssize_t len = file->ReadFromOffset(buf, bytes_in_chunk, offset); 778 SAFE_ASSERT(len >= 0); 779 SAFE_ASSERT(static_cast<size_t>(len) % sizeof(buf[0]) == 0); 780 const size_t num_symbols_in_buf = static_cast<size_t>(len) / sizeof(buf[0]); 781 SAFE_ASSERT(num_symbols_in_buf <= entries_in_chunk); 782 for (size_t j = 0; j < num_symbols_in_buf; ++j) { 783 const ElfW(Sym) &symbol = buf[j]; 784 785 // For a DSO, a symbol address is relocated by the loading address. 786 // We keep the original address for opd redirection below. 787 const char *const original_start_address = 788 reinterpret_cast<const char *>(symbol.st_value); 789 const char *start_address = 790 ComputeOffset(original_start_address, relocation); 791 792#ifdef __arm__ 793 // ARM functions are always aligned to multiples of two bytes; the 794 // lowest-order bit in start_address is ignored by the CPU and indicates 795 // whether the function contains ARM (0) or Thumb (1) code. We don't care 796 // about what encoding is being used; we just want the real start address 797 // of the function. 798 start_address = reinterpret_cast<const char *>( 799 reinterpret_cast<uintptr_t>(start_address) & ~1u); 800#endif 801 802 if (deref_function_descriptor_pointer && 803 InSection(original_start_address, /*relocation=*/0, opd)) { 804 // The opd section is mapped into memory. Just dereference 805 // start_address to get the first double word, which points to the 806 // function entry. 807 start_address = *reinterpret_cast<const char *const *>(start_address); 808 } 809 810 // If pc is inside the .opd section, it points to a function descriptor. 811 const size_t size = pc_in_opd ? kFunctionDescriptorSize : symbol.st_size; 812 const void *const end_address = 813 ComputeOffset(start_address, static_cast<ptrdiff_t>(size)); 814 if (symbol.st_value != 0 && // Skip null value symbols. 815 symbol.st_shndx != 0 && // Skip undefined symbols. 816#ifdef STT_TLS 817 ELF_ST_TYPE(symbol.st_info) != STT_TLS && // Skip thread-local data. 818#endif // STT_TLS 819 ((start_address <= pc && pc < end_address) || 820 (start_address == pc && pc == end_address))) { 821 if (!found_match || ShouldPickFirstSymbol(symbol, best_match)) { 822 found_match = true; 823 best_match = symbol; 824 } 825 } 826 } 827 i += num_symbols_in_buf; 828 } 829 830 if (found_match) { 831 const off_t off = 832 static_cast<off_t>(strtab->sh_offset) + best_match.st_name; 833 const ssize_t n_read = file->ReadFromOffset(out, out_size, off); 834 if (n_read <= 0) { 835 // This should never happen. 836 ABSL_RAW_LOG(WARNING, 837 "Unable to read from fd %d at offset %lld: n_read = %zd", 838 file->fd(), static_cast<long long>(off), n_read); 839 return SYMBOL_NOT_FOUND; 840 } 841 ABSL_RAW_CHECK(static_cast<size_t>(n_read) <= out_size, 842 "ReadFromOffset read too much data."); 843 844 // strtab->sh_offset points into .strtab-like section that contains 845 // NUL-terminated strings: '\0foo\0barbaz\0...". 846 // 847 // sh_offset+st_name points to the start of symbol name, but we don't know 848 // how long the symbol is, so we try to read as much as we have space for, 849 // and usually over-read (i.e. there is a NUL somewhere before n_read). 850 if (memchr(out, '\0', static_cast<size_t>(n_read)) == nullptr) { 851 // Either out_size was too small (n_read == out_size and no NUL), or 852 // we tried to read past the EOF (n_read < out_size) and .strtab is 853 // corrupt (missing terminating NUL; should never happen for valid ELF). 854 out[n_read - 1] = '\0'; 855 return SYMBOL_TRUNCATED; 856 } 857 return SYMBOL_FOUND; 858 } 859 860 return SYMBOL_NOT_FOUND; 861} 862 863// Get the symbol name of "pc" from the file pointed by "fd". Process 864// both regular and dynamic symbol tables if necessary. 865// See FindSymbol() comment for description of return value. 866FindSymbolResult Symbolizer::GetSymbolFromObjectFile( 867 const ObjFile &obj, const void *const pc, const ptrdiff_t relocation, 868 char *out, size_t out_size, char *tmp_buf, size_t tmp_buf_size) { 869 ElfW(Shdr) symtab; 870 ElfW(Shdr) strtab; 871 ElfW(Shdr) opd; 872 ElfW(Shdr) *opd_ptr = nullptr; 873 874 // On platforms using an .opd sections for function descriptor, read 875 // the section header. The .opd section is in data segment and should be 876 // loaded but we check that it is mapped just to be extra careful. 877 if (kPlatformUsesOPDSections) { 878 if (GetSectionHeaderByName(obj.fd, kOpdSectionName, 879 sizeof(kOpdSectionName) - 1, &opd) && 880 FindObjFile(reinterpret_cast<const char *>(opd.sh_addr) + relocation, 881 opd.sh_size) != nullptr) { 882 opd_ptr = &opd; 883 } else { 884 return SYMBOL_NOT_FOUND; 885 } 886 } 887 888 CachingFile file(obj.fd, file_cache_, sizeof(file_cache_)); 889 890 // Consult a regular symbol table, then fall back to the dynamic symbol table. 891 for (const auto symbol_table_type : {SHT_SYMTAB, SHT_DYNSYM}) { 892 if (!GetSectionHeaderByType(&file, obj.elf_header.e_shnum, 893 static_cast<off_t>(obj.elf_header.e_shoff), 894 static_cast<ElfW(Word)>(symbol_table_type), 895 &symtab, tmp_buf, tmp_buf_size)) { 896 continue; 897 } 898 if (!file.ReadFromOffsetExact( 899 &strtab, sizeof(strtab), 900 static_cast<off_t>(obj.elf_header.e_shoff + 901 symtab.sh_link * sizeof(symtab)))) { 902 continue; 903 } 904 const FindSymbolResult rc = 905 FindSymbol(pc, &file, out, out_size, relocation, &strtab, &symtab, 906 opd_ptr, tmp_buf, tmp_buf_size); 907 if (rc != SYMBOL_NOT_FOUND) { 908 return rc; 909 } 910 } 911 912 return SYMBOL_NOT_FOUND; 913} 914 915namespace { 916// Thin wrapper around a file descriptor so that the file descriptor 917// gets closed for sure. 918class FileDescriptor { 919 public: 920 explicit FileDescriptor(int fd) : fd_(fd) {} 921 FileDescriptor(const FileDescriptor &) = delete; 922 FileDescriptor &operator=(const FileDescriptor &) = delete; 923 924 ~FileDescriptor() { 925 if (fd_ >= 0) { 926 close(fd_); 927 } 928 } 929 930 int get() const { return fd_; } 931 932 private: 933 const int fd_; 934}; 935 936// Helper class for reading lines from file. 937// 938// Note: we don't use ProcMapsIterator since the object is big (it has 939// a 5k array member) and uses async-unsafe functions such as sscanf() 940// and snprintf(). 941class LineReader { 942 public: 943 explicit LineReader(int fd, char *buf, size_t buf_len) 944 : fd_(fd), 945 buf_len_(buf_len), 946 buf_(buf), 947 bol_(buf), 948 eol_(buf), 949 eod_(buf) {} 950 951 LineReader(const LineReader &) = delete; 952 LineReader &operator=(const LineReader &) = delete; 953 954 // Read '\n'-terminated line from file. On success, modify "bol" 955 // and "eol", then return true. Otherwise, return false. 956 // 957 // Note: if the last line doesn't end with '\n', the line will be 958 // dropped. It's an intentional behavior to make the code simple. 959 bool ReadLine(const char **bol, const char **eol) { 960 if (BufferIsEmpty()) { // First time. 961 const ssize_t num_bytes = ReadPersistent(fd_, buf_, buf_len_); 962 if (num_bytes <= 0) { // EOF or error. 963 return false; 964 } 965 eod_ = buf_ + num_bytes; 966 bol_ = buf_; 967 } else { 968 bol_ = eol_ + 1; // Advance to the next line in the buffer. 969 SAFE_ASSERT(bol_ <= eod_); // "bol_" can point to "eod_". 970 if (!HasCompleteLine()) { 971 const auto incomplete_line_length = static_cast<size_t>(eod_ - bol_); 972 // Move the trailing incomplete line to the beginning. 973 memmove(buf_, bol_, incomplete_line_length); 974 // Read text from file and append it. 975 char *const append_pos = buf_ + incomplete_line_length; 976 const size_t capacity_left = buf_len_ - incomplete_line_length; 977 const ssize_t num_bytes = 978 ReadPersistent(fd_, append_pos, capacity_left); 979 if (num_bytes <= 0) { // EOF or error. 980 return false; 981 } 982 eod_ = append_pos + num_bytes; 983 bol_ = buf_; 984 } 985 } 986 eol_ = FindLineFeed(); 987 if (eol_ == nullptr) { // '\n' not found. Malformed line. 988 return false; 989 } 990 *eol_ = '\0'; // Replace '\n' with '\0'. 991 992 *bol = bol_; 993 *eol = eol_; 994 return true; 995 } 996 997 private: 998 char *FindLineFeed() const { 999 return reinterpret_cast<char *>( 1000 memchr(bol_, '\n', static_cast<size_t>(eod_ - bol_))); 1001 } 1002 1003 bool BufferIsEmpty() const { return buf_ == eod_; } 1004 1005 bool HasCompleteLine() const { 1006 return !BufferIsEmpty() && FindLineFeed() != nullptr; 1007 } 1008 1009 const int fd_; 1010 const size_t buf_len_; 1011 char *const buf_; 1012 char *bol_; 1013 char *eol_; 1014 const char *eod_; // End of data in "buf_". 1015}; 1016} // namespace 1017 1018// Place the hex number read from "start" into "*hex". The pointer to 1019// the first non-hex character or "end" is returned. 1020static const char *GetHex(const char *start, const char *end, 1021 uint64_t *const value) { 1022 uint64_t hex = 0; 1023 const char *p; 1024 for (p = start; p < end; ++p) { 1025 int ch = *p; 1026 if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F') || 1027 (ch >= 'a' && ch <= 'f')) { 1028 hex = (hex << 4) | 1029 static_cast<uint64_t>(ch < 'A' ? ch - '0' : (ch & 0xF) + 9); 1030 } else { // Encountered the first non-hex character. 1031 break; 1032 } 1033 } 1034 SAFE_ASSERT(p <= end); 1035 *value = hex; 1036 return p; 1037} 1038 1039static const char *GetHex(const char *start, const char *end, 1040 const void **const addr) { 1041 uint64_t hex = 0; 1042 const char *p = GetHex(start, end, &hex); 1043 *addr = reinterpret_cast<void *>(hex); 1044 return p; 1045} 1046 1047// Normally we are only interested in "r?x" maps. 1048// On the PowerPC, function pointers point to descriptors in the .opd 1049// section. The descriptors themselves are not executable code, so 1050// we need to relax the check below to "r??". 1051static bool ShouldUseMapping(const char *const flags) { 1052 return flags[0] == 'r' && (kPlatformUsesOPDSections || flags[2] == 'x'); 1053} 1054 1055// Read /proc/self/maps and run "callback" for each mmapped file found. If 1056// "callback" returns false, stop scanning and return true. Else continue 1057// scanning /proc/self/maps. Return true if no parse error is found. 1058static ABSL_ATTRIBUTE_NOINLINE bool ReadAddrMap( 1059 bool (*callback)(const char *filename, const void *const start_addr, 1060 const void *const end_addr, uint64_t offset, void *arg), 1061 void *arg, void *tmp_buf, size_t tmp_buf_size) { 1062 // Use /proc/self/task/<pid>/maps instead of /proc/self/maps. The latter 1063 // requires kernel to stop all threads, and is significantly slower when there 1064 // are 1000s of threads. 1065 char maps_path[80]; 1066 snprintf(maps_path, sizeof(maps_path), "/proc/self/task/%d/maps", getpid()); 1067 1068 int maps_fd; 1069 NO_INTR(maps_fd = open(maps_path, O_RDONLY)); 1070 FileDescriptor wrapped_maps_fd(maps_fd); 1071 if (wrapped_maps_fd.get() < 0) { 1072 ABSL_RAW_LOG(WARNING, "%s: errno=%d", maps_path, errno); 1073 return false; 1074 } 1075 1076 // Iterate over maps and look for the map containing the pc. Then 1077 // look into the symbol tables inside. 1078 LineReader reader(wrapped_maps_fd.get(), static_cast<char *>(tmp_buf), 1079 tmp_buf_size); 1080 while (true) { 1081 const char *cursor; 1082 const char *eol; 1083 if (!reader.ReadLine(&cursor, &eol)) { // EOF or malformed line. 1084 break; 1085 } 1086 1087 const char *line = cursor; 1088 const void *start_address; 1089 // Start parsing line in /proc/self/maps. Here is an example: 1090 // 1091 // 08048000-0804c000 r-xp 00000000 08:01 2142121 /bin/cat 1092 // 1093 // We want start address (08048000), end address (0804c000), flags 1094 // (r-xp) and file name (/bin/cat). 1095 1096 // Read start address. 1097 cursor = GetHex(cursor, eol, &start_address); 1098 if (cursor == eol || *cursor != '-') { 1099 ABSL_RAW_LOG(WARNING, "Corrupt /proc/self/maps line: %s", line); 1100 return false; 1101 } 1102 ++cursor; // Skip '-'. 1103 1104 // Read end address. 1105 const void *end_address; 1106 cursor = GetHex(cursor, eol, &end_address); 1107 if (cursor == eol || *cursor != ' ') { 1108 ABSL_RAW_LOG(WARNING, "Corrupt /proc/self/maps line: %s", line); 1109 return false; 1110 } 1111 ++cursor; // Skip ' '. 1112 1113 // Read flags. Skip flags until we encounter a space or eol. 1114 const char *const flags_start = cursor; 1115 while (cursor < eol && *cursor != ' ') { 1116 ++cursor; 1117 } 1118 // We expect at least four letters for flags (ex. "r-xp"). 1119 if (cursor == eol || cursor < flags_start + 4) { 1120 ABSL_RAW_LOG(WARNING, "Corrupt /proc/self/maps: %s", line); 1121 return false; 1122 } 1123 1124 // Check flags. 1125 if (!ShouldUseMapping(flags_start)) { 1126 continue; // We skip this map. 1127 } 1128 ++cursor; // Skip ' '. 1129 1130 // Read file offset. 1131 uint64_t offset; 1132 cursor = GetHex(cursor, eol, &offset); 1133 ++cursor; // Skip ' '. 1134 1135 // Skip to file name. "cursor" now points to dev. We need to skip at least 1136 // two spaces for dev and inode. 1137 int num_spaces = 0; 1138 while (cursor < eol) { 1139 if (*cursor == ' ') { 1140 ++num_spaces; 1141 } else if (num_spaces >= 2) { 1142 // The first non-space character after skipping two spaces 1143 // is the beginning of the file name. 1144 break; 1145 } 1146 ++cursor; 1147 } 1148 1149 // Check whether this entry corresponds to our hint table for the true 1150 // filename. 1151 bool hinted = 1152 GetFileMappingHint(&start_address, &end_address, &offset, &cursor); 1153 if (!hinted && (cursor == eol || cursor[0] == '[')) { 1154 // not an object file, typically [vdso] or [vsyscall] 1155 continue; 1156 } 1157 if (!callback(cursor, start_address, end_address, offset, arg)) break; 1158 } 1159 return true; 1160} 1161 1162// Find the objfile mapped in address region containing [addr, addr + len). 1163ObjFile *Symbolizer::FindObjFile(const void *const addr, size_t len) { 1164 for (int i = 0; i < 2; ++i) { 1165 if (!ok_) return nullptr; 1166 1167 // Read /proc/self/maps if necessary 1168 if (!addr_map_read_) { 1169 addr_map_read_ = true; 1170 if (!ReadAddrMap(RegisterObjFile, this, tmp_buf_, TMP_BUF_SIZE)) { 1171 ok_ = false; 1172 return nullptr; 1173 } 1174 } 1175 1176 size_t lo = 0; 1177 size_t hi = addr_map_.Size(); 1178 while (lo < hi) { 1179 size_t mid = (lo + hi) / 2; 1180 if (addr < addr_map_.At(mid)->end_addr) { 1181 hi = mid; 1182 } else { 1183 lo = mid + 1; 1184 } 1185 } 1186 if (lo != addr_map_.Size()) { 1187 ObjFile *obj = addr_map_.At(lo); 1188 SAFE_ASSERT(obj->end_addr > addr); 1189 if (addr >= obj->start_addr && 1190 reinterpret_cast<const char *>(addr) + len <= obj->end_addr) 1191 return obj; 1192 } 1193 1194 // The address mapping may have changed since it was last read. Retry. 1195 ClearAddrMap(); 1196 } 1197 return nullptr; 1198} 1199 1200void Symbolizer::ClearAddrMap() { 1201 for (size_t i = 0; i != addr_map_.Size(); i++) { 1202 ObjFile *o = addr_map_.At(i); 1203 base_internal::LowLevelAlloc::Free(o->filename); 1204 if (o->fd >= 0) { 1205 close(o->fd); 1206 } 1207 } 1208 addr_map_.Clear(); 1209 addr_map_read_ = false; 1210} 1211 1212// Callback for ReadAddrMap to register objfiles in an in-memory table. 1213bool Symbolizer::RegisterObjFile(const char *filename, 1214 const void *const start_addr, 1215 const void *const end_addr, uint64_t offset, 1216 void *arg) { 1217 Symbolizer *impl = static_cast<Symbolizer *>(arg); 1218 1219 // Files are supposed to be added in the increasing address order. Make 1220 // sure that's the case. 1221 size_t addr_map_size = impl->addr_map_.Size(); 1222 if (addr_map_size != 0) { 1223 ObjFile *old = impl->addr_map_.At(addr_map_size - 1); 1224 if (old->end_addr > end_addr) { 1225 ABSL_RAW_LOG(ERROR, 1226 "Unsorted addr map entry: 0x%" PRIxPTR ": %s <-> 0x%" PRIxPTR 1227 ": %s", 1228 reinterpret_cast<uintptr_t>(end_addr), filename, 1229 reinterpret_cast<uintptr_t>(old->end_addr), old->filename); 1230 return true; 1231 } else if (old->end_addr == end_addr) { 1232 // The same entry appears twice. This sometimes happens for [vdso]. 1233 if (old->start_addr != start_addr || 1234 strcmp(old->filename, filename) != 0) { 1235 ABSL_RAW_LOG(ERROR, 1236 "Duplicate addr 0x%" PRIxPTR ": %s <-> 0x%" PRIxPTR ": %s", 1237 reinterpret_cast<uintptr_t>(end_addr), filename, 1238 reinterpret_cast<uintptr_t>(old->end_addr), old->filename); 1239 } 1240 return true; 1241 } else if (old->end_addr == start_addr && 1242 reinterpret_cast<uintptr_t>(old->start_addr) - old->offset == 1243 reinterpret_cast<uintptr_t>(start_addr) - offset && 1244 strcmp(old->filename, filename) == 0) { 1245 // Two contiguous map entries that span a contiguous region of the file, 1246 // perhaps because some part of the file was mlock()ed. Combine them. 1247 old->end_addr = end_addr; 1248 return true; 1249 } 1250 } 1251 ObjFile *obj = impl->addr_map_.Add(); 1252 obj->filename = impl->CopyString(filename); 1253 obj->start_addr = start_addr; 1254 obj->end_addr = end_addr; 1255 obj->offset = offset; 1256 obj->elf_type = -1; // filled on demand 1257 obj->fd = -1; // opened on demand 1258 return true; 1259} 1260 1261// This function wraps the Demangle function to provide an interface 1262// where the input symbol is demangled in-place. 1263// To keep stack consumption low, we would like this function to not 1264// get inlined. 1265static ABSL_ATTRIBUTE_NOINLINE void DemangleInplace(char *out, size_t out_size, 1266 char *tmp_buf, 1267 size_t tmp_buf_size) { 1268 if (Demangle(out, tmp_buf, tmp_buf_size)) { 1269 // Demangling succeeded. Copy to out if the space allows. 1270 size_t len = strlen(tmp_buf); 1271 if (len + 1 <= out_size) { // +1 for '\0'. 1272 SAFE_ASSERT(len < tmp_buf_size); 1273 memmove(out, tmp_buf, len + 1); 1274 } 1275 } 1276} 1277 1278SymbolCacheLine *Symbolizer::GetCacheLine(const void *const pc) { 1279 uintptr_t pc0 = reinterpret_cast<uintptr_t>(pc); 1280 pc0 >>= 3; // drop the low 3 bits 1281 1282 // Shuffle bits. 1283 pc0 ^= (pc0 >> 6) ^ (pc0 >> 12) ^ (pc0 >> 18); 1284 return &symbol_cache_[pc0 % SYMBOL_CACHE_LINES]; 1285} 1286 1287void Symbolizer::AgeSymbols(SymbolCacheLine *line) { 1288 for (uint32_t &age : line->age) { 1289 ++age; 1290 } 1291} 1292 1293const char *Symbolizer::FindSymbolInCache(const void *const pc) { 1294 if (pc == nullptr) return nullptr; 1295 1296 SymbolCacheLine *line = GetCacheLine(pc); 1297 for (size_t i = 0; i < ABSL_ARRAYSIZE(line->pc); ++i) { 1298 if (line->pc[i] == pc) { 1299 AgeSymbols(line); 1300 line->age[i] = 0; 1301 return line->name[i]; 1302 } 1303 } 1304 return nullptr; 1305} 1306 1307const char *Symbolizer::InsertSymbolInCache(const void *const pc, 1308 const char *name) { 1309 SAFE_ASSERT(pc != nullptr); 1310 1311 SymbolCacheLine *line = GetCacheLine(pc); 1312 uint32_t max_age = 0; 1313 size_t oldest_index = 0; 1314 bool found_oldest_index = false; 1315 for (size_t i = 0; i < ABSL_ARRAYSIZE(line->pc); ++i) { 1316 if (line->pc[i] == nullptr) { 1317 AgeSymbols(line); 1318 line->pc[i] = pc; 1319 line->name[i] = CopyString(name); 1320 line->age[i] = 0; 1321 return line->name[i]; 1322 } 1323 if (line->age[i] >= max_age) { 1324 max_age = line->age[i]; 1325 oldest_index = i; 1326 found_oldest_index = true; 1327 } 1328 } 1329 1330 AgeSymbols(line); 1331 ABSL_RAW_CHECK(found_oldest_index, "Corrupt cache"); 1332 base_internal::LowLevelAlloc::Free(line->name[oldest_index]); 1333 line->pc[oldest_index] = pc; 1334 line->name[oldest_index] = CopyString(name); 1335 line->age[oldest_index] = 0; 1336 return line->name[oldest_index]; 1337} 1338 1339static void MaybeOpenFdFromSelfExe(ObjFile *obj) { 1340 if (memcmp(obj->start_addr, ELFMAG, SELFMAG) != 0) { 1341 return; 1342 } 1343 int fd = open("/proc/self/exe", O_RDONLY); 1344 if (fd == -1) { 1345 return; 1346 } 1347 // Verify that contents of /proc/self/exe matches in-memory image of 1348 // the binary. This can fail if the "deleted" binary is in fact not 1349 // the main executable, or for binaries that have the first PT_LOAD 1350 // segment smaller than 4K. We do it in four steps so that the 1351 // buffer is smaller and we don't consume too much stack space. 1352 const char *mem = reinterpret_cast<const char *>(obj->start_addr); 1353 for (int i = 0; i < 4; ++i) { 1354 char buf[1024]; 1355 ssize_t n = read(fd, buf, sizeof(buf)); 1356 if (n != sizeof(buf) || memcmp(buf, mem, sizeof(buf)) != 0) { 1357 close(fd); 1358 return; 1359 } 1360 mem += sizeof(buf); 1361 } 1362 obj->fd = fd; 1363} 1364 1365static bool MaybeInitializeObjFile(ObjFile *obj) { 1366 if (obj->fd < 0) { 1367 obj->fd = open(obj->filename, O_RDONLY); 1368 1369 if (obj->fd < 0) { 1370 // Getting /proc/self/exe here means that we were hinted. 1371 if (strcmp(obj->filename, "/proc/self/exe") == 0) { 1372 // /proc/self/exe may be inaccessible (due to setuid, etc.), so try 1373 // accessing the binary via argv0. 1374 if (argv0_value != nullptr) { 1375 obj->fd = open(argv0_value, O_RDONLY); 1376 } 1377 } else { 1378 MaybeOpenFdFromSelfExe(obj); 1379 } 1380 } 1381 1382 if (obj->fd < 0) { 1383 ABSL_RAW_LOG(WARNING, "%s: open failed: errno=%d", obj->filename, errno); 1384 return false; 1385 } 1386 1387 char buf[kSmallFileCacheSize]; 1388 CachingFile file(obj->fd, buf, sizeof(buf)); 1389 1390 obj->elf_type = FileGetElfType(&file); 1391 if (obj->elf_type < 0) { 1392 ABSL_RAW_LOG(WARNING, "%s: wrong elf type: %d", obj->filename, 1393 obj->elf_type); 1394 return false; 1395 } 1396 1397 if (!file.ReadFromOffsetExact(&obj->elf_header, sizeof(obj->elf_header), 1398 0)) { 1399 ABSL_RAW_LOG(WARNING, "%s: failed to read elf header", obj->filename); 1400 return false; 1401 } 1402 const int phnum = obj->elf_header.e_phnum; 1403 const int phentsize = obj->elf_header.e_phentsize; 1404 auto phoff = static_cast<off_t>(obj->elf_header.e_phoff); 1405 size_t num_interesting_load_segments = 0; 1406 for (int j = 0; j < phnum; j++) { 1407 ElfW(Phdr) phdr; 1408 if (!file.ReadFromOffsetExact(&phdr, sizeof(phdr), phoff)) { 1409 ABSL_RAW_LOG(WARNING, "%s: failed to read program header %d", 1410 obj->filename, j); 1411 return false; 1412 } 1413 phoff += phentsize; 1414 1415#if defined(__powerpc__) && !(_CALL_ELF > 1) 1416 // On the PowerPC ELF v1 ABI, function pointers actually point to function 1417 // descriptors. These descriptors are stored in an .opd section, which is 1418 // mapped read-only. We thus need to look at all readable segments, not 1419 // just the executable ones. 1420 constexpr int interesting = PF_R; 1421#else 1422 constexpr int interesting = PF_X | PF_R; 1423#endif 1424 1425 if (phdr.p_type != PT_LOAD 1426 || (phdr.p_flags & interesting) != interesting) { 1427 // Not a LOAD segment, not executable code, and not a function 1428 // descriptor. 1429 continue; 1430 } 1431 if (num_interesting_load_segments < obj->phdr.size()) { 1432 memcpy(&obj->phdr[num_interesting_load_segments++], &phdr, sizeof(phdr)); 1433 } else { 1434 ABSL_RAW_LOG( 1435 WARNING, "%s: too many interesting LOAD segments: %zu >= %zu", 1436 obj->filename, num_interesting_load_segments, obj->phdr.size()); 1437 break; 1438 } 1439 } 1440 if (num_interesting_load_segments == 0) { 1441 // This object has no interesting LOAD segments. That's unexpected. 1442 ABSL_RAW_LOG(WARNING, "%s: no interesting LOAD segments", obj->filename); 1443 return false; 1444 } 1445 } 1446 return true; 1447} 1448 1449// The implementation of our symbolization routine. If it 1450// successfully finds the symbol containing "pc" and obtains the 1451// symbol name, returns pointer to that symbol. Otherwise, returns nullptr. 1452// If any symbol decorators have been installed via InstallSymbolDecorator(), 1453// they are called here as well. 1454// To keep stack consumption low, we would like this function to not 1455// get inlined. 1456const char *Symbolizer::GetUncachedSymbol(const void *pc) { 1457 ObjFile *const obj = FindObjFile(pc, 1); 1458 ptrdiff_t relocation = 0; 1459 int fd = -1; 1460 if (obj != nullptr) { 1461 if (MaybeInitializeObjFile(obj)) { 1462 const size_t start_addr = reinterpret_cast<size_t>(obj->start_addr); 1463 if (obj->elf_type == ET_DYN && start_addr >= obj->offset) { 1464 // This object was relocated. 1465 // 1466 // For obj->offset > 0, adjust the relocation since a mapping at offset 1467 // X in the file will have a start address of [true relocation]+X. 1468 relocation = static_cast<ptrdiff_t>(start_addr - obj->offset); 1469 1470 // Note: some binaries have multiple LOAD segments that can contain 1471 // function pointers. We must find the right one. 1472 ElfW(Phdr) *phdr = nullptr; 1473 for (size_t j = 0; j < obj->phdr.size(); j++) { 1474 ElfW(Phdr) &p = obj->phdr[j]; 1475 if (p.p_type != PT_LOAD) { 1476 // We only expect PT_LOADs. This must be PT_NULL that we didn't 1477 // write over (i.e. we exhausted all interesting PT_LOADs). 1478 ABSL_RAW_CHECK(p.p_type == PT_NULL, "unexpected p_type"); 1479 break; 1480 } 1481 if (pc < reinterpret_cast<void *>(start_addr + p.p_vaddr + p.p_memsz)) { 1482 phdr = &p; 1483 break; 1484 } 1485 } 1486 if (phdr == nullptr) { 1487 // That's unexpected. Hope for the best. 1488 ABSL_RAW_LOG( 1489 WARNING, 1490 "%s: unable to find LOAD segment for pc: %p, start_addr: %zx", 1491 obj->filename, pc, start_addr); 1492 } else { 1493 // Adjust relocation in case phdr.p_vaddr != 0. 1494 // This happens for binaries linked with `lld --rosegment`, and for 1495 // binaries linked with BFD `ld -z separate-code`. 1496 relocation -= phdr->p_vaddr - phdr->p_offset; 1497 } 1498 } 1499 1500 fd = obj->fd; 1501 if (GetSymbolFromObjectFile(*obj, pc, relocation, symbol_buf_, 1502 sizeof(symbol_buf_), tmp_buf_, 1503 sizeof(tmp_buf_)) == SYMBOL_FOUND) { 1504 // Only try to demangle the symbol name if it fit into symbol_buf_. 1505 DemangleInplace(symbol_buf_, sizeof(symbol_buf_), tmp_buf_, 1506 sizeof(tmp_buf_)); 1507 } 1508 } 1509 } else { 1510#if ABSL_HAVE_VDSO_SUPPORT 1511 VDSOSupport vdso; 1512 if (vdso.IsPresent()) { 1513 VDSOSupport::SymbolInfo symbol_info; 1514 if (vdso.LookupSymbolByAddress(pc, &symbol_info)) { 1515 // All VDSO symbols are known to be short. 1516 size_t len = strlen(symbol_info.name); 1517 ABSL_RAW_CHECK(len + 1 < sizeof(symbol_buf_), 1518 "VDSO symbol unexpectedly long"); 1519 memcpy(symbol_buf_, symbol_info.name, len + 1); 1520 } 1521 } 1522#endif 1523 } 1524 1525 if (g_decorators_mu.TryLock()) { 1526 if (g_num_decorators > 0) { 1527 SymbolDecoratorArgs decorator_args = { 1528 pc, relocation, fd, symbol_buf_, sizeof(symbol_buf_), 1529 tmp_buf_, sizeof(tmp_buf_), nullptr}; 1530 for (int i = 0; i < g_num_decorators; ++i) { 1531 decorator_args.arg = g_decorators[i].arg; 1532 g_decorators[i].fn(&decorator_args); 1533 } 1534 } 1535 g_decorators_mu.Unlock(); 1536 } 1537 if (symbol_buf_[0] == '\0') { 1538 return nullptr; 1539 } 1540 symbol_buf_[sizeof(symbol_buf_) - 1] = '\0'; // Paranoia. 1541 return InsertSymbolInCache(pc, symbol_buf_); 1542} 1543 1544const char *Symbolizer::GetSymbol(const void *pc) { 1545 const char *entry = FindSymbolInCache(pc); 1546 if (entry != nullptr) { 1547 return entry; 1548 } 1549 symbol_buf_[0] = '\0'; 1550 1551#ifdef __hppa__ 1552 { 1553 // In some contexts (e.g., return addresses), PA-RISC uses the lowest two 1554 // bits of the address to indicate the privilege level. Clear those bits 1555 // before trying to symbolize. 1556 const auto pc_bits = reinterpret_cast<uintptr_t>(pc); 1557 const auto address = pc_bits & ~0x3; 1558 entry = GetUncachedSymbol(reinterpret_cast<const void *>(address)); 1559 if (entry != nullptr) { 1560 return entry; 1561 } 1562 1563 // In some contexts, PA-RISC also uses bit 1 of the address to indicate that 1564 // this is a cross-DSO function pointer. Such function pointers actually 1565 // point to a procedure label, a struct whose first 32-bit (pointer) element 1566 // actually points to the function text. With no symbol found for this 1567 // address so far, try interpreting it as a cross-DSO function pointer and 1568 // see how that goes. 1569 if (pc_bits & 0x2) { 1570 return GetUncachedSymbol(*reinterpret_cast<const void *const *>(address)); 1571 } 1572 1573 return nullptr; 1574 } 1575#else 1576 return GetUncachedSymbol(pc); 1577#endif 1578} 1579 1580bool RemoveAllSymbolDecorators(void) { 1581 if (!g_decorators_mu.TryLock()) { 1582 // Someone else is using decorators. Get out. 1583 return false; 1584 } 1585 g_num_decorators = 0; 1586 g_decorators_mu.Unlock(); 1587 return true; 1588} 1589 1590bool RemoveSymbolDecorator(int ticket) { 1591 if (!g_decorators_mu.TryLock()) { 1592 // Someone else is using decorators. Get out. 1593 return false; 1594 } 1595 for (int i = 0; i < g_num_decorators; ++i) { 1596 if (g_decorators[i].ticket == ticket) { 1597 while (i < g_num_decorators - 1) { 1598 g_decorators[i] = g_decorators[i + 1]; 1599 ++i; 1600 } 1601 g_num_decorators = i; 1602 break; 1603 } 1604 } 1605 g_decorators_mu.Unlock(); 1606 return true; // Decorator is known to be removed. 1607} 1608 1609int InstallSymbolDecorator(SymbolDecorator decorator, void *arg) { 1610 static int ticket = 0; 1611 1612 if (!g_decorators_mu.TryLock()) { 1613 // Someone else is using decorators. Get out. 1614 return -2; 1615 } 1616 int ret = ticket; 1617 if (g_num_decorators >= kMaxDecorators) { 1618 ret = -1; 1619 } else { 1620 g_decorators[g_num_decorators] = {decorator, arg, ticket++}; 1621 ++g_num_decorators; 1622 } 1623 g_decorators_mu.Unlock(); 1624 return ret; 1625} 1626 1627bool RegisterFileMappingHint(const void *start, const void *end, uint64_t offset, 1628 const char *filename) { 1629 SAFE_ASSERT(start <= end); 1630 SAFE_ASSERT(filename != nullptr); 1631 1632 InitSigSafeArena(); 1633 1634 if (!g_file_mapping_mu.TryLock()) { 1635 return false; 1636 } 1637 1638 bool ret = true; 1639 if (g_num_file_mapping_hints >= kMaxFileMappingHints) { 1640 ret = false; 1641 } else { 1642 // TODO(ckennelly): Move this into a string copy routine. 1643 size_t len = strlen(filename); 1644 char *dst = static_cast<char *>( 1645 base_internal::LowLevelAlloc::AllocWithArena(len + 1, SigSafeArena())); 1646 ABSL_RAW_CHECK(dst != nullptr, "out of memory"); 1647 memcpy(dst, filename, len + 1); 1648 1649 auto &hint = g_file_mapping_hints[g_num_file_mapping_hints++]; 1650 hint.start = start; 1651 hint.end = end; 1652 hint.offset = offset; 1653 hint.filename = dst; 1654 } 1655 1656 g_file_mapping_mu.Unlock(); 1657 return ret; 1658} 1659 1660bool GetFileMappingHint(const void **start, const void **end, uint64_t *offset, 1661 const char **filename) { 1662 if (!g_file_mapping_mu.TryLock()) { 1663 return false; 1664 } 1665 bool found = false; 1666 for (int i = 0; i < g_num_file_mapping_hints; i++) { 1667 if (g_file_mapping_hints[i].start <= *start && 1668 *end <= g_file_mapping_hints[i].end) { 1669 // We assume that the start_address for the mapping is the base 1670 // address of the ELF section, but when [start_address,end_address) is 1671 // not strictly equal to [hint.start, hint.end), that assumption is 1672 // invalid. 1673 // 1674 // This uses the hint's start address (even though hint.start is not 1675 // necessarily equal to start_address) to ensure the correct 1676 // relocation is computed later. 1677 *start = g_file_mapping_hints[i].start; 1678 *end = g_file_mapping_hints[i].end; 1679 *offset = g_file_mapping_hints[i].offset; 1680 *filename = g_file_mapping_hints[i].filename; 1681 found = true; 1682 break; 1683 } 1684 } 1685 g_file_mapping_mu.Unlock(); 1686 return found; 1687} 1688 1689} // namespace debugging_internal 1690 1691bool Symbolize(const void *pc, char *out, int out_size) { 1692 // Symbolization is very slow under tsan. 1693 ABSL_ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN(); 1694 SAFE_ASSERT(out_size >= 0); 1695 debugging_internal::Symbolizer *s = debugging_internal::AllocateSymbolizer(); 1696 const char *name = s->GetSymbol(pc); 1697 bool ok = false; 1698 if (name != nullptr && out_size > 0) { 1699 strncpy(out, name, static_cast<size_t>(out_size)); 1700 ok = true; 1701 if (out[static_cast<size_t>(out_size) - 1] != '\0') { 1702 // strncpy() does not '\0' terminate when it truncates. Do so, with 1703 // trailing ellipsis. 1704 static constexpr char kEllipsis[] = "..."; 1705 size_t ellipsis_size = 1706 std::min(strlen(kEllipsis), static_cast<size_t>(out_size) - 1); 1707 memcpy(out + static_cast<size_t>(out_size) - ellipsis_size - 1, kEllipsis, 1708 ellipsis_size); 1709 out[static_cast<size_t>(out_size) - 1] = '\0'; 1710 } 1711 } 1712 debugging_internal::FreeSymbolizer(s); 1713 ABSL_ANNOTATE_IGNORE_READS_AND_WRITES_END(); 1714 return ok; 1715} 1716 1717ABSL_NAMESPACE_END 1718} // namespace absl 1719 1720extern "C" bool AbslInternalGetFileMappingHint(const void **start, 1721 const void **end, uint64_t *offset, 1722 const char **filename) { 1723 return absl::debugging_internal::GetFileMappingHint(start, end, offset, 1724 filename); 1725} 1726