1*9712c20fSFrederick Mayle // -*- mode: C++ -*- 2*9712c20fSFrederick Mayle 3*9712c20fSFrederick Mayle // Copyright 2010 Google LLC 4*9712c20fSFrederick Mayle // 5*9712c20fSFrederick Mayle // Redistribution and use in source and binary forms, with or without 6*9712c20fSFrederick Mayle // modification, are permitted provided that the following conditions are 7*9712c20fSFrederick Mayle // met: 8*9712c20fSFrederick Mayle // 9*9712c20fSFrederick Mayle // * Redistributions of source code must retain the above copyright 10*9712c20fSFrederick Mayle // notice, this list of conditions and the following disclaimer. 11*9712c20fSFrederick Mayle // * Redistributions in binary form must reproduce the above 12*9712c20fSFrederick Mayle // copyright notice, this list of conditions and the following disclaimer 13*9712c20fSFrederick Mayle // in the documentation and/or other materials provided with the 14*9712c20fSFrederick Mayle // distribution. 15*9712c20fSFrederick Mayle // * Neither the name of Google LLC nor the names of its 16*9712c20fSFrederick Mayle // contributors may be used to endorse or promote products derived from 17*9712c20fSFrederick Mayle // this software without specific prior written permission. 18*9712c20fSFrederick Mayle // 19*9712c20fSFrederick Mayle // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20*9712c20fSFrederick Mayle // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21*9712c20fSFrederick Mayle // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22*9712c20fSFrederick Mayle // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 23*9712c20fSFrederick Mayle // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24*9712c20fSFrederick Mayle // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25*9712c20fSFrederick Mayle // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26*9712c20fSFrederick Mayle // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27*9712c20fSFrederick Mayle // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28*9712c20fSFrederick Mayle // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29*9712c20fSFrederick Mayle // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30*9712c20fSFrederick Mayle 31*9712c20fSFrederick Mayle // CFI reader author: Jim Blandy <[email protected]> <[email protected]> 32*9712c20fSFrederick Mayle 33*9712c20fSFrederick Mayle // This file contains definitions related to the DWARF2/3 reader and 34*9712c20fSFrederick Mayle // its handler interfaces. 35*9712c20fSFrederick Mayle // The DWARF2/3 specification can be found at 36*9712c20fSFrederick Mayle // http://dwarf.freestandards.org and should be considered required 37*9712c20fSFrederick Mayle // reading if you wish to modify the implementation. 
38*9712c20fSFrederick Mayle // Only a cursory attempt is made to explain terminology that is 39*9712c20fSFrederick Mayle // used here, as it is much better explained in the standard documents 40*9712c20fSFrederick Mayle #ifndef COMMON_DWARF_DWARF2READER_H__ 41*9712c20fSFrederick Mayle #define COMMON_DWARF_DWARF2READER_H__ 42*9712c20fSFrederick Mayle 43*9712c20fSFrederick Mayle #include <assert.h> 44*9712c20fSFrederick Mayle #include <stdint.h> 45*9712c20fSFrederick Mayle 46*9712c20fSFrederick Mayle #include <list> 47*9712c20fSFrederick Mayle #include <map> 48*9712c20fSFrederick Mayle #include <string> 49*9712c20fSFrederick Mayle #include <utility> 50*9712c20fSFrederick Mayle #include <vector> 51*9712c20fSFrederick Mayle #include <memory> 52*9712c20fSFrederick Mayle 53*9712c20fSFrederick Mayle #include "common/dwarf/bytereader.h" 54*9712c20fSFrederick Mayle #include "common/dwarf/dwarf2enums.h" 55*9712c20fSFrederick Mayle #include "common/dwarf/types.h" 56*9712c20fSFrederick Mayle #include "common/using_std_string.h" 57*9712c20fSFrederick Mayle #include "common/dwarf/elf_reader.h" 58*9712c20fSFrederick Mayle 59*9712c20fSFrederick Mayle namespace google_breakpad { 60*9712c20fSFrederick Mayle struct LineStateMachine; 61*9712c20fSFrederick Mayle class Dwarf2Handler; 62*9712c20fSFrederick Mayle class LineInfoHandler; 63*9712c20fSFrederick Mayle class DwpReader; 64*9712c20fSFrederick Mayle 65*9712c20fSFrederick Mayle // This maps from a string naming a section to a pair containing a 66*9712c20fSFrederick Mayle // the data for the section, and the size of the section. 67*9712c20fSFrederick Mayle typedef std::map<string, std::pair<const uint8_t*, uint64_t> > SectionMap; 68*9712c20fSFrederick Mayle 69*9712c20fSFrederick Mayle // Abstract away the difference between elf and mach-o section names. 70*9712c20fSFrederick Mayle // Elf-names use ".section_name, mach-o uses "__section_name". Pass "name" in 71*9712c20fSFrederick Mayle // the elf form, ".section_name". 
72*9712c20fSFrederick Mayle const SectionMap::const_iterator GetSectionByName(const SectionMap& 73*9712c20fSFrederick Mayle sections, const char* name); 74*9712c20fSFrederick Mayle 75*9712c20fSFrederick Mayle // Most of the time, this struct functions as a simple attribute and form pair. 76*9712c20fSFrederick Mayle // However, Dwarf5 DW_FORM_implicit_const means that a form may have its value 77*9712c20fSFrederick Mayle // in line in the abbrev table, and that value must be associated with the 78*9712c20fSFrederick Mayle // pair until the attr's value is needed. 79*9712c20fSFrederick Mayle struct AttrForm { AttrFormAttrForm80*9712c20fSFrederick Mayle AttrForm(enum DwarfAttribute attr, enum DwarfForm form, uint64_t value) : 81*9712c20fSFrederick Mayle attr_(attr), form_(form), value_(value) { } 82*9712c20fSFrederick Mayle 83*9712c20fSFrederick Mayle enum DwarfAttribute attr_; 84*9712c20fSFrederick Mayle enum DwarfForm form_; 85*9712c20fSFrederick Mayle uint64_t value_; 86*9712c20fSFrederick Mayle }; 87*9712c20fSFrederick Mayle typedef std::list<AttrForm> AttributeList; 88*9712c20fSFrederick Mayle typedef AttributeList::iterator AttributeIterator; 89*9712c20fSFrederick Mayle typedef AttributeList::const_iterator ConstAttributeIterator; 90*9712c20fSFrederick Mayle 91*9712c20fSFrederick Mayle struct LineInfoHeader { 92*9712c20fSFrederick Mayle uint64_t total_length; 93*9712c20fSFrederick Mayle uint16_t version; 94*9712c20fSFrederick Mayle uint64_t prologue_length; 95*9712c20fSFrederick Mayle uint8_t min_insn_length; // insn stands for instructin 96*9712c20fSFrederick Mayle bool default_is_stmt; // stmt stands for statement 97*9712c20fSFrederick Mayle int8_t line_base; 98*9712c20fSFrederick Mayle uint8_t line_range; 99*9712c20fSFrederick Mayle uint8_t opcode_base; 100*9712c20fSFrederick Mayle // Use a pointer so that signalsafe_addr2line is able to use this structure 101*9712c20fSFrederick Mayle // without heap allocation problem. 
102*9712c20fSFrederick Mayle std::vector<unsigned char>* std_opcode_lengths; 103*9712c20fSFrederick Mayle }; 104*9712c20fSFrederick Mayle 105*9712c20fSFrederick Mayle class LineInfo { 106*9712c20fSFrederick Mayle public: 107*9712c20fSFrederick Mayle 108*9712c20fSFrederick Mayle // Initializes a .debug_line reader. Buffer and buffer length point 109*9712c20fSFrederick Mayle // to the beginning and length of the line information to read. 110*9712c20fSFrederick Mayle // Reader is a ByteReader class that has the endianness set 111*9712c20fSFrederick Mayle // properly. 112*9712c20fSFrederick Mayle LineInfo(const uint8_t* buffer, uint64_t buffer_length, 113*9712c20fSFrederick Mayle ByteReader* reader, const uint8_t* string_buffer, 114*9712c20fSFrederick Mayle size_t string_buffer_length, const uint8_t* line_string_buffer, 115*9712c20fSFrederick Mayle size_t line_string_buffer_length, LineInfoHandler* handler); 116*9712c20fSFrederick Mayle ~LineInfo()117*9712c20fSFrederick Mayle virtual ~LineInfo() { 118*9712c20fSFrederick Mayle if (header_.std_opcode_lengths) { 119*9712c20fSFrederick Mayle delete header_.std_opcode_lengths; 120*9712c20fSFrederick Mayle } 121*9712c20fSFrederick Mayle } 122*9712c20fSFrederick Mayle 123*9712c20fSFrederick Mayle // Start processing line info, and calling callbacks in the handler. 124*9712c20fSFrederick Mayle // Consumes the line number information for a single compilation unit. 125*9712c20fSFrederick Mayle // Returns the number of bytes processed. 126*9712c20fSFrederick Mayle uint64_t Start(); 127*9712c20fSFrederick Mayle 128*9712c20fSFrederick Mayle // Process a single line info opcode at START using the state 129*9712c20fSFrederick Mayle // machine at LSM. Return true if we should define a line using the 130*9712c20fSFrederick Mayle // current state of the line state machine. Place the length of the 131*9712c20fSFrederick Mayle // opcode in LEN. 
132*9712c20fSFrederick Mayle // If LSM_PASSES_PC is non-NULL, this function also checks if the lsm 133*9712c20fSFrederick Mayle // passes the address of PC. In other words, LSM_PASSES_PC will be 134*9712c20fSFrederick Mayle // set to true, if the following condition is met. 135*9712c20fSFrederick Mayle // 136*9712c20fSFrederick Mayle // lsm's old address < PC <= lsm's new address 137*9712c20fSFrederick Mayle static bool ProcessOneOpcode(ByteReader* reader, 138*9712c20fSFrederick Mayle LineInfoHandler* handler, 139*9712c20fSFrederick Mayle const struct LineInfoHeader& header, 140*9712c20fSFrederick Mayle const uint8_t* start, 141*9712c20fSFrederick Mayle struct LineStateMachine* lsm, 142*9712c20fSFrederick Mayle size_t* len, 143*9712c20fSFrederick Mayle uintptr pc, 144*9712c20fSFrederick Mayle bool* lsm_passes_pc); 145*9712c20fSFrederick Mayle 146*9712c20fSFrederick Mayle private: 147*9712c20fSFrederick Mayle // Reads the DWARF2/3 header for this line info. 148*9712c20fSFrederick Mayle void ReadHeader(); 149*9712c20fSFrederick Mayle 150*9712c20fSFrederick Mayle // Reads the DWARF2/3 line information 151*9712c20fSFrederick Mayle void ReadLines(); 152*9712c20fSFrederick Mayle 153*9712c20fSFrederick Mayle // Read the DWARF5 types and forms for the file and directory tables. 154*9712c20fSFrederick Mayle void ReadTypesAndForms(const uint8_t** lineptr, uint32_t* content_types, 155*9712c20fSFrederick Mayle uint32_t* content_forms, uint32_t max_types, 156*9712c20fSFrederick Mayle uint32_t* format_count); 157*9712c20fSFrederick Mayle 158*9712c20fSFrederick Mayle // Read a row from the dwarf5 LineInfo file table. 159*9712c20fSFrederick Mayle void ReadFileRow(const uint8_t** lineptr, const uint32_t* content_types, 160*9712c20fSFrederick Mayle const uint32_t* content_forms, uint32_t row, 161*9712c20fSFrederick Mayle uint32_t format_count); 162*9712c20fSFrederick Mayle 163*9712c20fSFrederick Mayle // Read and return the data at *lineptr according to form. 
Advance 164*9712c20fSFrederick Mayle // *lineptr appropriately. 165*9712c20fSFrederick Mayle uint64_t ReadUnsignedData(uint32_t form, const uint8_t** lineptr); 166*9712c20fSFrederick Mayle 167*9712c20fSFrederick Mayle // Read and return the data at *lineptr according to form. Advance 168*9712c20fSFrederick Mayle // *lineptr appropriately. 169*9712c20fSFrederick Mayle const char* ReadStringForm(uint32_t form, const uint8_t** lineptr); 170*9712c20fSFrederick Mayle 171*9712c20fSFrederick Mayle // The associated handler to call processing functions in 172*9712c20fSFrederick Mayle LineInfoHandler* handler_; 173*9712c20fSFrederick Mayle 174*9712c20fSFrederick Mayle // The associated ByteReader that handles endianness issues for us 175*9712c20fSFrederick Mayle ByteReader* reader_; 176*9712c20fSFrederick Mayle 177*9712c20fSFrederick Mayle // A DWARF line info header. This is not the same size as in the actual file, 178*9712c20fSFrederick Mayle // as the one in the file may have a 32 bit or 64 bit lengths 179*9712c20fSFrederick Mayle 180*9712c20fSFrederick Mayle struct LineInfoHeader header_; 181*9712c20fSFrederick Mayle 182*9712c20fSFrederick Mayle // buffer is the buffer for our line info, starting at exactly where 183*9712c20fSFrederick Mayle // the line info to read is. after_header is the place right after 184*9712c20fSFrederick Mayle // the end of the line information header. 185*9712c20fSFrederick Mayle const uint8_t* buffer_; 186*9712c20fSFrederick Mayle #ifndef NDEBUG 187*9712c20fSFrederick Mayle uint64_t buffer_length_; 188*9712c20fSFrederick Mayle #endif 189*9712c20fSFrederick Mayle // Convenience pointers into .debug_str and .debug_line_str. These exactly 190*9712c20fSFrederick Mayle // correspond to those in the compilation unit. 
191*9712c20fSFrederick Mayle const uint8_t* string_buffer_; 192*9712c20fSFrederick Mayle #ifndef NDEBUG 193*9712c20fSFrederick Mayle uint64_t string_buffer_length_; 194*9712c20fSFrederick Mayle #endif 195*9712c20fSFrederick Mayle const uint8_t* line_string_buffer_; 196*9712c20fSFrederick Mayle #ifndef NDEBUG 197*9712c20fSFrederick Mayle uint64_t line_string_buffer_length_; 198*9712c20fSFrederick Mayle #endif 199*9712c20fSFrederick Mayle 200*9712c20fSFrederick Mayle const uint8_t* after_header_; 201*9712c20fSFrederick Mayle }; 202*9712c20fSFrederick Mayle 203*9712c20fSFrederick Mayle // This class is the main interface between the line info reader and 204*9712c20fSFrederick Mayle // the client. The virtual functions inside this get called for 205*9712c20fSFrederick Mayle // interesting events that happen during line info reading. The 206*9712c20fSFrederick Mayle // default implementation does nothing 207*9712c20fSFrederick Mayle 208*9712c20fSFrederick Mayle class LineInfoHandler { 209*9712c20fSFrederick Mayle public: LineInfoHandler()210*9712c20fSFrederick Mayle LineInfoHandler() { } 211*9712c20fSFrederick Mayle ~LineInfoHandler()212*9712c20fSFrederick Mayle virtual ~LineInfoHandler() { } 213*9712c20fSFrederick Mayle 214*9712c20fSFrederick Mayle // Called when we define a directory. NAME is the directory name, 215*9712c20fSFrederick Mayle // DIR_NUM is the directory number DefineDir(const string & name,uint32_t dir_num)216*9712c20fSFrederick Mayle virtual void DefineDir(const string& name, uint32_t dir_num) { } 217*9712c20fSFrederick Mayle 218*9712c20fSFrederick Mayle // Called when we define a filename. 
NAME is the filename, FILE_NUM 219*9712c20fSFrederick Mayle // is the file number which is -1 if the file index is the next 220*9712c20fSFrederick Mayle // index after the last numbered index (this happens when files are 221*9712c20fSFrederick Mayle // dynamically defined by the line program), DIR_NUM is the 222*9712c20fSFrederick Mayle // directory index for the directory name of this file, MOD_TIME is 223*9712c20fSFrederick Mayle // the modification time of the file, and LENGTH is the length of 224*9712c20fSFrederick Mayle // the file DefineFile(const string & name,int32_t file_num,uint32_t dir_num,uint64_t mod_time,uint64_t length)225*9712c20fSFrederick Mayle virtual void DefineFile(const string& name, int32_t file_num, 226*9712c20fSFrederick Mayle uint32_t dir_num, uint64_t mod_time, 227*9712c20fSFrederick Mayle uint64_t length) { } 228*9712c20fSFrederick Mayle 229*9712c20fSFrederick Mayle // Called when the line info reader has a new line, address pair 230*9712c20fSFrederick Mayle // ready for us. ADDRESS is the address of the code, LENGTH is the 231*9712c20fSFrederick Mayle // length of its machine code in bytes, FILE_NUM is the file number 232*9712c20fSFrederick Mayle // containing the code, LINE_NUM is the line number in that file for 233*9712c20fSFrederick Mayle // the code, and COLUMN_NUM is the column number the code starts at, 234*9712c20fSFrederick Mayle // if we know it (0 otherwise). 
AddLine(uint64_t address,uint64_t length,uint32_t file_num,uint32_t line_num,uint32_t column_num)235*9712c20fSFrederick Mayle virtual void AddLine(uint64_t address, uint64_t length, 236*9712c20fSFrederick Mayle uint32_t file_num, uint32_t line_num, uint32_t column_num) { } 237*9712c20fSFrederick Mayle }; 238*9712c20fSFrederick Mayle 239*9712c20fSFrederick Mayle class RangeListHandler { 240*9712c20fSFrederick Mayle public: RangeListHandler()241*9712c20fSFrederick Mayle RangeListHandler() { } 242*9712c20fSFrederick Mayle ~RangeListHandler()243*9712c20fSFrederick Mayle virtual ~RangeListHandler() { } 244*9712c20fSFrederick Mayle 245*9712c20fSFrederick Mayle // Add a range. AddRange(uint64_t begin,uint64_t end)246*9712c20fSFrederick Mayle virtual void AddRange(uint64_t begin, uint64_t end) { }; 247*9712c20fSFrederick Mayle 248*9712c20fSFrederick Mayle // Finish processing the range list. Finish()249*9712c20fSFrederick Mayle virtual void Finish() { }; 250*9712c20fSFrederick Mayle }; 251*9712c20fSFrederick Mayle 252*9712c20fSFrederick Mayle class RangeListReader { 253*9712c20fSFrederick Mayle public: 254*9712c20fSFrederick Mayle // Reading a range list requires quite a bit of information 255*9712c20fSFrederick Mayle // from the compilation unit. Package it conveniently. 256*9712c20fSFrederick Mayle struct CURangesInfo { CURangesInfoCURangesInfo257*9712c20fSFrederick Mayle CURangesInfo() : 258*9712c20fSFrederick Mayle version_(0), base_address_(0), ranges_base_(0), 259*9712c20fSFrederick Mayle buffer_(nullptr), size_(0), addr_buffer_(nullptr), 260*9712c20fSFrederick Mayle addr_buffer_size_(0), addr_base_(0) { } 261*9712c20fSFrederick Mayle 262*9712c20fSFrederick Mayle uint16_t version_; 263*9712c20fSFrederick Mayle // Ranges base address. Ordinarily the CU's low_pc. 264*9712c20fSFrederick Mayle uint64_t base_address_; 265*9712c20fSFrederick Mayle // Offset into .debug_rnglists for this CU's rangelists. 
266*9712c20fSFrederick Mayle uint64_t ranges_base_; 267*9712c20fSFrederick Mayle // Contents of either .debug_ranges or .debug_rnglists. 268*9712c20fSFrederick Mayle const uint8_t* buffer_; 269*9712c20fSFrederick Mayle uint64_t size_; 270*9712c20fSFrederick Mayle // Contents of .debug_addr. This cu's contribution starts at 271*9712c20fSFrederick Mayle // addr_base_ 272*9712c20fSFrederick Mayle const uint8_t* addr_buffer_; 273*9712c20fSFrederick Mayle uint64_t addr_buffer_size_; 274*9712c20fSFrederick Mayle uint64_t addr_base_; 275*9712c20fSFrederick Mayle }; 276*9712c20fSFrederick Mayle RangeListReader(ByteReader * reader,CURangesInfo * cu_info,RangeListHandler * handler)277*9712c20fSFrederick Mayle RangeListReader(ByteReader* reader, CURangesInfo* cu_info, 278*9712c20fSFrederick Mayle RangeListHandler* handler) : 279*9712c20fSFrederick Mayle reader_(reader), cu_info_(cu_info), handler_(handler), 280*9712c20fSFrederick Mayle offset_array_(0) { } 281*9712c20fSFrederick Mayle 282*9712c20fSFrederick Mayle // Read ranges from cu_info as specified by form and data. 283*9712c20fSFrederick Mayle bool ReadRanges(enum DwarfForm form, uint64_t data); 284*9712c20fSFrederick Mayle 285*9712c20fSFrederick Mayle private: 286*9712c20fSFrederick Mayle // Read dwarf4 .debug_ranges at offset. 287*9712c20fSFrederick Mayle bool ReadDebugRanges(uint64_t offset); 288*9712c20fSFrederick Mayle // Read dwarf5 .debug_rngslist at offset. 289*9712c20fSFrederick Mayle bool ReadDebugRngList(uint64_t offset); 290*9712c20fSFrederick Mayle 291*9712c20fSFrederick Mayle // Convenience functions to handle the mechanics of reading entries in the 292*9712c20fSFrederick Mayle // ranges section. 
ReadULEB(uint64_t offset,uint64_t * value)293*9712c20fSFrederick Mayle uint64_t ReadULEB(uint64_t offset, uint64_t* value) { 294*9712c20fSFrederick Mayle size_t len; 295*9712c20fSFrederick Mayle *value = reader_->ReadUnsignedLEB128(cu_info_->buffer_ + offset, &len); 296*9712c20fSFrederick Mayle return len; 297*9712c20fSFrederick Mayle } 298*9712c20fSFrederick Mayle ReadAddress(uint64_t offset,uint64_t * value)299*9712c20fSFrederick Mayle uint64_t ReadAddress(uint64_t offset, uint64_t* value) { 300*9712c20fSFrederick Mayle *value = reader_->ReadAddress(cu_info_->buffer_ + offset); 301*9712c20fSFrederick Mayle return reader_->AddressSize(); 302*9712c20fSFrederick Mayle } 303*9712c20fSFrederick Mayle 304*9712c20fSFrederick Mayle // Read the address at this CU's addr_index in the .debug_addr section. GetAddressAtIndex(uint64_t addr_index)305*9712c20fSFrederick Mayle uint64_t GetAddressAtIndex(uint64_t addr_index) { 306*9712c20fSFrederick Mayle assert(cu_info_->addr_buffer_ != nullptr); 307*9712c20fSFrederick Mayle uint64_t offset = 308*9712c20fSFrederick Mayle cu_info_->addr_base_ + addr_index * reader_->AddressSize(); 309*9712c20fSFrederick Mayle assert(offset < cu_info_->addr_buffer_size_); 310*9712c20fSFrederick Mayle return reader_->ReadAddress(cu_info_->addr_buffer_ + offset); 311*9712c20fSFrederick Mayle } 312*9712c20fSFrederick Mayle 313*9712c20fSFrederick Mayle ByteReader* reader_; 314*9712c20fSFrederick Mayle CURangesInfo* cu_info_; 315*9712c20fSFrederick Mayle RangeListHandler* handler_; 316*9712c20fSFrederick Mayle uint64_t offset_array_; 317*9712c20fSFrederick Mayle }; 318*9712c20fSFrederick Mayle 319*9712c20fSFrederick Mayle // This class is the main interface between the reader and the 320*9712c20fSFrederick Mayle // client. The virtual functions inside this get called for 321*9712c20fSFrederick Mayle // interesting events that happen during DWARF2 reading. 322*9712c20fSFrederick Mayle // The default implementation skips everything. 
323*9712c20fSFrederick Mayle class Dwarf2Handler { 324*9712c20fSFrederick Mayle public: Dwarf2Handler()325*9712c20fSFrederick Mayle Dwarf2Handler() { } 326*9712c20fSFrederick Mayle ~Dwarf2Handler()327*9712c20fSFrederick Mayle virtual ~Dwarf2Handler() { } 328*9712c20fSFrederick Mayle 329*9712c20fSFrederick Mayle // Start to process a compilation unit at OFFSET from the beginning of the 330*9712c20fSFrederick Mayle // .debug_info section. Return false if you would like to skip this 331*9712c20fSFrederick Mayle // compilation unit. StartCompilationUnit(uint64_t offset,uint8_t address_size,uint8_t offset_size,uint64_t cu_length,uint8_t dwarf_version)332*9712c20fSFrederick Mayle virtual bool StartCompilationUnit(uint64_t offset, uint8_t address_size, 333*9712c20fSFrederick Mayle uint8_t offset_size, uint64_t cu_length, 334*9712c20fSFrederick Mayle uint8_t dwarf_version) { return false; } 335*9712c20fSFrederick Mayle 336*9712c20fSFrederick Mayle // When processing a skeleton compilation unit, resulting from a split 337*9712c20fSFrederick Mayle // DWARF compilation, once the skeleton debug info has been read, 338*9712c20fSFrederick Mayle // the reader will call this function to ask the client if it needs 339*9712c20fSFrederick Mayle // the full debug info from the .dwo or .dwp file. Return true if 340*9712c20fSFrederick Mayle // you need it, or false to skip processing the split debug info. NeedSplitDebugInfo()341*9712c20fSFrederick Mayle virtual bool NeedSplitDebugInfo() { return true; } 342*9712c20fSFrederick Mayle 343*9712c20fSFrederick Mayle // Start to process a split compilation unit at OFFSET from the beginning of 344*9712c20fSFrederick Mayle // the debug_info section in the .dwp/.dwo file. Return false if you would 345*9712c20fSFrederick Mayle // like to skip this compilation unit. 
StartSplitCompilationUnit(uint64_t offset,uint64_t cu_length)346*9712c20fSFrederick Mayle virtual bool StartSplitCompilationUnit(uint64_t offset, 347*9712c20fSFrederick Mayle uint64_t cu_length) { return false; } 348*9712c20fSFrederick Mayle 349*9712c20fSFrederick Mayle // Start to process a DIE at OFFSET from the beginning of the .debug_info 350*9712c20fSFrederick Mayle // section. Return false if you would like to skip this DIE. StartDIE(uint64_t offset,enum DwarfTag tag)351*9712c20fSFrederick Mayle virtual bool StartDIE(uint64_t offset, enum DwarfTag tag) { return false; } 352*9712c20fSFrederick Mayle 353*9712c20fSFrederick Mayle // Called when we have an attribute with unsigned data to give to our 354*9712c20fSFrederick Mayle // handler. The attribute is for the DIE at OFFSET from the beginning of the 355*9712c20fSFrederick Mayle // .debug_info section. Its name is ATTR, its form is FORM, and its value is 356*9712c20fSFrederick Mayle // DATA. ProcessAttributeUnsigned(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)357*9712c20fSFrederick Mayle virtual void ProcessAttributeUnsigned(uint64_t offset, 358*9712c20fSFrederick Mayle enum DwarfAttribute attr, 359*9712c20fSFrederick Mayle enum DwarfForm form, 360*9712c20fSFrederick Mayle uint64_t data) { } 361*9712c20fSFrederick Mayle 362*9712c20fSFrederick Mayle // Called when we have an attribute with signed data to give to our handler. 363*9712c20fSFrederick Mayle // The attribute is for the DIE at OFFSET from the beginning of the 364*9712c20fSFrederick Mayle // .debug_info section. Its name is ATTR, its form is FORM, and its value is 365*9712c20fSFrederick Mayle // DATA. 
ProcessAttributeSigned(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,int64_t data)366*9712c20fSFrederick Mayle virtual void ProcessAttributeSigned(uint64_t offset, 367*9712c20fSFrederick Mayle enum DwarfAttribute attr, 368*9712c20fSFrederick Mayle enum DwarfForm form, 369*9712c20fSFrederick Mayle int64_t data) { } 370*9712c20fSFrederick Mayle 371*9712c20fSFrederick Mayle // Called when we have an attribute whose value is a reference to 372*9712c20fSFrederick Mayle // another DIE. The attribute belongs to the DIE at OFFSET from the 373*9712c20fSFrederick Mayle // beginning of the .debug_info section. Its name is ATTR, its form 374*9712c20fSFrederick Mayle // is FORM, and the offset of the DIE being referred to from the 375*9712c20fSFrederick Mayle // beginning of the .debug_info section is DATA. ProcessAttributeReference(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)376*9712c20fSFrederick Mayle virtual void ProcessAttributeReference(uint64_t offset, 377*9712c20fSFrederick Mayle enum DwarfAttribute attr, 378*9712c20fSFrederick Mayle enum DwarfForm form, 379*9712c20fSFrederick Mayle uint64_t data) { } 380*9712c20fSFrederick Mayle 381*9712c20fSFrederick Mayle // Called when we have an attribute with a buffer of data to give to our 382*9712c20fSFrederick Mayle // handler. The attribute is for the DIE at OFFSET from the beginning of the 383*9712c20fSFrederick Mayle // .debug_info section. Its name is ATTR, its form is FORM, DATA points to 384*9712c20fSFrederick Mayle // the buffer's contents, and its length in bytes is LENGTH. The buffer is 385*9712c20fSFrederick Mayle // owned by the caller, not the callee, and may not persist for very long. 386*9712c20fSFrederick Mayle // If you want the data to be available later, it needs to be copied. 
ProcessAttributeBuffer(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,const uint8_t * data,uint64_t len)387*9712c20fSFrederick Mayle virtual void ProcessAttributeBuffer(uint64_t offset, 388*9712c20fSFrederick Mayle enum DwarfAttribute attr, 389*9712c20fSFrederick Mayle enum DwarfForm form, 390*9712c20fSFrederick Mayle const uint8_t* data, 391*9712c20fSFrederick Mayle uint64_t len) { } 392*9712c20fSFrederick Mayle 393*9712c20fSFrederick Mayle // Called when we have an attribute with string data to give to our handler. 394*9712c20fSFrederick Mayle // The attribute is for the DIE at OFFSET from the beginning of the 395*9712c20fSFrederick Mayle // .debug_info section. Its name is ATTR, its form is FORM, and its value is 396*9712c20fSFrederick Mayle // DATA. ProcessAttributeString(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,const string & data)397*9712c20fSFrederick Mayle virtual void ProcessAttributeString(uint64_t offset, 398*9712c20fSFrederick Mayle enum DwarfAttribute attr, 399*9712c20fSFrederick Mayle enum DwarfForm form, 400*9712c20fSFrederick Mayle const string& data) { } 401*9712c20fSFrederick Mayle 402*9712c20fSFrederick Mayle // Called when we have an attribute whose value is the 64-bit signature 403*9712c20fSFrederick Mayle // of a type unit in the .debug_types section. OFFSET is the offset of 404*9712c20fSFrederick Mayle // the DIE whose attribute we're reporting. ATTR and FORM are the 405*9712c20fSFrederick Mayle // attribute's name and form. SIGNATURE is the type unit's signature. 
ProcessAttributeSignature(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t signature)406*9712c20fSFrederick Mayle virtual void ProcessAttributeSignature(uint64_t offset, 407*9712c20fSFrederick Mayle enum DwarfAttribute attr, 408*9712c20fSFrederick Mayle enum DwarfForm form, 409*9712c20fSFrederick Mayle uint64_t signature) { } 410*9712c20fSFrederick Mayle 411*9712c20fSFrederick Mayle // Called when finished processing the DIE at OFFSET. 412*9712c20fSFrederick Mayle // Because DWARF2/3 specifies a tree of DIEs, you may get starts 413*9712c20fSFrederick Mayle // before ends of the previous DIE, as we process children before 414*9712c20fSFrederick Mayle // ending the parent. EndDIE(uint64_t offset)415*9712c20fSFrederick Mayle virtual void EndDIE(uint64_t offset) { } 416*9712c20fSFrederick Mayle 417*9712c20fSFrederick Mayle }; 418*9712c20fSFrederick Mayle 419*9712c20fSFrederick Mayle // The base of DWARF2/3 debug info is a DIE (Debugging Information 420*9712c20fSFrederick Mayle // Entry. 421*9712c20fSFrederick Mayle // DWARF groups DIE's into a tree and calls the root of this tree a 422*9712c20fSFrederick Mayle // "compilation unit". Most of the time, there is one compilation 423*9712c20fSFrederick Mayle // unit in the .debug_info section for each file that had debug info 424*9712c20fSFrederick Mayle // generated. 425*9712c20fSFrederick Mayle // Each DIE consists of 426*9712c20fSFrederick Mayle 427*9712c20fSFrederick Mayle // 1. a tag specifying a thing that is being described (ie 428*9712c20fSFrederick Mayle // DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc 429*9712c20fSFrederick Mayle // 2. attributes (such as DW_AT_location for location in memory, 430*9712c20fSFrederick Mayle // DW_AT_name for name), and data for each attribute. 431*9712c20fSFrederick Mayle // 3. 
A flag saying whether the DIE has children or not 432*9712c20fSFrederick Mayle 433*9712c20fSFrederick Mayle // In order to gain some amount of compression, the format of 434*9712c20fSFrederick Mayle // each DIE (tag name, attributes and data forms for the attributes) 435*9712c20fSFrederick Mayle // are stored in a separate table called the "abbreviation table". 436*9712c20fSFrederick Mayle // This is done because a large number of DIEs have the exact same tag 437*9712c20fSFrederick Mayle // and list of attributes, but different data for those attributes. 438*9712c20fSFrederick Mayle // As a result, the .debug_info section is just a stream of data, and 439*9712c20fSFrederick Mayle // requires reading of the .debug_abbrev section to say what the data 440*9712c20fSFrederick Mayle // means. 441*9712c20fSFrederick Mayle 442*9712c20fSFrederick Mayle // As a warning to the user, it should be noted that the reason for 443*9712c20fSFrederick Mayle // using absolute offsets from the beginning of .debug_info is that 444*9712c20fSFrederick Mayle // DWARF2/3 supports referencing DIE's from other DIE's by their offset 445*9712c20fSFrederick Mayle // from either the current compilation unit start, *or* the beginning 446*9712c20fSFrederick Mayle // of the .debug_info section. This means it is possible to reference 447*9712c20fSFrederick Mayle // a DIE in one compilation unit from a DIE in another compilation 448*9712c20fSFrederick Mayle // unit. This style of reference is usually used to eliminate 449*9712c20fSFrederick Mayle // duplicated information that occurs across compilation 450*9712c20fSFrederick Mayle // units, such as base types, etc. GCC 3.4+ support this with 451*9712c20fSFrederick Mayle // -feliminate-dwarf2-dups. Other toolchains will sometimes do 452*9712c20fSFrederick Mayle // duplicate elimination in the linker. 
453*9712c20fSFrederick Mayle 454*9712c20fSFrederick Mayle class CompilationUnit { 455*9712c20fSFrederick Mayle public: 456*9712c20fSFrederick Mayle 457*9712c20fSFrederick Mayle // Initialize a compilation unit. This requires a map of sections, 458*9712c20fSFrederick Mayle // the offset of this compilation unit in the .debug_info section, a 459*9712c20fSFrederick Mayle // ByteReader, and a Dwarf2Handler class to call callbacks in. 460*9712c20fSFrederick Mayle CompilationUnit(const string& path, const SectionMap& sections, 461*9712c20fSFrederick Mayle uint64_t offset, ByteReader* reader, Dwarf2Handler* handler); ~CompilationUnit()462*9712c20fSFrederick Mayle virtual ~CompilationUnit() { 463*9712c20fSFrederick Mayle if (abbrevs_) delete abbrevs_; 464*9712c20fSFrederick Mayle } 465*9712c20fSFrederick Mayle 466*9712c20fSFrederick Mayle // Initialize a compilation unit from a .dwo or .dwp file. 467*9712c20fSFrederick Mayle // In this case, we need the .debug_addr section from the 468*9712c20fSFrederick Mayle // executable file that contains the corresponding skeleton 469*9712c20fSFrederick Mayle // compilation unit. We also inherit the Dwarf2Handler from 470*9712c20fSFrederick Mayle // the executable file, and call it as if we were still 471*9712c20fSFrederick Mayle // processing the original compilation unit. 472*9712c20fSFrederick Mayle void SetSplitDwarf(uint64_t addr_base, uint64_t dwo_id); 473*9712c20fSFrederick Mayle 474*9712c20fSFrederick Mayle // Begin reading a Dwarf2 compilation unit, and calling the 475*9712c20fSFrederick Mayle // callbacks in the Dwarf2Handler 476*9712c20fSFrederick Mayle 477*9712c20fSFrederick Mayle // Return the full length of the compilation unit, including 478*9712c20fSFrederick Mayle // headers. This plus the starting offset passed to the constructor 479*9712c20fSFrederick Mayle // is the offset of the end of the compilation unit --- and the 480*9712c20fSFrederick Mayle // start of the next compilation unit, if there is one. 
481*9712c20fSFrederick Mayle uint64_t Start(); 482*9712c20fSFrederick Mayle 483*9712c20fSFrederick Mayle // Process the actual debug information in a split DWARF file. 484*9712c20fSFrederick Mayle bool ProcessSplitDwarf(std::string& split_file, 485*9712c20fSFrederick Mayle SectionMap& sections, 486*9712c20fSFrederick Mayle ByteReader& split_byte_reader, 487*9712c20fSFrederick Mayle uint64_t& cu_offset); 488*9712c20fSFrederick Mayle GetAddrBuffer()489*9712c20fSFrederick Mayle const uint8_t* GetAddrBuffer() { return addr_buffer_; } 490*9712c20fSFrederick Mayle GetAddrBufferLen()491*9712c20fSFrederick Mayle uint64_t GetAddrBufferLen() { return addr_buffer_length_; } 492*9712c20fSFrederick Mayle GetAddrBase()493*9712c20fSFrederick Mayle uint64_t GetAddrBase() { return addr_base_; } 494*9712c20fSFrederick Mayle GetLowPC()495*9712c20fSFrederick Mayle uint64_t GetLowPC() { return low_pc_; } 496*9712c20fSFrederick Mayle GetDWOID()497*9712c20fSFrederick Mayle uint64_t GetDWOID() { return dwo_id_; } 498*9712c20fSFrederick Mayle GetLineBuffer()499*9712c20fSFrederick Mayle const uint8_t* GetLineBuffer() { return line_buffer_; } 500*9712c20fSFrederick Mayle GetLineBufferLen()501*9712c20fSFrederick Mayle uint64_t GetLineBufferLen() { return line_buffer_length_; } 502*9712c20fSFrederick Mayle GetLineStrBuffer()503*9712c20fSFrederick Mayle const uint8_t* GetLineStrBuffer() { return line_string_buffer_; } 504*9712c20fSFrederick Mayle GetLineStrBufferLen()505*9712c20fSFrederick Mayle uint64_t GetLineStrBufferLen() { return line_string_buffer_length_; } 506*9712c20fSFrederick Mayle HasSourceLineInfo()507*9712c20fSFrederick Mayle bool HasSourceLineInfo() { return has_source_line_info_; } 508*9712c20fSFrederick Mayle GetSourceLineOffset()509*9712c20fSFrederick Mayle uint64_t GetSourceLineOffset() { return source_line_offset_; } 510*9712c20fSFrederick Mayle ShouldProcessSplitDwarf()511*9712c20fSFrederick Mayle bool ShouldProcessSplitDwarf() { return should_process_split_dwarf_; } 
512*9712c20fSFrederick Mayle 513*9712c20fSFrederick Mayle private: 514*9712c20fSFrederick Mayle 515*9712c20fSFrederick Mayle // This struct represents a single DWARF2/3 abbreviation 516*9712c20fSFrederick Mayle // The abbreviation tells how to read a DWARF2/3 DIE, and consist of a 517*9712c20fSFrederick Mayle // tag and a list of attributes, as well as the data form of each attribute. 518*9712c20fSFrederick Mayle struct Abbrev { 519*9712c20fSFrederick Mayle uint64_t number; 520*9712c20fSFrederick Mayle enum DwarfTag tag; 521*9712c20fSFrederick Mayle bool has_children; 522*9712c20fSFrederick Mayle AttributeList attributes; 523*9712c20fSFrederick Mayle }; 524*9712c20fSFrederick Mayle 525*9712c20fSFrederick Mayle // A DWARF2/3 compilation unit header. This is not the same size as 526*9712c20fSFrederick Mayle // in the actual file, as the one in the file may have a 32 bit or 527*9712c20fSFrederick Mayle // 64 bit length. 528*9712c20fSFrederick Mayle struct CompilationUnitHeader { 529*9712c20fSFrederick Mayle uint64_t length; 530*9712c20fSFrederick Mayle uint16_t version; 531*9712c20fSFrederick Mayle uint64_t abbrev_offset; 532*9712c20fSFrederick Mayle uint8_t address_size; 533*9712c20fSFrederick Mayle } header_; 534*9712c20fSFrederick Mayle 535*9712c20fSFrederick Mayle // Reads the DWARF2/3 header for this compilation unit. 
536*9712c20fSFrederick Mayle void ReadHeader(); 537*9712c20fSFrederick Mayle 538*9712c20fSFrederick Mayle // Reads the DWARF2/3 abbreviations for this compilation unit 539*9712c20fSFrederick Mayle void ReadAbbrevs(); 540*9712c20fSFrederick Mayle 541*9712c20fSFrederick Mayle // Read the abbreviation offset for this compilation unit 542*9712c20fSFrederick Mayle size_t ReadAbbrevOffset(const uint8_t* headerptr); 543*9712c20fSFrederick Mayle 544*9712c20fSFrederick Mayle // Read the address size for this compilation unit 545*9712c20fSFrederick Mayle size_t ReadAddressSize(const uint8_t* headerptr); 546*9712c20fSFrederick Mayle 547*9712c20fSFrederick Mayle // Read the DWO id from a split or skeleton compilation unit header 548*9712c20fSFrederick Mayle size_t ReadDwoId(const uint8_t* headerptr); 549*9712c20fSFrederick Mayle 550*9712c20fSFrederick Mayle // Read the type signature from a type or split type compilation unit header 551*9712c20fSFrederick Mayle size_t ReadTypeSignature(const uint8_t* headerptr); 552*9712c20fSFrederick Mayle 553*9712c20fSFrederick Mayle // Read the DWO id from a split or skeleton compilation unit header 554*9712c20fSFrederick Mayle size_t ReadTypeOffset(const uint8_t* headerptr); 555*9712c20fSFrederick Mayle 556*9712c20fSFrederick Mayle // Processes a single DIE for this compilation unit and return a new 557*9712c20fSFrederick Mayle // pointer just past the end of it 558*9712c20fSFrederick Mayle const uint8_t* ProcessDIE(uint64_t dieoffset, 559*9712c20fSFrederick Mayle const uint8_t* start, 560*9712c20fSFrederick Mayle const Abbrev& abbrev); 561*9712c20fSFrederick Mayle 562*9712c20fSFrederick Mayle // Processes a single attribute and return a new pointer just past the 563*9712c20fSFrederick Mayle // end of it 564*9712c20fSFrederick Mayle const uint8_t* ProcessAttribute(uint64_t dieoffset, 565*9712c20fSFrederick Mayle const uint8_t* start, 566*9712c20fSFrederick Mayle enum DwarfAttribute attr, 567*9712c20fSFrederick Mayle enum DwarfForm form, 
568*9712c20fSFrederick Mayle uint64_t implicit_const); 569*9712c20fSFrederick Mayle 570*9712c20fSFrederick Mayle // Special version of ProcessAttribute, for finding str_offsets_base and 571*9712c20fSFrederick Mayle // DW_AT_addr_base in DW_TAG_compile_unit, for DWARF v5. 572*9712c20fSFrederick Mayle const uint8_t* ProcessOffsetBaseAttribute(uint64_t dieoffset, 573*9712c20fSFrederick Mayle const uint8_t* start, 574*9712c20fSFrederick Mayle enum DwarfAttribute attr, 575*9712c20fSFrederick Mayle enum DwarfForm form, 576*9712c20fSFrederick Mayle uint64_t implicit_const); 577*9712c20fSFrederick Mayle 578*9712c20fSFrederick Mayle // Called when we have an attribute with unsigned data to give to 579*9712c20fSFrederick Mayle // our handler. The attribute is for the DIE at OFFSET from the 580*9712c20fSFrederick Mayle // beginning of compilation unit, has a name of ATTR, a form of 581*9712c20fSFrederick Mayle // FORM, and the actual data of the attribute is in DATA. 582*9712c20fSFrederick Mayle // If we see a DW_AT_GNU_dwo_id attribute, save the value so that 583*9712c20fSFrederick Mayle // we can find the debug info in a .dwo or .dwp file. 
ProcessAttributeUnsigned(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t data)584*9712c20fSFrederick Mayle void ProcessAttributeUnsigned(uint64_t offset, 585*9712c20fSFrederick Mayle enum DwarfAttribute attr, 586*9712c20fSFrederick Mayle enum DwarfForm form, 587*9712c20fSFrederick Mayle uint64_t data) { 588*9712c20fSFrederick Mayle if (attr == DW_AT_GNU_dwo_id) { 589*9712c20fSFrederick Mayle dwo_id_ = data; 590*9712c20fSFrederick Mayle } 591*9712c20fSFrederick Mayle else if (attr == DW_AT_GNU_addr_base || attr == DW_AT_addr_base) { 592*9712c20fSFrederick Mayle addr_base_ = data; 593*9712c20fSFrederick Mayle } 594*9712c20fSFrederick Mayle else if (attr == DW_AT_str_offsets_base) { 595*9712c20fSFrederick Mayle str_offsets_base_ = data; 596*9712c20fSFrederick Mayle } 597*9712c20fSFrederick Mayle else if (attr == DW_AT_low_pc) { 598*9712c20fSFrederick Mayle low_pc_ = data; 599*9712c20fSFrederick Mayle } 600*9712c20fSFrederick Mayle else if (attr == DW_AT_stmt_list) { 601*9712c20fSFrederick Mayle has_source_line_info_ = true; 602*9712c20fSFrederick Mayle source_line_offset_ = data; 603*9712c20fSFrederick Mayle } 604*9712c20fSFrederick Mayle handler_->ProcessAttributeUnsigned(offset, attr, form, data); 605*9712c20fSFrederick Mayle } 606*9712c20fSFrederick Mayle 607*9712c20fSFrederick Mayle // Called when we have an attribute with signed data to give to 608*9712c20fSFrederick Mayle // our handler. The attribute is for the DIE at OFFSET from the 609*9712c20fSFrederick Mayle // beginning of compilation unit, has a name of ATTR, a form of 610*9712c20fSFrederick Mayle // FORM, and the actual data of the attribute is in DATA. 
ProcessAttributeSigned(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,int64_t data)611*9712c20fSFrederick Mayle void ProcessAttributeSigned(uint64_t offset, 612*9712c20fSFrederick Mayle enum DwarfAttribute attr, 613*9712c20fSFrederick Mayle enum DwarfForm form, 614*9712c20fSFrederick Mayle int64_t data) { 615*9712c20fSFrederick Mayle handler_->ProcessAttributeSigned(offset, attr, form, data); 616*9712c20fSFrederick Mayle } 617*9712c20fSFrederick Mayle 618*9712c20fSFrederick Mayle // Called when we have an attribute with a buffer of data to give to 619*9712c20fSFrederick Mayle // our handler. The attribute is for the DIE at OFFSET from the 620*9712c20fSFrederick Mayle // beginning of compilation unit, has a name of ATTR, a form of 621*9712c20fSFrederick Mayle // FORM, and the actual data of the attribute is in DATA, and the 622*9712c20fSFrederick Mayle // length of the buffer is LENGTH. ProcessAttributeBuffer(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,const uint8_t * data,uint64_t len)623*9712c20fSFrederick Mayle void ProcessAttributeBuffer(uint64_t offset, 624*9712c20fSFrederick Mayle enum DwarfAttribute attr, 625*9712c20fSFrederick Mayle enum DwarfForm form, 626*9712c20fSFrederick Mayle const uint8_t* data, 627*9712c20fSFrederick Mayle uint64_t len) { 628*9712c20fSFrederick Mayle handler_->ProcessAttributeBuffer(offset, attr, form, data, len); 629*9712c20fSFrederick Mayle } 630*9712c20fSFrederick Mayle 631*9712c20fSFrederick Mayle // Handles the common parts of DW_FORM_GNU_str_index, DW_FORM_strx, 632*9712c20fSFrederick Mayle // DW_FORM_strx1, DW_FORM_strx2, DW_FORM_strx3, and DW_FORM_strx4. 633*9712c20fSFrederick Mayle // Retrieves the data and calls through to ProcessAttributeString. 
634*9712c20fSFrederick Mayle void ProcessFormStringIndex(uint64_t offset, 635*9712c20fSFrederick Mayle enum DwarfAttribute attr, 636*9712c20fSFrederick Mayle enum DwarfForm form, 637*9712c20fSFrederick Mayle uint64_t str_index); 638*9712c20fSFrederick Mayle 639*9712c20fSFrederick Mayle // Called when we have an attribute with string data to give to 640*9712c20fSFrederick Mayle // our handler. The attribute is for the DIE at OFFSET from the 641*9712c20fSFrederick Mayle // beginning of compilation unit, has a name of ATTR, a form of 642*9712c20fSFrederick Mayle // FORM, and the actual data of the attribute is in DATA. 643*9712c20fSFrederick Mayle // If we see a DW_AT_GNU_dwo_name attribute, save the value so 644*9712c20fSFrederick Mayle // that we can find the debug info in a .dwo or .dwp file. ProcessAttributeString(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,const char * data)645*9712c20fSFrederick Mayle void ProcessAttributeString(uint64_t offset, 646*9712c20fSFrederick Mayle enum DwarfAttribute attr, 647*9712c20fSFrederick Mayle enum DwarfForm form, 648*9712c20fSFrederick Mayle const char* data) { 649*9712c20fSFrederick Mayle if (attr == DW_AT_GNU_dwo_name || attr == DW_AT_dwo_name) 650*9712c20fSFrederick Mayle dwo_name_ = data; 651*9712c20fSFrederick Mayle handler_->ProcessAttributeString(offset, attr, form, data); 652*9712c20fSFrederick Mayle } 653*9712c20fSFrederick Mayle 654*9712c20fSFrederick Mayle // Called to handle common portions of DW_FORM_addrx and variations, as well 655*9712c20fSFrederick Mayle // as DW_FORM_GNU_addr_index. 
ProcessAttributeAddrIndex(uint64_t offset,enum DwarfAttribute attr,enum DwarfForm form,uint64_t addr_index)656*9712c20fSFrederick Mayle void ProcessAttributeAddrIndex(uint64_t offset, 657*9712c20fSFrederick Mayle enum DwarfAttribute attr, 658*9712c20fSFrederick Mayle enum DwarfForm form, 659*9712c20fSFrederick Mayle uint64_t addr_index) { 660*9712c20fSFrederick Mayle const uint8_t* addr_ptr = 661*9712c20fSFrederick Mayle addr_buffer_ + addr_base_ + addr_index * reader_->AddressSize(); 662*9712c20fSFrederick Mayle ProcessAttributeUnsigned( 663*9712c20fSFrederick Mayle offset, attr, form, reader_->ReadAddress(addr_ptr)); 664*9712c20fSFrederick Mayle } 665*9712c20fSFrederick Mayle 666*9712c20fSFrederick Mayle // Processes all DIEs for this compilation unit 667*9712c20fSFrederick Mayle void ProcessDIEs(); 668*9712c20fSFrederick Mayle 669*9712c20fSFrederick Mayle // Skips the die with attributes specified in ABBREV starting at 670*9712c20fSFrederick Mayle // START, and return the new place to position the stream to. 671*9712c20fSFrederick Mayle const uint8_t* SkipDIE(const uint8_t* start, const Abbrev& abbrev); 672*9712c20fSFrederick Mayle 673*9712c20fSFrederick Mayle // Skips the attribute starting at START, with FORM, and return the 674*9712c20fSFrederick Mayle // new place to position the stream to. 675*9712c20fSFrederick Mayle const uint8_t* SkipAttribute(const uint8_t* start, enum DwarfForm form); 676*9712c20fSFrederick Mayle 677*9712c20fSFrederick Mayle // Read the debug sections from a .dwo file. 678*9712c20fSFrederick Mayle void ReadDebugSectionsFromDwo(ElfReader* elf_reader, 679*9712c20fSFrederick Mayle SectionMap* sections); 680*9712c20fSFrederick Mayle 681*9712c20fSFrederick Mayle // Path of the file containing the debug information. 
682*9712c20fSFrederick Mayle const string path_; 683*9712c20fSFrederick Mayle 684*9712c20fSFrederick Mayle // Offset from section start is the offset of this compilation unit 685*9712c20fSFrederick Mayle // from the beginning of the .debug_info/.debug_info.dwo section. 686*9712c20fSFrederick Mayle uint64_t offset_from_section_start_; 687*9712c20fSFrederick Mayle 688*9712c20fSFrederick Mayle // buffer is the buffer for our CU, starting at .debug_info + offset 689*9712c20fSFrederick Mayle // passed in from constructor. 690*9712c20fSFrederick Mayle // after_header points to right after the compilation unit header. 691*9712c20fSFrederick Mayle const uint8_t* buffer_; 692*9712c20fSFrederick Mayle uint64_t buffer_length_; 693*9712c20fSFrederick Mayle const uint8_t* after_header_; 694*9712c20fSFrederick Mayle 695*9712c20fSFrederick Mayle // The associated ByteReader that handles endianness issues for us 696*9712c20fSFrederick Mayle ByteReader* reader_; 697*9712c20fSFrederick Mayle 698*9712c20fSFrederick Mayle // The map of sections in our file to buffers containing their data 699*9712c20fSFrederick Mayle const SectionMap& sections_; 700*9712c20fSFrederick Mayle 701*9712c20fSFrederick Mayle // The associated handler to call processing functions in 702*9712c20fSFrederick Mayle Dwarf2Handler* handler_; 703*9712c20fSFrederick Mayle 704*9712c20fSFrederick Mayle // Set of DWARF2/3 abbreviations for this compilation unit. Indexed 705*9712c20fSFrederick Mayle // by abbreviation number, which means that abbrevs_[0] is not 706*9712c20fSFrederick Mayle // valid. 707*9712c20fSFrederick Mayle std::vector<Abbrev>* abbrevs_; 708*9712c20fSFrederick Mayle 709*9712c20fSFrederick Mayle // String section buffer and length, if we have a string section. 710*9712c20fSFrederick Mayle // This is here to avoid doing a section lookup for strings in 711*9712c20fSFrederick Mayle // ProcessAttribute, which is in the hot path for DWARF2 reading. 
712*9712c20fSFrederick Mayle const uint8_t* string_buffer_; 713*9712c20fSFrederick Mayle uint64_t string_buffer_length_; 714*9712c20fSFrederick Mayle 715*9712c20fSFrederick Mayle // Similarly for .debug_line_str. 716*9712c20fSFrederick Mayle const uint8_t* line_string_buffer_; 717*9712c20fSFrederick Mayle uint64_t line_string_buffer_length_; 718*9712c20fSFrederick Mayle 719*9712c20fSFrederick Mayle // String offsets section buffer and length, if we have a string offsets 720*9712c20fSFrederick Mayle // section (.debug_str_offsets or .debug_str_offsets.dwo). 721*9712c20fSFrederick Mayle const uint8_t* str_offsets_buffer_; 722*9712c20fSFrederick Mayle uint64_t str_offsets_buffer_length_; 723*9712c20fSFrederick Mayle 724*9712c20fSFrederick Mayle // Address section buffer and length, if we have an address section 725*9712c20fSFrederick Mayle // (.debug_addr). 726*9712c20fSFrederick Mayle const uint8_t* addr_buffer_; 727*9712c20fSFrederick Mayle uint64_t addr_buffer_length_; 728*9712c20fSFrederick Mayle 729*9712c20fSFrederick Mayle // .debug_line section buffer and length. 730*9712c20fSFrederick Mayle const uint8_t* line_buffer_; 731*9712c20fSFrederick Mayle uint64_t line_buffer_length_; 732*9712c20fSFrederick Mayle 733*9712c20fSFrederick Mayle // Flag indicating whether this compilation unit is part of a .dwo 734*9712c20fSFrederick Mayle // or .dwp file. If true, we are reading this unit because a 735*9712c20fSFrederick Mayle // skeleton compilation unit in an executable file had a 736*9712c20fSFrederick Mayle // DW_AT_GNU_dwo_name or DW_AT_GNU_dwo_id attribute. 737*9712c20fSFrederick Mayle // In a .dwo file, we expect the string offsets section to 738*9712c20fSFrederick Mayle // have a ".dwo" suffix, and we will use the ".debug_addr" section 739*9712c20fSFrederick Mayle // associated with the skeleton compilation unit. 
740*9712c20fSFrederick Mayle bool is_split_dwarf_; 741*9712c20fSFrederick Mayle 742*9712c20fSFrederick Mayle // Flag indicating if it's a Type Unit (only applicable to DWARF v5). 743*9712c20fSFrederick Mayle bool is_type_unit_; 744*9712c20fSFrederick Mayle 745*9712c20fSFrederick Mayle // The value of the DW_AT_GNU_dwo_id attribute, if any. 746*9712c20fSFrederick Mayle uint64_t dwo_id_; 747*9712c20fSFrederick Mayle 748*9712c20fSFrederick Mayle // The value of the DW_AT_GNU_type_signature attribute, if any. 749*9712c20fSFrederick Mayle uint64_t type_signature_; 750*9712c20fSFrederick Mayle 751*9712c20fSFrederick Mayle // The value of the DW_AT_GNU_type_offset attribute, if any. 752*9712c20fSFrederick Mayle size_t type_offset_; 753*9712c20fSFrederick Mayle 754*9712c20fSFrederick Mayle // The value of the DW_AT_GNU_dwo_name attribute, if any. 755*9712c20fSFrederick Mayle const char* dwo_name_; 756*9712c20fSFrederick Mayle 757*9712c20fSFrederick Mayle // If this is a split DWARF CU, the value of the DW_AT_GNU_dwo_id attribute 758*9712c20fSFrederick Mayle // from the skeleton CU. 759*9712c20fSFrederick Mayle uint64_t skeleton_dwo_id_; 760*9712c20fSFrederick Mayle 761*9712c20fSFrederick Mayle // The value of the DW_AT_GNU_addr_base attribute, if any. 762*9712c20fSFrederick Mayle uint64_t addr_base_; 763*9712c20fSFrederick Mayle 764*9712c20fSFrederick Mayle // The value of DW_AT_str_offsets_base attribute, if any. 765*9712c20fSFrederick Mayle uint64_t str_offsets_base_; 766*9712c20fSFrederick Mayle 767*9712c20fSFrederick Mayle // True if we have already looked for a .dwp file. 768*9712c20fSFrederick Mayle bool have_checked_for_dwp_; 769*9712c20fSFrederick Mayle 770*9712c20fSFrederick Mayle // ElfReader for the dwo/dwo file. 771*9712c20fSFrederick Mayle std::unique_ptr<ElfReader> split_elf_reader_; 772*9712c20fSFrederick Mayle 773*9712c20fSFrederick Mayle // DWP reader. 
774*9712c20fSFrederick Mayle std::unique_ptr<DwpReader> dwp_reader_; 775*9712c20fSFrederick Mayle 776*9712c20fSFrederick Mayle bool should_process_split_dwarf_; 777*9712c20fSFrederick Mayle 778*9712c20fSFrederick Mayle // The value of the DW_AT_low_pc attribute, if any. 779*9712c20fSFrederick Mayle uint64_t low_pc_; 780*9712c20fSFrederick Mayle 781*9712c20fSFrederick Mayle // The value of DW_AT_stmt_list attribute if any. 782*9712c20fSFrederick Mayle bool has_source_line_info_; 783*9712c20fSFrederick Mayle uint64_t source_line_offset_; 784*9712c20fSFrederick Mayle }; 785*9712c20fSFrederick Mayle 786*9712c20fSFrederick Mayle // A Reader for a .dwp file. Supports the fetching of DWARF debug 787*9712c20fSFrederick Mayle // info for a given dwo_id. 788*9712c20fSFrederick Mayle // 789*9712c20fSFrederick Mayle // There are two versions of .dwp files. In both versions, the 790*9712c20fSFrederick Mayle // .dwp file is an ELF file containing only debug sections. 791*9712c20fSFrederick Mayle // In Version 1, the file contains many copies of each debug 792*9712c20fSFrederick Mayle // section, one for each .dwo file that is packaged in the .dwp 793*9712c20fSFrederick Mayle // file, and the .debug_cu_index section maps from the dwo_id 794*9712c20fSFrederick Mayle // to a set of section indexes. In Version 2, the file contains 795*9712c20fSFrederick Mayle // one of each debug section, and the .debug_cu_index section 796*9712c20fSFrederick Mayle // maps from the dwo_id to a set of offsets and lengths that 797*9712c20fSFrederick Mayle // identify each .dwo file's contribution to the larger sections. 798*9712c20fSFrederick Mayle 799*9712c20fSFrederick Mayle class DwpReader { 800*9712c20fSFrederick Mayle public: 801*9712c20fSFrederick Mayle DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader); 802*9712c20fSFrederick Mayle 803*9712c20fSFrederick Mayle // Read the CU index and initialize data members. 
804*9712c20fSFrederick Mayle void Initialize(); 805*9712c20fSFrederick Mayle 806*9712c20fSFrederick Mayle // Read the debug sections for the given dwo_id. 807*9712c20fSFrederick Mayle void ReadDebugSectionsForCU(uint64_t dwo_id, SectionMap* sections); 808*9712c20fSFrederick Mayle 809*9712c20fSFrederick Mayle private: 810*9712c20fSFrederick Mayle // Search a v1 hash table for "dwo_id". Returns the slot index 811*9712c20fSFrederick Mayle // where the dwo_id was found, or -1 if it was not found. 812*9712c20fSFrederick Mayle int LookupCU(uint64_t dwo_id); 813*9712c20fSFrederick Mayle 814*9712c20fSFrederick Mayle // Search a v2 hash table for "dwo_id". Returns the row index 815*9712c20fSFrederick Mayle // in the offsets and sizes tables, or 0 if it was not found. 816*9712c20fSFrederick Mayle uint32_t LookupCUv2(uint64_t dwo_id); 817*9712c20fSFrederick Mayle 818*9712c20fSFrederick Mayle // The ELF reader for the .dwp file. 819*9712c20fSFrederick Mayle ElfReader* elf_reader_; 820*9712c20fSFrederick Mayle 821*9712c20fSFrederick Mayle // The ByteReader for the .dwp file. 822*9712c20fSFrederick Mayle const ByteReader& byte_reader_; 823*9712c20fSFrederick Mayle 824*9712c20fSFrederick Mayle // Pointer to the .debug_cu_index section. 825*9712c20fSFrederick Mayle const char* cu_index_; 826*9712c20fSFrederick Mayle 827*9712c20fSFrederick Mayle // Size of the .debug_cu_index section. 828*9712c20fSFrederick Mayle size_t cu_index_size_; 829*9712c20fSFrederick Mayle 830*9712c20fSFrederick Mayle // Pointer to the .debug_str.dwo section. 831*9712c20fSFrederick Mayle const char* string_buffer_; 832*9712c20fSFrederick Mayle 833*9712c20fSFrederick Mayle // Size of the .debug_str.dwo section. 834*9712c20fSFrederick Mayle size_t string_buffer_size_; 835*9712c20fSFrederick Mayle 836*9712c20fSFrederick Mayle // Version of the .dwp file. We support versions 1 and 2 currently. 
837*9712c20fSFrederick Mayle int version_; 838*9712c20fSFrederick Mayle 839*9712c20fSFrederick Mayle // Number of columns in the section tables (version 2). 840*9712c20fSFrederick Mayle unsigned int ncolumns_; 841*9712c20fSFrederick Mayle 842*9712c20fSFrederick Mayle // Number of units in the section tables (version 2). 843*9712c20fSFrederick Mayle unsigned int nunits_; 844*9712c20fSFrederick Mayle 845*9712c20fSFrederick Mayle // Number of slots in the hash table. 846*9712c20fSFrederick Mayle unsigned int nslots_; 847*9712c20fSFrederick Mayle 848*9712c20fSFrederick Mayle // Pointer to the beginning of the hash table. 849*9712c20fSFrederick Mayle const char* phash_; 850*9712c20fSFrederick Mayle 851*9712c20fSFrederick Mayle // Pointer to the beginning of the index table. 852*9712c20fSFrederick Mayle const char* pindex_; 853*9712c20fSFrederick Mayle 854*9712c20fSFrederick Mayle // Pointer to the beginning of the section index pool (version 1). 855*9712c20fSFrederick Mayle const char* shndx_pool_; 856*9712c20fSFrederick Mayle 857*9712c20fSFrederick Mayle // Pointer to the beginning of the section offset table (version 2). 858*9712c20fSFrederick Mayle const char* offset_table_; 859*9712c20fSFrederick Mayle 860*9712c20fSFrederick Mayle // Pointer to the beginning of the section size table (version 2). 861*9712c20fSFrederick Mayle const char* size_table_; 862*9712c20fSFrederick Mayle 863*9712c20fSFrederick Mayle // Contents of the sections of interest (version 2). 
864*9712c20fSFrederick Mayle const char* abbrev_data_; 865*9712c20fSFrederick Mayle size_t abbrev_size_; 866*9712c20fSFrederick Mayle const char* info_data_; 867*9712c20fSFrederick Mayle size_t info_size_; 868*9712c20fSFrederick Mayle const char* str_offsets_data_; 869*9712c20fSFrederick Mayle size_t str_offsets_size_; 870*9712c20fSFrederick Mayle const char* rnglist_data_; 871*9712c20fSFrederick Mayle size_t rnglist_size_; 872*9712c20fSFrederick Mayle }; 873*9712c20fSFrederick Mayle 874*9712c20fSFrederick Mayle // This class is a reader for DWARF's Call Frame Information. CFI 875*9712c20fSFrederick Mayle // describes how to unwind stack frames --- even for functions that do 876*9712c20fSFrederick Mayle // not follow fixed conventions for saving registers, whose frame size 877*9712c20fSFrederick Mayle // varies as they execute, etc. 878*9712c20fSFrederick Mayle // 879*9712c20fSFrederick Mayle // CFI describes, at each machine instruction, how to compute the 880*9712c20fSFrederick Mayle // stack frame's base address, how to find the return address, and 881*9712c20fSFrederick Mayle // where to find the saved values of the caller's registers (if the 882*9712c20fSFrederick Mayle // callee has stashed them somewhere to free up the registers for its 883*9712c20fSFrederick Mayle // own use). 884*9712c20fSFrederick Mayle // 885*9712c20fSFrederick Mayle // For example, suppose we have a function whose machine code looks 886*9712c20fSFrederick Mayle // like this (imagine an assembly language that looks like C, for a 887*9712c20fSFrederick Mayle // machine with 32-bit registers, and a stack that grows towards lower 888*9712c20fSFrederick Mayle // addresses): 889*9712c20fSFrederick Mayle // 890*9712c20fSFrederick Mayle // func: ; entry point; return address at sp 891*9712c20fSFrederick Mayle // func+0: sp = sp - 16 ; allocate space for stack frame 892*9712c20fSFrederick Mayle // func+1: sp[12] = r0 ; save r0 at sp+12 893*9712c20fSFrederick Mayle // ... 
; other code, not frame-related 894*9712c20fSFrederick Mayle // func+10: sp -= 4; *sp = x ; push some x on the stack 895*9712c20fSFrederick Mayle // ... ; other code, not frame-related 896*9712c20fSFrederick Mayle // func+20: r0 = sp[16] ; restore saved r0 897*9712c20fSFrederick Mayle // func+21: sp += 20 ; pop whole stack frame 898*9712c20fSFrederick Mayle // func+22: pc = *sp; sp += 4 ; pop return address and jump to it 899*9712c20fSFrederick Mayle // 900*9712c20fSFrederick Mayle // DWARF CFI is (a very compressed representation of) a table with a 901*9712c20fSFrederick Mayle // row for each machine instruction address and a column for each 902*9712c20fSFrederick Mayle // register showing how to restore it, if possible. 903*9712c20fSFrederick Mayle // 904*9712c20fSFrederick Mayle // A special column named "CFA", for "Canonical Frame Address", tells how 905*9712c20fSFrederick Mayle // to compute the base address of the frame; registers' entries may 906*9712c20fSFrederick Mayle // refer to the CFA in describing where the registers are saved. 907*9712c20fSFrederick Mayle // 908*9712c20fSFrederick Mayle // Another special column, named "RA", represents the return address. 909*9712c20fSFrederick Mayle // 910*9712c20fSFrederick Mayle // For example, here is a complete (uncompressed) table describing the 911*9712c20fSFrederick Mayle // function above: 912*9712c20fSFrederick Mayle // 913*9712c20fSFrederick Mayle // insn cfa r0 r1 ... 
ra 914*9712c20fSFrederick Mayle // ======================================= 915*9712c20fSFrederick Mayle // func+0: sp cfa[0] 916*9712c20fSFrederick Mayle // func+1: sp+16 cfa[0] 917*9712c20fSFrederick Mayle // func+2: sp+16 cfa[-4] cfa[0] 918*9712c20fSFrederick Mayle // func+11: sp+20 cfa[-4] cfa[0] 919*9712c20fSFrederick Mayle // func+21: sp+20 cfa[0] 920*9712c20fSFrederick Mayle // func+22: sp cfa[0] 921*9712c20fSFrederick Mayle // 922*9712c20fSFrederick Mayle // Some things to note here: 923*9712c20fSFrederick Mayle // 924*9712c20fSFrederick Mayle // - Each row describes the state of affairs *before* executing the 925*9712c20fSFrederick Mayle // instruction at the given address. Thus, the row for func+0 926*9712c20fSFrederick Mayle // describes the state before we allocate the stack frame. In the 927*9712c20fSFrederick Mayle // next row, the formula for computing the CFA has changed, 928*9712c20fSFrederick Mayle // reflecting that allocation. 929*9712c20fSFrederick Mayle // 930*9712c20fSFrederick Mayle // - The other entries are written in terms of the CFA; this allows 931*9712c20fSFrederick Mayle // them to remain unchanged as the stack pointer gets bumped around. 932*9712c20fSFrederick Mayle // For example, the rule for recovering the return address (the "ra" 933*9712c20fSFrederick Mayle // column) remains unchanged throughout the function, even as the 934*9712c20fSFrederick Mayle // stack pointer takes on three different offsets from the return 935*9712c20fSFrederick Mayle // address. 936*9712c20fSFrederick Mayle // 937*9712c20fSFrederick Mayle // - Although we haven't shown it, most calling conventions designate 938*9712c20fSFrederick Mayle // "callee-saves" and "caller-saves" registers. The callee must 939*9712c20fSFrederick Mayle // preserve the values of callee-saves registers; if it uses them, 940*9712c20fSFrederick Mayle // it must save their original values somewhere, and restore them 941*9712c20fSFrederick Mayle // before it returns. 
In contrast, the callee is free to trash
//     caller-saves registers; if the callee uses these, it will
//     probably not bother to save them anywhere, and the CFI will
//     probably mark their values as "unrecoverable".
//
//     (However, since the caller cannot assume the callee was going to
//     save them, caller-saves registers are probably dead in the caller
//     anyway, so compilers usually don't generate CFI for caller-saves
//     registers.)
//
//   - Exactly where the CFA points is a matter of convention that
//     depends on the architecture and ABI in use. In the example, the
//     CFA is the value the stack pointer had upon entry to the
//     function, pointing at the saved return address. But on the x86,
//     the call frame information generated by GCC follows the
//     convention that the CFA is the address *after* the saved return
//     address.
//
//     But by definition, the CFA remains constant throughout the
//     lifetime of the frame. This makes it a useful value for other
//     columns to refer to. It also gives debuggers a useful handle
//     for identifying a frame.
963*9712c20fSFrederick Mayle // 964*9712c20fSFrederick Mayle // If you look at the table above, you'll notice that a given entry is 965*9712c20fSFrederick Mayle // often the same as the one immediately above it: most instructions 966*9712c20fSFrederick Mayle // change only one or two aspects of the stack frame, if they affect 967*9712c20fSFrederick Mayle // it at all. The DWARF format takes advantage of this fact, and 968*9712c20fSFrederick Mayle // reduces the size of the data by mentioning only the addresses and 969*9712c20fSFrederick Mayle // columns at which changes take place. So for the above, DWARF CFI 970*9712c20fSFrederick Mayle // data would only actually mention the following: 971*9712c20fSFrederick Mayle // 972*9712c20fSFrederick Mayle // insn cfa r0 r1 ... ra 973*9712c20fSFrederick Mayle // ======================================= 974*9712c20fSFrederick Mayle // func+0: sp cfa[0] 975*9712c20fSFrederick Mayle // func+1: sp+16 976*9712c20fSFrederick Mayle // func+2: cfa[-4] 977*9712c20fSFrederick Mayle // func+11: sp+20 978*9712c20fSFrederick Mayle // func+21: r0 979*9712c20fSFrederick Mayle // func+22: sp 980*9712c20fSFrederick Mayle // 981*9712c20fSFrederick Mayle // In fact, this is the way the parser reports CFI to the consumer: as 982*9712c20fSFrederick Mayle // a series of statements of the form, "At address X, column Y changed 983*9712c20fSFrederick Mayle // to Z," and related conventions for describing the initial state. 984*9712c20fSFrederick Mayle // 985*9712c20fSFrederick Mayle // Naturally, it would be impractical to have to scan the entire 986*9712c20fSFrederick Mayle // program's CFI, noting changes as we go, just to recover the 987*9712c20fSFrederick Mayle // unwinding rules in effect at one particular instruction. 
To avoid 988*9712c20fSFrederick Mayle // this, CFI data is grouped into "entries", each of which covers a 989*9712c20fSFrederick Mayle // specified range of addresses and begins with a complete statement 990*9712c20fSFrederick Mayle // of the rules for all recoverable registers at that starting 991*9712c20fSFrederick Mayle // address. Each entry typically covers a single function. 992*9712c20fSFrederick Mayle // 993*9712c20fSFrederick Mayle // Thus, to compute the contents of a given row of the table --- that 994*9712c20fSFrederick Mayle // is, rules for recovering the CFA, RA, and registers at a given 995*9712c20fSFrederick Mayle // instruction --- the consumer should find the entry that covers that 996*9712c20fSFrederick Mayle // instruction's address, start with the initial state supplied at the 997*9712c20fSFrederick Mayle // beginning of the entry, and work forward until it has processed all 998*9712c20fSFrederick Mayle // the changes up to and including those for the present instruction. 999*9712c20fSFrederick Mayle // 1000*9712c20fSFrederick Mayle // There are seven kinds of rules that can appear in an entry of the 1001*9712c20fSFrederick Mayle // table: 1002*9712c20fSFrederick Mayle // 1003*9712c20fSFrederick Mayle // - "undefined": The given register is not preserved by the callee; 1004*9712c20fSFrederick Mayle // its value cannot be recovered. 1005*9712c20fSFrederick Mayle // 1006*9712c20fSFrederick Mayle // - "same value": This register has the same value it did in the callee. 1007*9712c20fSFrederick Mayle // 1008*9712c20fSFrederick Mayle // - offset(N): The register is saved at offset N from the CFA. 1009*9712c20fSFrederick Mayle // 1010*9712c20fSFrederick Mayle // - val_offset(N): The value the register had in the caller is the 1011*9712c20fSFrederick Mayle // CFA plus offset N. (This is usually only useful for describing 1012*9712c20fSFrederick Mayle // the stack pointer.) 
1013*9712c20fSFrederick Mayle // 1014*9712c20fSFrederick Mayle // - register(R): The register's value was saved in another register R. 1015*9712c20fSFrederick Mayle // 1016*9712c20fSFrederick Mayle // - expression(E): Evaluating the DWARF expression E using the 1017*9712c20fSFrederick Mayle // current frame's registers' values yields the address at which the 1018*9712c20fSFrederick Mayle // register was saved. 1019*9712c20fSFrederick Mayle // 1020*9712c20fSFrederick Mayle // - val_expression(E): Evaluating the DWARF expression E using the 1021*9712c20fSFrederick Mayle // current frame's registers' values yields the value the register 1022*9712c20fSFrederick Mayle // had in the caller. 1023*9712c20fSFrederick Mayle 1024*9712c20fSFrederick Mayle class CallFrameInfo { 1025*9712c20fSFrederick Mayle public: 1026*9712c20fSFrederick Mayle // The different kinds of entries one finds in CFI. Used internally, 1027*9712c20fSFrederick Mayle // and for error reporting. 1028*9712c20fSFrederick Mayle enum EntryKind { kUnknown, kCIE, kFDE, kTerminator }; 1029*9712c20fSFrederick Mayle 1030*9712c20fSFrederick Mayle // The handler class to which the parser hands the parsed call frame 1031*9712c20fSFrederick Mayle // information. Defined below. 1032*9712c20fSFrederick Mayle class Handler; 1033*9712c20fSFrederick Mayle 1034*9712c20fSFrederick Mayle // A reporter class, which CallFrameInfo uses to report errors 1035*9712c20fSFrederick Mayle // encountered while parsing call frame information. Defined below. 1036*9712c20fSFrederick Mayle class Reporter; 1037*9712c20fSFrederick Mayle 1038*9712c20fSFrederick Mayle // Create a DWARF CFI parser. BUFFER points to the contents of the 1039*9712c20fSFrederick Mayle // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes. 1040*9712c20fSFrederick Mayle // REPORTER is an error reporter the parser should use to report 1041*9712c20fSFrederick Mayle // problems. 
READER is a ByteReader instance that has the endianness and 1042*9712c20fSFrederick Mayle // address size set properly. Report the data we find to HANDLER. 1043*9712c20fSFrederick Mayle // 1044*9712c20fSFrederick Mayle // This class can also parse Linux C++ exception handling data, as found 1045*9712c20fSFrederick Mayle // in '.eh_frame' sections. This data is a variant of DWARF CFI that is 1046*9712c20fSFrederick Mayle // placed in loadable segments so that it is present in the program's 1047*9712c20fSFrederick Mayle // address space, and is interpreted by the C++ runtime to search the 1048*9712c20fSFrederick Mayle // call stack for a handler interested in the exception being thrown, 1049*9712c20fSFrederick Mayle // actually pop the frames, and find cleanup code to run. 1050*9712c20fSFrederick Mayle // 1051*9712c20fSFrederick Mayle // There are two differences between the call frame information described 1052*9712c20fSFrederick Mayle // in the DWARF standard and the exception handling data Linux places in 1053*9712c20fSFrederick Mayle // the .eh_frame section: 1054*9712c20fSFrederick Mayle // 1055*9712c20fSFrederick Mayle // - Exception handling data uses uses a different format for call frame 1056*9712c20fSFrederick Mayle // information entry headers. The distinguished CIE id, the way FDEs 1057*9712c20fSFrederick Mayle // refer to their CIEs, and the way the end of the series of entries is 1058*9712c20fSFrederick Mayle // determined are all slightly different. 1059*9712c20fSFrederick Mayle // 1060*9712c20fSFrederick Mayle // If the constructor's EH_FRAME argument is true, then the 1061*9712c20fSFrederick Mayle // CallFrameInfo parses the entry headers as Linux C++ exception 1062*9712c20fSFrederick Mayle // handling data. If EH_FRAME is false or omitted, the CallFrameInfo 1063*9712c20fSFrederick Mayle // parses standard DWARF call frame information. 
1064*9712c20fSFrederick Mayle // 1065*9712c20fSFrederick Mayle // - Linux C++ exception handling data uses CIE augmentation strings 1066*9712c20fSFrederick Mayle // beginning with 'z' to specify the presence of additional data after 1067*9712c20fSFrederick Mayle // the CIE and FDE headers and special encodings used for addresses in 1068*9712c20fSFrederick Mayle // frame description entries. 1069*9712c20fSFrederick Mayle // 1070*9712c20fSFrederick Mayle // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or 1071*9712c20fSFrederick Mayle // exception handling data if you have supplied READER with the base 1072*9712c20fSFrederick Mayle // addresses needed to interpret the pointer encodings that 'z' 1073*9712c20fSFrederick Mayle // augmentations can specify. See the ByteReader interface for details 1074*9712c20fSFrederick Mayle // about the base addresses. See the CallFrameInfo::Handler interface 1075*9712c20fSFrederick Mayle // for details about the additional information one might find in 1076*9712c20fSFrederick Mayle // 'z'-augmented data. 1077*9712c20fSFrederick Mayle // 1078*9712c20fSFrederick Mayle // Thus: 1079*9712c20fSFrederick Mayle // 1080*9712c20fSFrederick Mayle // - If you are parsing standard DWARF CFI, as found in a .debug_frame 1081*9712c20fSFrederick Mayle // section, you should pass false for the EH_FRAME argument, or omit 1082*9712c20fSFrederick Mayle // it, and you need not worry about providing READER with the 1083*9712c20fSFrederick Mayle // additional base addresses. 1084*9712c20fSFrederick Mayle // 1085*9712c20fSFrederick Mayle // - If you want to parse Linux C++ exception handling data from a 1086*9712c20fSFrederick Mayle // .eh_frame section, you should pass EH_FRAME as true, and call 1087*9712c20fSFrederick Mayle // READER's Set*Base member functions before calling our Start method. 
1088*9712c20fSFrederick Mayle // 1089*9712c20fSFrederick Mayle // - If you want to parse DWARF CFI that uses the 'z' augmentations 1090*9712c20fSFrederick Mayle // (although I don't think any toolchain ever emits such data), you 1091*9712c20fSFrederick Mayle // could pass false for EH_FRAME, but call READER's Set*Base members. 1092*9712c20fSFrederick Mayle // 1093*9712c20fSFrederick Mayle // The extensions the Linux C++ ABI makes to DWARF for exception 1094*9712c20fSFrederick Mayle // handling are described here, rather poorly: 1095*9712c20fSFrederick Mayle // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html 1096*9712c20fSFrederick Mayle // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html 1097*9712c20fSFrederick Mayle // 1098*9712c20fSFrederick Mayle // The mechanics of C++ exception handling, personality routines, 1099*9712c20fSFrederick Mayle // and language-specific data areas are described here, rather nicely: 1100*9712c20fSFrederick Mayle // http://www.codesourcery.com/public/cxx-abi/abi-eh.html 1101*9712c20fSFrederick Mayle CallFrameInfo(const uint8_t* buffer, size_t buffer_length, 1102*9712c20fSFrederick Mayle ByteReader* reader, Handler* handler, Reporter* reporter, 1103*9712c20fSFrederick Mayle bool eh_frame = false) buffer_(buffer)1104*9712c20fSFrederick Mayle : buffer_(buffer), buffer_length_(buffer_length), 1105*9712c20fSFrederick Mayle reader_(reader), handler_(handler), reporter_(reporter), 1106*9712c20fSFrederick Mayle eh_frame_(eh_frame) { } 1107*9712c20fSFrederick Mayle ~CallFrameInfo()1108*9712c20fSFrederick Mayle ~CallFrameInfo() { } 1109*9712c20fSFrederick Mayle 1110*9712c20fSFrederick Mayle // Parse the entries in BUFFER, reporting what we find to HANDLER. 1111*9712c20fSFrederick Mayle // Return true if we reach the end of the section successfully, or 1112*9712c20fSFrederick Mayle // false if we encounter an error. 
1113*9712c20fSFrederick Mayle bool Start(); 1114*9712c20fSFrederick Mayle 1115*9712c20fSFrederick Mayle // Return the textual name of KIND. For error reporting. 1116*9712c20fSFrederick Mayle static const char* KindName(EntryKind kind); 1117*9712c20fSFrederick Mayle 1118*9712c20fSFrederick Mayle private: 1119*9712c20fSFrederick Mayle 1120*9712c20fSFrederick Mayle struct CIE; 1121*9712c20fSFrederick Mayle 1122*9712c20fSFrederick Mayle // A CFI entry, either an FDE or a CIE. 1123*9712c20fSFrederick Mayle struct Entry { 1124*9712c20fSFrederick Mayle // The starting offset of the entry in the section, for error 1125*9712c20fSFrederick Mayle // reporting. 1126*9712c20fSFrederick Mayle size_t offset; 1127*9712c20fSFrederick Mayle 1128*9712c20fSFrederick Mayle // The start of this entry in the buffer. 1129*9712c20fSFrederick Mayle const uint8_t* start; 1130*9712c20fSFrederick Mayle 1131*9712c20fSFrederick Mayle // Which kind of entry this is. 1132*9712c20fSFrederick Mayle // 1133*9712c20fSFrederick Mayle // We want to be able to use this for error reporting even while we're 1134*9712c20fSFrederick Mayle // in the midst of parsing. Error reporting code may assume that kind, 1135*9712c20fSFrederick Mayle // offset, and start fields are valid, although kind may be kUnknown. 1136*9712c20fSFrederick Mayle EntryKind kind; 1137*9712c20fSFrederick Mayle 1138*9712c20fSFrederick Mayle // The end of this entry's common prologue (initial length and id), and 1139*9712c20fSFrederick Mayle // the start of this entry's kind-specific fields. 1140*9712c20fSFrederick Mayle const uint8_t* fields; 1141*9712c20fSFrederick Mayle 1142*9712c20fSFrederick Mayle // The start of this entry's instructions. 1143*9712c20fSFrederick Mayle const uint8_t* instructions; 1144*9712c20fSFrederick Mayle 1145*9712c20fSFrederick Mayle // The address past the entry's last byte in the buffer. 
(Note that 1146*9712c20fSFrederick Mayle // since offset points to the entry's initial length field, and the 1147*9712c20fSFrederick Mayle // length field is the number of bytes after that field, this is not 1148*9712c20fSFrederick Mayle // simply buffer_ + offset + length.) 1149*9712c20fSFrederick Mayle const uint8_t* end; 1150*9712c20fSFrederick Mayle 1151*9712c20fSFrederick Mayle // For both DWARF CFI and .eh_frame sections, this is the CIE id in a 1152*9712c20fSFrederick Mayle // CIE, and the offset of the associated CIE in an FDE. 1153*9712c20fSFrederick Mayle uint64_t id; 1154*9712c20fSFrederick Mayle 1155*9712c20fSFrederick Mayle // The CIE that applies to this entry, if we've parsed it. If this is a 1156*9712c20fSFrederick Mayle // CIE, then this field points to this structure. 1157*9712c20fSFrederick Mayle CIE* cie; 1158*9712c20fSFrederick Mayle }; 1159*9712c20fSFrederick Mayle 1160*9712c20fSFrederick Mayle // A common information entry (CIE). 1161*9712c20fSFrederick Mayle struct CIE: public Entry { 1162*9712c20fSFrederick Mayle uint8_t version; // CFI data version number 1163*9712c20fSFrederick Mayle string augmentation; // vendor format extension markers 1164*9712c20fSFrederick Mayle uint64_t code_alignment_factor; // scale for code address adjustments 1165*9712c20fSFrederick Mayle int data_alignment_factor; // scale for stack pointer adjustments 1166*9712c20fSFrederick Mayle unsigned return_address_register; // which register holds the return addr 1167*9712c20fSFrederick Mayle 1168*9712c20fSFrederick Mayle // True if this CIE includes Linux C++ ABI 'z' augmentation data. 1169*9712c20fSFrederick Mayle bool has_z_augmentation; 1170*9712c20fSFrederick Mayle 1171*9712c20fSFrederick Mayle // Parsed 'z' augmentation data. These are meaningful only if 1172*9712c20fSFrederick Mayle // has_z_augmentation is true. 1173*9712c20fSFrederick Mayle bool has_z_lsda; // The 'z' augmentation included 'L'. 
1174*9712c20fSFrederick Mayle bool has_z_personality; // The 'z' augmentation included 'P'. 1175*9712c20fSFrederick Mayle bool has_z_signal_frame; // The 'z' augmentation included 'S'. 1176*9712c20fSFrederick Mayle 1177*9712c20fSFrederick Mayle // If has_z_lsda is true, this is the encoding to be used for language- 1178*9712c20fSFrederick Mayle // specific data area pointers in FDEs. 1179*9712c20fSFrederick Mayle DwarfPointerEncoding lsda_encoding; 1180*9712c20fSFrederick Mayle 1181*9712c20fSFrederick Mayle // If has_z_personality is true, this is the encoding used for the 1182*9712c20fSFrederick Mayle // personality routine pointer in the augmentation data. 1183*9712c20fSFrederick Mayle DwarfPointerEncoding personality_encoding; 1184*9712c20fSFrederick Mayle 1185*9712c20fSFrederick Mayle // If has_z_personality is true, this is the address of the personality 1186*9712c20fSFrederick Mayle // routine --- or, if personality_encoding & DW_EH_PE_indirect, the 1187*9712c20fSFrederick Mayle // address where the personality routine's address is stored. 1188*9712c20fSFrederick Mayle uint64_t personality_address; 1189*9712c20fSFrederick Mayle 1190*9712c20fSFrederick Mayle // This is the encoding used for addresses in the FDE header and 1191*9712c20fSFrederick Mayle // in DW_CFA_set_loc instructions. This is always valid, whether 1192*9712c20fSFrederick Mayle // or not we saw a 'z' augmentation string; its default value is 1193*9712c20fSFrederick Mayle // DW_EH_PE_absptr, which is what normal DWARF CFI uses. 1194*9712c20fSFrederick Mayle DwarfPointerEncoding pointer_encoding; 1195*9712c20fSFrederick Mayle 1196*9712c20fSFrederick Mayle // These were only introduced in DWARF4, so will not be set in older 1197*9712c20fSFrederick Mayle // versions. 1198*9712c20fSFrederick Mayle uint8_t address_size; 1199*9712c20fSFrederick Mayle uint8_t segment_size; 1200*9712c20fSFrederick Mayle }; 1201*9712c20fSFrederick Mayle 1202*9712c20fSFrederick Mayle // A frame description entry (FDE). 
1203*9712c20fSFrederick Mayle struct FDE: public Entry { 1204*9712c20fSFrederick Mayle uint64_t address; // start address of described code 1205*9712c20fSFrederick Mayle uint64_t size; // size of described code, in bytes 1206*9712c20fSFrederick Mayle 1207*9712c20fSFrederick Mayle // If cie->has_z_lsda is true, then this is the language-specific data 1208*9712c20fSFrederick Mayle // area's address --- or its address's address, if cie->lsda_encoding 1209*9712c20fSFrederick Mayle // has the DW_EH_PE_indirect bit set. 1210*9712c20fSFrederick Mayle uint64_t lsda_address; 1211*9712c20fSFrederick Mayle }; 1212*9712c20fSFrederick Mayle 1213*9712c20fSFrederick Mayle // Internal use. 1214*9712c20fSFrederick Mayle class Rule; 1215*9712c20fSFrederick Mayle class UndefinedRule; 1216*9712c20fSFrederick Mayle class SameValueRule; 1217*9712c20fSFrederick Mayle class OffsetRule; 1218*9712c20fSFrederick Mayle class ValOffsetRule; 1219*9712c20fSFrederick Mayle class RegisterRule; 1220*9712c20fSFrederick Mayle class ExpressionRule; 1221*9712c20fSFrederick Mayle class ValExpressionRule; 1222*9712c20fSFrederick Mayle class RuleMap; 1223*9712c20fSFrederick Mayle class State; 1224*9712c20fSFrederick Mayle 1225*9712c20fSFrederick Mayle // Parse the initial length and id of a CFI entry, either a CIE, an FDE, 1226*9712c20fSFrederick Mayle // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the 1227*9712c20fSFrederick Mayle // data to parse. On success, populate ENTRY as appropriate, and return 1228*9712c20fSFrederick Mayle // true. On failure, report the problem, and return false. Even if we 1229*9712c20fSFrederick Mayle // return false, set ENTRY->end to the first byte after the entry if we 1230*9712c20fSFrederick Mayle // were able to figure that out, or NULL if we weren't. 
1231*9712c20fSFrederick Mayle bool ReadEntryPrologue(const uint8_t* cursor, Entry* entry); 1232*9712c20fSFrederick Mayle 1233*9712c20fSFrederick Mayle // Parse the fields of a CIE after the entry prologue, including any 'z' 1234*9712c20fSFrederick Mayle // augmentation data. Assume that the 'Entry' fields of CIE are 1235*9712c20fSFrederick Mayle // populated; use CIE->fields and CIE->end as the start and limit for 1236*9712c20fSFrederick Mayle // parsing. On success, populate the rest of *CIE, and return true; on 1237*9712c20fSFrederick Mayle // failure, report the problem and return false. 1238*9712c20fSFrederick Mayle bool ReadCIEFields(CIE* cie); 1239*9712c20fSFrederick Mayle 1240*9712c20fSFrederick Mayle // Parse the fields of an FDE after the entry prologue, including any 'z' 1241*9712c20fSFrederick Mayle // augmentation data. Assume that the 'Entry' fields of *FDE are 1242*9712c20fSFrederick Mayle // initialized; use FDE->fields and FDE->end as the start and limit for 1243*9712c20fSFrederick Mayle // parsing. Assume that FDE->cie is fully initialized. On success, 1244*9712c20fSFrederick Mayle // populate the rest of *FDE, and return true; on failure, report the 1245*9712c20fSFrederick Mayle // problem and return false. 1246*9712c20fSFrederick Mayle bool ReadFDEFields(FDE* fde); 1247*9712c20fSFrederick Mayle 1248*9712c20fSFrederick Mayle // Report that ENTRY is incomplete, and return false. This is just a 1249*9712c20fSFrederick Mayle // trivial wrapper for invoking reporter_->Incomplete; it provides a 1250*9712c20fSFrederick Mayle // little brevity. 1251*9712c20fSFrederick Mayle bool ReportIncomplete(Entry* entry); 1252*9712c20fSFrederick Mayle 1253*9712c20fSFrederick Mayle // Return true if ENCODING has the DW_EH_PE_indirect bit set. 
IsIndirectEncoding(DwarfPointerEncoding encoding)1254*9712c20fSFrederick Mayle static bool IsIndirectEncoding(DwarfPointerEncoding encoding) { 1255*9712c20fSFrederick Mayle return encoding & DW_EH_PE_indirect; 1256*9712c20fSFrederick Mayle } 1257*9712c20fSFrederick Mayle 1258*9712c20fSFrederick Mayle // The contents of the DWARF .debug_info section we're parsing. 1259*9712c20fSFrederick Mayle const uint8_t* buffer_; 1260*9712c20fSFrederick Mayle size_t buffer_length_; 1261*9712c20fSFrederick Mayle 1262*9712c20fSFrederick Mayle // For reading multi-byte values with the appropriate endianness. 1263*9712c20fSFrederick Mayle ByteReader* reader_; 1264*9712c20fSFrederick Mayle 1265*9712c20fSFrederick Mayle // The handler to which we should report the data we find. 1266*9712c20fSFrederick Mayle Handler* handler_; 1267*9712c20fSFrederick Mayle 1268*9712c20fSFrederick Mayle // For reporting problems in the info we're parsing. 1269*9712c20fSFrederick Mayle Reporter* reporter_; 1270*9712c20fSFrederick Mayle 1271*9712c20fSFrederick Mayle // True if we are processing .eh_frame-format data. 1272*9712c20fSFrederick Mayle bool eh_frame_; 1273*9712c20fSFrederick Mayle }; 1274*9712c20fSFrederick Mayle 1275*9712c20fSFrederick Mayle // The handler class for CallFrameInfo. The a CFI parser calls the 1276*9712c20fSFrederick Mayle // member functions of a handler object to report the data it finds. 1277*9712c20fSFrederick Mayle class CallFrameInfo::Handler { 1278*9712c20fSFrederick Mayle public: 1279*9712c20fSFrederick Mayle // The pseudo-register number for the canonical frame address. 1280*9712c20fSFrederick Mayle enum { kCFARegister = -1 }; 1281*9712c20fSFrederick Mayle Handler()1282*9712c20fSFrederick Mayle Handler() { } ~Handler()1283*9712c20fSFrederick Mayle virtual ~Handler() { } 1284*9712c20fSFrederick Mayle 1285*9712c20fSFrederick Mayle // The parser has found CFI for the machine code at ADDRESS, 1286*9712c20fSFrederick Mayle // extending for LENGTH bytes. 
OFFSET is the offset of the frame 1287*9712c20fSFrederick Mayle // description entry in the section, for use in error messages. 1288*9712c20fSFrederick Mayle // VERSION is the version number of the CFI format. AUGMENTATION is 1289*9712c20fSFrederick Mayle // a string describing any producer-specific extensions present in 1290*9712c20fSFrederick Mayle // the data. RETURN_ADDRESS is the number of the register that holds 1291*9712c20fSFrederick Mayle // the address to which the function should return. 1292*9712c20fSFrederick Mayle // 1293*9712c20fSFrederick Mayle // Entry should return true to process this CFI, or false to skip to 1294*9712c20fSFrederick Mayle // the next entry. 1295*9712c20fSFrederick Mayle // 1296*9712c20fSFrederick Mayle // The parser invokes Entry for each Frame Description Entry (FDE) 1297*9712c20fSFrederick Mayle // it finds. The parser doesn't report Common Information Entries 1298*9712c20fSFrederick Mayle // to the handler explicitly; instead, if the handler elects to 1299*9712c20fSFrederick Mayle // process a given FDE, the parser reiterates the appropriate CIE's 1300*9712c20fSFrederick Mayle // contents at the beginning of the FDE's rules. 1301*9712c20fSFrederick Mayle virtual bool Entry(size_t offset, uint64_t address, uint64_t length, 1302*9712c20fSFrederick Mayle uint8_t version, const string& augmentation, 1303*9712c20fSFrederick Mayle unsigned return_address) = 0; 1304*9712c20fSFrederick Mayle 1305*9712c20fSFrederick Mayle // When the Entry function returns true, the parser calls these 1306*9712c20fSFrederick Mayle // handler functions repeatedly to describe the rules for recovering 1307*9712c20fSFrederick Mayle // registers at each instruction in the given range of machine code. 
1308*9712c20fSFrederick Mayle // Immediately after a call to Entry, the handler should assume that 1309*9712c20fSFrederick Mayle // the rule for each callee-saves register is "unchanged" --- that 1310*9712c20fSFrederick Mayle // is, that the register still has the value it had in the caller. 1311*9712c20fSFrederick Mayle // 1312*9712c20fSFrederick Mayle // If a *Rule function returns true, we continue processing this entry's 1313*9712c20fSFrederick Mayle // instructions. If a *Rule function returns false, we stop evaluating 1314*9712c20fSFrederick Mayle // instructions, and skip to the next entry. Either way, we call End 1315*9712c20fSFrederick Mayle // before going on to the next entry. 1316*9712c20fSFrederick Mayle // 1317*9712c20fSFrederick Mayle // In all of these functions, if the REG parameter is kCFARegister, then 1318*9712c20fSFrederick Mayle // the rule describes how to find the canonical frame address. 1319*9712c20fSFrederick Mayle // kCFARegister may be passed as a BASE_REGISTER argument, meaning that 1320*9712c20fSFrederick Mayle // the canonical frame address should be used as the base address for the 1321*9712c20fSFrederick Mayle // computation. All other REG values will be positive. 1322*9712c20fSFrederick Mayle 1323*9712c20fSFrederick Mayle // At ADDRESS, register REG's value is not recoverable. 1324*9712c20fSFrederick Mayle virtual bool UndefinedRule(uint64_t address, int reg) = 0; 1325*9712c20fSFrederick Mayle 1326*9712c20fSFrederick Mayle // At ADDRESS, register REG's value is the same as that it had in 1327*9712c20fSFrederick Mayle // the caller. 1328*9712c20fSFrederick Mayle virtual bool SameValueRule(uint64_t address, int reg) = 0; 1329*9712c20fSFrederick Mayle 1330*9712c20fSFrederick Mayle // At ADDRESS, register REG has been saved at offset OFFSET from 1331*9712c20fSFrederick Mayle // BASE_REGISTER. 
1332*9712c20fSFrederick Mayle virtual bool OffsetRule(uint64_t address, int reg, 1333*9712c20fSFrederick Mayle int base_register, long offset) = 0; 1334*9712c20fSFrederick Mayle 1335*9712c20fSFrederick Mayle // At ADDRESS, the caller's value of register REG is the current 1336*9712c20fSFrederick Mayle // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an 1337*9712c20fSFrederick Mayle // address at which the register's value is saved.) 1338*9712c20fSFrederick Mayle virtual bool ValOffsetRule(uint64_t address, int reg, 1339*9712c20fSFrederick Mayle int base_register, long offset) = 0; 1340*9712c20fSFrederick Mayle 1341*9712c20fSFrederick Mayle // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs 1342*9712c20fSFrederick Mayle // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that 1343*9712c20fSFrederick Mayle // BASE_REGISTER is the "home" for REG's saved value: if you want to 1344*9712c20fSFrederick Mayle // assign to a variable whose home is REG in the calling frame, you 1345*9712c20fSFrederick Mayle // should put the value in BASE_REGISTER. 1346*9712c20fSFrederick Mayle virtual bool RegisterRule(uint64_t address, int reg, int base_register) = 0; 1347*9712c20fSFrederick Mayle 1348*9712c20fSFrederick Mayle // At ADDRESS, the DWARF expression EXPRESSION yields the address at 1349*9712c20fSFrederick Mayle // which REG was saved. 1350*9712c20fSFrederick Mayle virtual bool ExpressionRule(uint64_t address, int reg, 1351*9712c20fSFrederick Mayle const string& expression) = 0; 1352*9712c20fSFrederick Mayle 1353*9712c20fSFrederick Mayle // At ADDRESS, the DWARF expression EXPRESSION yields the caller's 1354*9712c20fSFrederick Mayle // value for REG. (This rule doesn't provide an address at which the 1355*9712c20fSFrederick Mayle // register's value is saved.) 
1356*9712c20fSFrederick Mayle virtual bool ValExpressionRule(uint64_t address, int reg, 1357*9712c20fSFrederick Mayle const string& expression) = 0; 1358*9712c20fSFrederick Mayle 1359*9712c20fSFrederick Mayle // Indicate that the rules for the address range reported by the 1360*9712c20fSFrederick Mayle // last call to Entry are complete. End should return true if 1361*9712c20fSFrederick Mayle // everything is okay, or false if an error has occurred and parsing 1362*9712c20fSFrederick Mayle // should stop. 1363*9712c20fSFrederick Mayle virtual bool End() = 0; 1364*9712c20fSFrederick Mayle 1365*9712c20fSFrederick Mayle // The target architecture for the data. 1366*9712c20fSFrederick Mayle virtual string Architecture() = 0; 1367*9712c20fSFrederick Mayle 1368*9712c20fSFrederick Mayle // Handler functions for Linux C++ exception handling data. These are 1369*9712c20fSFrederick Mayle // only called if the data includes 'z' augmentation strings. 1370*9712c20fSFrederick Mayle 1371*9712c20fSFrederick Mayle // The Linux C++ ABI uses an extension of the DWARF CFI format to 1372*9712c20fSFrederick Mayle // walk the stack to propagate exceptions from the throw to the 1373*9712c20fSFrederick Mayle // appropriate catch, and do the appropriate cleanups along the way. 1374*9712c20fSFrederick Mayle // CFI entries used for exception handling have two additional data 1375*9712c20fSFrederick Mayle // associated with them: 1376*9712c20fSFrederick Mayle // 1377*9712c20fSFrederick Mayle // - The "language-specific data area" describes which exception 1378*9712c20fSFrederick Mayle // types the function has 'catch' clauses for, and indicates how 1379*9712c20fSFrederick Mayle // to go about re-entering the function at the appropriate catch 1380*9712c20fSFrederick Mayle // clause. If the exception is not caught, it describes the 1381*9712c20fSFrederick Mayle // destructors that must run before the frame is popped. 
1382*9712c20fSFrederick Mayle // 1383*9712c20fSFrederick Mayle // - The "personality routine" is responsible for interpreting the 1384*9712c20fSFrederick Mayle // language-specific data area's contents, and deciding whether 1385*9712c20fSFrederick Mayle // the exception should continue to propagate down the stack, 1386*9712c20fSFrederick Mayle // perhaps after doing some cleanup for this frame, or whether the 1387*9712c20fSFrederick Mayle // exception will be caught here. 1388*9712c20fSFrederick Mayle // 1389*9712c20fSFrederick Mayle // In principle, the language-specific data area is opaque to 1390*9712c20fSFrederick Mayle // everybody but the personality routine. In practice, these values 1391*9712c20fSFrederick Mayle // may be useful or interesting to readers with extra context, and 1392*9712c20fSFrederick Mayle // we have to at least skip them anyway, so we might as well report 1393*9712c20fSFrederick Mayle // them to the handler. 1394*9712c20fSFrederick Mayle 1395*9712c20fSFrederick Mayle // This entry's exception handling personality routine's address is 1396*9712c20fSFrederick Mayle // ADDRESS. If INDIRECT is true, then ADDRESS is the address at 1397*9712c20fSFrederick Mayle // which the routine's address is stored. The default definition for 1398*9712c20fSFrederick Mayle // this handler function simply returns true, allowing parsing of 1399*9712c20fSFrederick Mayle // the entry to continue. PersonalityRoutine(uint64_t address,bool indirect)1400*9712c20fSFrederick Mayle virtual bool PersonalityRoutine(uint64_t address, bool indirect) { 1401*9712c20fSFrederick Mayle return true; 1402*9712c20fSFrederick Mayle } 1403*9712c20fSFrederick Mayle 1404*9712c20fSFrederick Mayle // This entry's language-specific data area (LSDA) is located at 1405*9712c20fSFrederick Mayle // ADDRESS. If INDIRECT is true, then ADDRESS is the address at 1406*9712c20fSFrederick Mayle // which the area's address is stored. 
The default definition for 1407*9712c20fSFrederick Mayle // this handler function simply returns true, allowing parsing of 1408*9712c20fSFrederick Mayle // the entry to continue. LanguageSpecificDataArea(uint64_t address,bool indirect)1409*9712c20fSFrederick Mayle virtual bool LanguageSpecificDataArea(uint64_t address, bool indirect) { 1410*9712c20fSFrederick Mayle return true; 1411*9712c20fSFrederick Mayle } 1412*9712c20fSFrederick Mayle 1413*9712c20fSFrederick Mayle // This entry describes a signal trampoline --- this frame is the 1414*9712c20fSFrederick Mayle // caller of a signal handler. The default definition for this 1415*9712c20fSFrederick Mayle // handler function simply returns true, allowing parsing of the 1416*9712c20fSFrederick Mayle // entry to continue. 1417*9712c20fSFrederick Mayle // 1418*9712c20fSFrederick Mayle // The best description of the rationale for and meaning of signal 1419*9712c20fSFrederick Mayle // trampoline CFI entries seems to be in the GCC bug database: 1420*9712c20fSFrederick Mayle // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 SignalHandler()1421*9712c20fSFrederick Mayle virtual bool SignalHandler() { return true; } 1422*9712c20fSFrederick Mayle }; 1423*9712c20fSFrederick Mayle 1424*9712c20fSFrederick Mayle // The CallFrameInfo class makes calls on an instance of this class to 1425*9712c20fSFrederick Mayle // report errors or warn about problems in the data it is parsing. The 1426*9712c20fSFrederick Mayle // default definitions of these methods print a message to stderr, but 1427*9712c20fSFrederick Mayle // you can make a derived class that overrides them. 1428*9712c20fSFrederick Mayle class CallFrameInfo::Reporter { 1429*9712c20fSFrederick Mayle public: 1430*9712c20fSFrederick Mayle // Create an error reporter which attributes troubles to the section 1431*9712c20fSFrederick Mayle // named SECTION in FILENAME. 
1432*9712c20fSFrederick Mayle // 1433*9712c20fSFrederick Mayle // Normally SECTION would be .debug_frame, but the Mac puts CFI data 1434*9712c20fSFrederick Mayle // in a Mach-O section named __debug_frame. If we support 1435*9712c20fSFrederick Mayle // Linux-style exception handling data, we could be reading an 1436*9712c20fSFrederick Mayle // .eh_frame section. 1437*9712c20fSFrederick Mayle Reporter(const string& filename, 1438*9712c20fSFrederick Mayle const string& section = ".debug_frame") filename_(filename)1439*9712c20fSFrederick Mayle : filename_(filename), section_(section) { } ~Reporter()1440*9712c20fSFrederick Mayle virtual ~Reporter() { } 1441*9712c20fSFrederick Mayle 1442*9712c20fSFrederick Mayle // The CFI entry at OFFSET ends too early to be well-formed. KIND 1443*9712c20fSFrederick Mayle // indicates what kind of entry it is; KIND can be kUnknown if we 1444*9712c20fSFrederick Mayle // haven't parsed enough of the entry to tell yet. 1445*9712c20fSFrederick Mayle virtual void Incomplete(uint64_t offset, CallFrameInfo::EntryKind kind); 1446*9712c20fSFrederick Mayle 1447*9712c20fSFrederick Mayle // The .eh_frame data has a four-byte zero at OFFSET where the next 1448*9712c20fSFrederick Mayle // entry's length would be; this is a terminator. However, the buffer 1449*9712c20fSFrederick Mayle // length as given to the CallFrameInfo constructor says there should be 1450*9712c20fSFrederick Mayle // more data. 1451*9712c20fSFrederick Mayle virtual void EarlyEHTerminator(uint64_t offset); 1452*9712c20fSFrederick Mayle 1453*9712c20fSFrederick Mayle // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the 1454*9712c20fSFrederick Mayle // section is not that large. 1455*9712c20fSFrederick Mayle virtual void CIEPointerOutOfRange(uint64_t offset, uint64_t cie_offset); 1456*9712c20fSFrederick Mayle 1457*9712c20fSFrederick Mayle // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry 1458*9712c20fSFrederick Mayle // there is not a CIE. 
1459*9712c20fSFrederick Mayle virtual void BadCIEId(uint64_t offset, uint64_t cie_offset); 1460*9712c20fSFrederick Mayle 1461*9712c20fSFrederick Mayle // The FDE at OFFSET refers to a CIE with an address size we don't know how 1462*9712c20fSFrederick Mayle // to handle. 1463*9712c20fSFrederick Mayle virtual void UnexpectedAddressSize(uint64_t offset, uint8_t address_size); 1464*9712c20fSFrederick Mayle 1465*9712c20fSFrederick Mayle // The FDE at OFFSET refers to a CIE with an segment descriptor size we 1466*9712c20fSFrederick Mayle // don't know how to handle. 1467*9712c20fSFrederick Mayle virtual void UnexpectedSegmentSize(uint64_t offset, uint8_t segment_size); 1468*9712c20fSFrederick Mayle 1469*9712c20fSFrederick Mayle // The FDE at OFFSET refers to a CIE with version number VERSION, 1470*9712c20fSFrederick Mayle // which we don't recognize. We cannot parse DWARF CFI if it uses 1471*9712c20fSFrederick Mayle // a version number we don't recognize. 1472*9712c20fSFrederick Mayle virtual void UnrecognizedVersion(uint64_t offset, int version); 1473*9712c20fSFrederick Mayle 1474*9712c20fSFrederick Mayle // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION, 1475*9712c20fSFrederick Mayle // which we don't recognize. We cannot parse DWARF CFI if it uses 1476*9712c20fSFrederick Mayle // augmentations we don't recognize. 1477*9712c20fSFrederick Mayle virtual void UnrecognizedAugmentation(uint64_t offset, 1478*9712c20fSFrederick Mayle const string& augmentation); 1479*9712c20fSFrederick Mayle 1480*9712c20fSFrederick Mayle // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not 1481*9712c20fSFrederick Mayle // a valid encoding. 1482*9712c20fSFrederick Mayle virtual void InvalidPointerEncoding(uint64_t offset, uint8_t encoding); 1483*9712c20fSFrederick Mayle 1484*9712c20fSFrederick Mayle // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends 1485*9712c20fSFrederick Mayle // on a base address which has not been supplied. 
1486*9712c20fSFrederick Mayle virtual void UnusablePointerEncoding(uint64_t offset, uint8_t encoding); 1487*9712c20fSFrederick Mayle 1488*9712c20fSFrederick Mayle // The CIE at OFFSET contains a DW_CFA_restore instruction at 1489*9712c20fSFrederick Mayle // INSN_OFFSET, which may not appear in a CIE. 1490*9712c20fSFrederick Mayle virtual void RestoreInCIE(uint64_t offset, uint64_t insn_offset); 1491*9712c20fSFrederick Mayle 1492*9712c20fSFrederick Mayle // The entry at OFFSET, of kind KIND, has an unrecognized 1493*9712c20fSFrederick Mayle // instruction at INSN_OFFSET. 1494*9712c20fSFrederick Mayle virtual void BadInstruction(uint64_t offset, CallFrameInfo::EntryKind kind, 1495*9712c20fSFrederick Mayle uint64_t insn_offset); 1496*9712c20fSFrederick Mayle 1497*9712c20fSFrederick Mayle // The instruction at INSN_OFFSET in the entry at OFFSET, of kind 1498*9712c20fSFrederick Mayle // KIND, establishes a rule that cites the CFA, but we have not 1499*9712c20fSFrederick Mayle // established a CFA rule yet. 1500*9712c20fSFrederick Mayle virtual void NoCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, 1501*9712c20fSFrederick Mayle uint64_t insn_offset); 1502*9712c20fSFrederick Mayle 1503*9712c20fSFrederick Mayle // The instruction at INSN_OFFSET in the entry at OFFSET, of kind 1504*9712c20fSFrederick Mayle // KIND, is a DW_CFA_restore_state instruction, but the stack of 1505*9712c20fSFrederick Mayle // saved states is empty. 1506*9712c20fSFrederick Mayle virtual void EmptyStateStack(uint64_t offset, CallFrameInfo::EntryKind kind, 1507*9712c20fSFrederick Mayle uint64_t insn_offset); 1508*9712c20fSFrederick Mayle 1509*9712c20fSFrederick Mayle // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry 1510*9712c20fSFrederick Mayle // at OFFSET, of kind KIND, would restore a state that has no CFA 1511*9712c20fSFrederick Mayle // rule, whereas the current state does have a CFA rule. 
This is 1512*9712c20fSFrederick Mayle // bogus input, which the CallFrameInfo::Handler interface doesn't 1513*9712c20fSFrederick Mayle // (and shouldn't) have any way to report. 1514*9712c20fSFrederick Mayle virtual void ClearingCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, 1515*9712c20fSFrederick Mayle uint64_t insn_offset); 1516*9712c20fSFrederick Mayle 1517*9712c20fSFrederick Mayle protected: 1518*9712c20fSFrederick Mayle // The name of the file whose CFI we're reading. 1519*9712c20fSFrederick Mayle string filename_; 1520*9712c20fSFrederick Mayle 1521*9712c20fSFrederick Mayle // The name of the CFI section in that file. 1522*9712c20fSFrederick Mayle string section_; 1523*9712c20fSFrederick Mayle }; 1524*9712c20fSFrederick Mayle 1525*9712c20fSFrederick Mayle } // namespace google_breakpad 1526*9712c20fSFrederick Mayle 1527*9712c20fSFrederick Mayle #endif // UTIL_DEBUGINFO_DWARF2READER_H__ 1528