// -*- mode: c++ -*-

// Copyright 2011 Google LLC
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google LLC nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// dump_syms.h: Declaration of google_breakpad::DumpSymbols, a class for
// reading debugging information from Mach-O files and writing it out as a
// Breakpad symbol file.
36*9712c20fSFrederick Mayle 37*9712c20fSFrederick Mayle #include <mach-o/loader.h> 38*9712c20fSFrederick Mayle #include <stdio.h> 39*9712c20fSFrederick Mayle #include <stdlib.h> 40*9712c20fSFrederick Mayle 41*9712c20fSFrederick Mayle #include <ostream> 42*9712c20fSFrederick Mayle #include <string> 43*9712c20fSFrederick Mayle #include <vector> 44*9712c20fSFrederick Mayle 45*9712c20fSFrederick Mayle #include "common/byte_cursor.h" 46*9712c20fSFrederick Mayle #include "common/dwarf/dwarf2reader.h" 47*9712c20fSFrederick Mayle #include "common/mac/arch_utilities.h" 48*9712c20fSFrederick Mayle #include "common/mac/macho_reader.h" 49*9712c20fSFrederick Mayle #include "common/mac/super_fat_arch.h" 50*9712c20fSFrederick Mayle #include "common/module.h" 51*9712c20fSFrederick Mayle #include "common/scoped_ptr.h" 52*9712c20fSFrederick Mayle #include "common/symbol_data.h" 53*9712c20fSFrederick Mayle 54*9712c20fSFrederick Mayle namespace google_breakpad { 55*9712c20fSFrederick Mayle 56*9712c20fSFrederick Mayle class DumpSymbols { 57*9712c20fSFrederick Mayle public: 58*9712c20fSFrederick Mayle DumpSymbols(SymbolData symbol_data, 59*9712c20fSFrederick Mayle bool handle_inter_cu_refs, 60*9712c20fSFrederick Mayle bool enable_multiple = false, 61*9712c20fSFrederick Mayle const std::string& module_name = "", 62*9712c20fSFrederick Mayle bool prefer_extern_name = false) symbol_data_(symbol_data)63*9712c20fSFrederick Mayle : symbol_data_(symbol_data), 64*9712c20fSFrederick Mayle handle_inter_cu_refs_(handle_inter_cu_refs), 65*9712c20fSFrederick Mayle object_filename_(), 66*9712c20fSFrederick Mayle contents_(), 67*9712c20fSFrederick Mayle size_(0), 68*9712c20fSFrederick Mayle from_disk_(false), 69*9712c20fSFrederick Mayle object_files_(), 70*9712c20fSFrederick Mayle selected_object_file_(), 71*9712c20fSFrederick Mayle selected_object_name_(), 72*9712c20fSFrederick Mayle enable_multiple_(enable_multiple), 73*9712c20fSFrederick Mayle module_name_(module_name), 74*9712c20fSFrederick Mayle 
prefer_extern_name_(prefer_extern_name) {} 75*9712c20fSFrederick Mayle ~DumpSymbols() = default; 76*9712c20fSFrederick Mayle 77*9712c20fSFrederick Mayle // Prepare to read debugging information from |filename|. |filename| may be 78*9712c20fSFrederick Mayle // the name of a fat file, a Mach-O file, or a dSYM bundle containing either 79*9712c20fSFrederick Mayle // of the above. 80*9712c20fSFrederick Mayle // 81*9712c20fSFrederick Mayle // If |module_name_| is empty, uses the basename of |filename| as the module 82*9712c20fSFrederick Mayle // name. Otherwise, uses |module_name_| as the module name. 83*9712c20fSFrederick Mayle // 84*9712c20fSFrederick Mayle // On success, return true; if there is a problem reading 85*9712c20fSFrederick Mayle // |filename|, report it and return false. 86*9712c20fSFrederick Mayle bool Read(const std::string& filename); 87*9712c20fSFrederick Mayle 88*9712c20fSFrederick Mayle // Prepare to read debugging information from |contents|. |contents| is 89*9712c20fSFrederick Mayle // expected to be the data obtained from reading a fat file, or a Mach-O file. 90*9712c20fSFrederick Mayle // |filename| is used to determine the object filename in the generated 91*9712c20fSFrederick Mayle // output; there will not be an attempt to open this file as the data 92*9712c20fSFrederick Mayle // is already expected to be in memory. On success, return true; if there is a 93*9712c20fSFrederick Mayle // problem reading |contents|, report it and return false. 94*9712c20fSFrederick Mayle bool ReadData(uint8_t* contents, size_t size, const std::string& filename); 95*9712c20fSFrederick Mayle 96*9712c20fSFrederick Mayle // If this dumper's file includes an object file for `info`, then select that 97*9712c20fSFrederick Mayle // object file for dumping, and return true. Otherwise, return false, and 98*9712c20fSFrederick Mayle // leave this dumper's selected architecture unchanged. 
99*9712c20fSFrederick Mayle // 100*9712c20fSFrederick Mayle // By default, if this dumper's file contains only one object file, then 101*9712c20fSFrederick Mayle // the dumper will dump those symbols; and if it contains more than one 102*9712c20fSFrederick Mayle // object file, then the dumper will dump the object file whose 103*9712c20fSFrederick Mayle // architecture matches that of this dumper program. 104*9712c20fSFrederick Mayle bool SetArchitecture(const ArchInfo& info); 105*9712c20fSFrederick Mayle 106*9712c20fSFrederick Mayle // Return a pointer to an array of SuperFatArch structures describing the 107*9712c20fSFrederick Mayle // object files contained in this dumper's file. Set *|count| to the number 108*9712c20fSFrederick Mayle // of elements in the array. The returned array is owned by this DumpSymbols 109*9712c20fSFrederick Mayle // instance. 110*9712c20fSFrederick Mayle // 111*9712c20fSFrederick Mayle // If there are no available architectures, this function 112*9712c20fSFrederick Mayle // may return NULL. AvailableArchitectures(size_t * count)113*9712c20fSFrederick Mayle const SuperFatArch* AvailableArchitectures(size_t* count) { 114*9712c20fSFrederick Mayle *count = object_files_.size(); 115*9712c20fSFrederick Mayle if (object_files_.size() > 0) 116*9712c20fSFrederick Mayle return &object_files_[0]; 117*9712c20fSFrederick Mayle return NULL; 118*9712c20fSFrederick Mayle } 119*9712c20fSFrederick Mayle 120*9712c20fSFrederick Mayle // Read the selected object file's debugging information, and write out the 121*9712c20fSFrederick Mayle // header only to |stream|. Return true on success; if an error occurs, report 122*9712c20fSFrederick Mayle // it and return false. 123*9712c20fSFrederick Mayle bool WriteSymbolFileHeader(std::ostream& stream); 124*9712c20fSFrederick Mayle 125*9712c20fSFrederick Mayle // Read the selected object file's debugging information and store it in 126*9712c20fSFrederick Mayle // `module`. 
The caller owns the resulting module object and must delete 127*9712c20fSFrederick Mayle // it when finished. 128*9712c20fSFrederick Mayle bool ReadSymbolData(Module** module); 129*9712c20fSFrederick Mayle 130*9712c20fSFrederick Mayle // Return an identifier string for the file this DumpSymbols is dumping. 131*9712c20fSFrederick Mayle std::string Identifier(); 132*9712c20fSFrederick Mayle 133*9712c20fSFrederick Mayle private: 134*9712c20fSFrederick Mayle // Used internally. 135*9712c20fSFrederick Mayle class DumperLineToModule; 136*9712c20fSFrederick Mayle class DumperRangesHandler; 137*9712c20fSFrederick Mayle class LoadCommandDumper; 138*9712c20fSFrederick Mayle 139*9712c20fSFrederick Mayle // This method behaves similarly to NXFindBestFatArch, but it supports 140*9712c20fSFrederick Mayle // SuperFatArch. 141*9712c20fSFrederick Mayle SuperFatArch* FindBestMatchForArchitecture( 142*9712c20fSFrederick Mayle cpu_type_t cpu_type, cpu_subtype_t cpu_subtype); 143*9712c20fSFrederick Mayle 144*9712c20fSFrederick Mayle // Creates an empty module object. 145*9712c20fSFrederick Mayle bool CreateEmptyModule(scoped_ptr<Module>& module); 146*9712c20fSFrederick Mayle 147*9712c20fSFrederick Mayle // Process the split dwarf file referenced by reader. 148*9712c20fSFrederick Mayle void StartProcessSplitDwarf(google_breakpad::CompilationUnit* reader, 149*9712c20fSFrederick Mayle Module* module, 150*9712c20fSFrederick Mayle google_breakpad::Endianness endianness, 151*9712c20fSFrederick Mayle bool handle_inter_cu_refs, 152*9712c20fSFrederick Mayle bool handle_inline) const; 153*9712c20fSFrederick Mayle 154*9712c20fSFrederick Mayle // Read debugging information from |dwarf_sections|, which was taken from 155*9712c20fSFrederick Mayle // |macho_reader|, and add it to |module|. 
156*9712c20fSFrederick Mayle void ReadDwarf(google_breakpad::Module* module, 157*9712c20fSFrederick Mayle const mach_o::Reader& macho_reader, 158*9712c20fSFrederick Mayle const mach_o::SectionMap& dwarf_sections, 159*9712c20fSFrederick Mayle bool handle_inter_cu_refs) const; 160*9712c20fSFrederick Mayle 161*9712c20fSFrederick Mayle // Read DWARF CFI or .eh_frame data from |section|, belonging to 162*9712c20fSFrederick Mayle // |macho_reader|, and record it in |module|. If |eh_frame| is true, 163*9712c20fSFrederick Mayle // then the data is .eh_frame-format data; otherwise, it is standard DWARF 164*9712c20fSFrederick Mayle // .debug_frame data. On success, return true; on failure, report 165*9712c20fSFrederick Mayle // the problem and return false. 166*9712c20fSFrederick Mayle bool ReadCFI(google_breakpad::Module* module, 167*9712c20fSFrederick Mayle const mach_o::Reader& macho_reader, 168*9712c20fSFrederick Mayle const mach_o::Section& section, 169*9712c20fSFrederick Mayle bool eh_frame) const; 170*9712c20fSFrederick Mayle 171*9712c20fSFrederick Mayle // The selection of what type of symbol data to read/write. 172*9712c20fSFrederick Mayle const SymbolData symbol_data_; 173*9712c20fSFrederick Mayle 174*9712c20fSFrederick Mayle // Whether to handle references between compilation units. 175*9712c20fSFrederick Mayle const bool handle_inter_cu_refs_; 176*9712c20fSFrederick Mayle 177*9712c20fSFrederick Mayle // The name of the file this DumpSymbols will actually read debugging 178*9712c20fSFrederick Mayle // information from. If the filename passed to Read refers to a dSYM bundle, 179*9712c20fSFrederick Mayle // then this is the resource file within that bundle. 180*9712c20fSFrederick Mayle std::string object_filename_; 181*9712c20fSFrederick Mayle 182*9712c20fSFrederick Mayle // The complete contents of object_filename_, mapped into memory. 
183*9712c20fSFrederick Mayle scoped_array<uint8_t> contents_; 184*9712c20fSFrederick Mayle 185*9712c20fSFrederick Mayle // The size of contents_. 186*9712c20fSFrederick Mayle size_t size_; 187*9712c20fSFrederick Mayle 188*9712c20fSFrederick Mayle // Indicates which entry point to DumpSymbols was used, i.e. Read vs ReadData. 189*9712c20fSFrederick Mayle // This is used to indicate that downstream code paths can/should also read 190*9712c20fSFrederick Mayle // from disk or not. 191*9712c20fSFrederick Mayle bool from_disk_; 192*9712c20fSFrederick Mayle 193*9712c20fSFrederick Mayle // A vector of SuperFatArch structures describing the object files 194*9712c20fSFrederick Mayle // object_filename_ contains. If object_filename_ refers to a fat binary, 195*9712c20fSFrederick Mayle // this may have more than one element; if it refers to a Mach-O file, this 196*9712c20fSFrederick Mayle // has exactly one element. 197*9712c20fSFrederick Mayle vector<SuperFatArch> object_files_; 198*9712c20fSFrederick Mayle 199*9712c20fSFrederick Mayle // The object file in object_files_ selected to dump, or NULL if 200*9712c20fSFrederick Mayle // SetArchitecture hasn't been called yet. 201*9712c20fSFrederick Mayle const SuperFatArch* selected_object_file_; 202*9712c20fSFrederick Mayle 203*9712c20fSFrederick Mayle // A string that identifies the selected object file, for use in error 204*9712c20fSFrederick Mayle // messages. This is usually object_filename_, but if that refers to a 205*9712c20fSFrederick Mayle // fat binary, it includes an indication of the particular architecture 206*9712c20fSFrederick Mayle // within that binary. 207*9712c20fSFrederick Mayle string selected_object_name_; 208*9712c20fSFrederick Mayle 209*9712c20fSFrederick Mayle // Whether symbols sharing an address should be collapsed into a single entry 210*9712c20fSFrederick Mayle // and marked with an `m` in the output. 
211*9712c20fSFrederick Mayle // See: https://crbug.com/google-breakpad/751 and docs at 212*9712c20fSFrederick Mayle // docs/symbol_files.md#records-3 213*9712c20fSFrederick Mayle bool enable_multiple_; 214*9712c20fSFrederick Mayle 215*9712c20fSFrederick Mayle // If non-empty, used as the module name. Otherwise, the basename of 216*9712c20fSFrederick Mayle // |object_filename_| is used as the module name. 217*9712c20fSFrederick Mayle const std::string module_name_; 218*9712c20fSFrederick Mayle 219*9712c20fSFrederick Mayle // If a Function and an Extern share the same address but have a different 220*9712c20fSFrederick Mayle // name, prefer the name of the Extern. 221*9712c20fSFrederick Mayle // 222*9712c20fSFrederick Mayle // Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables), 223*9712c20fSFrederick Mayle // as the Function's fully-qualified name will only be present in the STABS 224*9712c20fSFrederick Mayle // (which are placed in the Extern), not in the DWARF symbols (which are 225*9712c20fSFrederick Mayle // placed in the Function). 226*9712c20fSFrederick Mayle bool prefer_extern_name_; 227*9712c20fSFrederick Mayle }; 228*9712c20fSFrederick Mayle 229*9712c20fSFrederick Mayle } // namespace google_breakpad 230