xref: /aosp_15_r20/external/google-breakpad/src/common/mac/dump_syms.h (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1*9712c20fSFrederick Mayle // -*- mode: c++ -*-
2*9712c20fSFrederick Mayle 
3*9712c20fSFrederick Mayle // Copyright 2011 Google LLC
4*9712c20fSFrederick Mayle //
5*9712c20fSFrederick Mayle // Redistribution and use in source and binary forms, with or without
6*9712c20fSFrederick Mayle // modification, are permitted provided that the following conditions are
7*9712c20fSFrederick Mayle // met:
8*9712c20fSFrederick Mayle //
9*9712c20fSFrederick Mayle //     * Redistributions of source code must retain the above copyright
10*9712c20fSFrederick Mayle // notice, this list of conditions and the following disclaimer.
11*9712c20fSFrederick Mayle //     * Redistributions in binary form must reproduce the above
12*9712c20fSFrederick Mayle // copyright notice, this list of conditions and the following disclaimer
13*9712c20fSFrederick Mayle // in the documentation and/or other materials provided with the
14*9712c20fSFrederick Mayle // distribution.
15*9712c20fSFrederick Mayle //     * Neither the name of Google LLC nor the names of its
16*9712c20fSFrederick Mayle // contributors may be used to endorse or promote products derived from
17*9712c20fSFrederick Mayle // this software without specific prior written permission.
18*9712c20fSFrederick Mayle //
19*9712c20fSFrederick Mayle // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20*9712c20fSFrederick Mayle // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21*9712c20fSFrederick Mayle // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22*9712c20fSFrederick Mayle // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23*9712c20fSFrederick Mayle // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24*9712c20fSFrederick Mayle // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25*9712c20fSFrederick Mayle // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26*9712c20fSFrederick Mayle // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27*9712c20fSFrederick Mayle // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28*9712c20fSFrederick Mayle // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29*9712c20fSFrederick Mayle // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30*9712c20fSFrederick Mayle 
31*9712c20fSFrederick Mayle // Author: Jim Blandy <[email protected]> <[email protected]>
32*9712c20fSFrederick Mayle 
33*9712c20fSFrederick Mayle // dump_syms.h: Declaration of google_breakpad::DumpSymbols, a class for
34*9712c20fSFrederick Mayle // reading debugging information from Mach-O files and writing it out as a
35*9712c20fSFrederick Mayle // Breakpad symbol file.
36*9712c20fSFrederick Mayle 
37*9712c20fSFrederick Mayle #include <mach-o/loader.h>
38*9712c20fSFrederick Mayle #include <stdio.h>
39*9712c20fSFrederick Mayle #include <stdlib.h>
40*9712c20fSFrederick Mayle 
41*9712c20fSFrederick Mayle #include <ostream>
42*9712c20fSFrederick Mayle #include <string>
43*9712c20fSFrederick Mayle #include <vector>
44*9712c20fSFrederick Mayle 
45*9712c20fSFrederick Mayle #include "common/byte_cursor.h"
46*9712c20fSFrederick Mayle #include "common/dwarf/dwarf2reader.h"
47*9712c20fSFrederick Mayle #include "common/mac/arch_utilities.h"
48*9712c20fSFrederick Mayle #include "common/mac/macho_reader.h"
49*9712c20fSFrederick Mayle #include "common/mac/super_fat_arch.h"
50*9712c20fSFrederick Mayle #include "common/module.h"
51*9712c20fSFrederick Mayle #include "common/scoped_ptr.h"
52*9712c20fSFrederick Mayle #include "common/symbol_data.h"
53*9712c20fSFrederick Mayle 
54*9712c20fSFrederick Mayle namespace google_breakpad {
55*9712c20fSFrederick Mayle 
56*9712c20fSFrederick Mayle class DumpSymbols {
57*9712c20fSFrederick Mayle  public:
58*9712c20fSFrederick Mayle   DumpSymbols(SymbolData symbol_data,
59*9712c20fSFrederick Mayle               bool handle_inter_cu_refs,
60*9712c20fSFrederick Mayle               bool enable_multiple = false,
61*9712c20fSFrederick Mayle               const std::string& module_name = "",
62*9712c20fSFrederick Mayle               bool prefer_extern_name = false)
symbol_data_(symbol_data)63*9712c20fSFrederick Mayle       : symbol_data_(symbol_data),
64*9712c20fSFrederick Mayle         handle_inter_cu_refs_(handle_inter_cu_refs),
65*9712c20fSFrederick Mayle         object_filename_(),
66*9712c20fSFrederick Mayle         contents_(),
67*9712c20fSFrederick Mayle         size_(0),
68*9712c20fSFrederick Mayle         from_disk_(false),
69*9712c20fSFrederick Mayle         object_files_(),
70*9712c20fSFrederick Mayle         selected_object_file_(),
71*9712c20fSFrederick Mayle         selected_object_name_(),
72*9712c20fSFrederick Mayle         enable_multiple_(enable_multiple),
73*9712c20fSFrederick Mayle         module_name_(module_name),
74*9712c20fSFrederick Mayle         prefer_extern_name_(prefer_extern_name) {}
75*9712c20fSFrederick Mayle   ~DumpSymbols() = default;
76*9712c20fSFrederick Mayle 
77*9712c20fSFrederick Mayle   // Prepare to read debugging information from |filename|. |filename| may be
78*9712c20fSFrederick Mayle   // the name of a fat file, a Mach-O file, or a dSYM bundle containing either
79*9712c20fSFrederick Mayle   // of the above.
80*9712c20fSFrederick Mayle   //
81*9712c20fSFrederick Mayle   // If |module_name_| is empty, uses the basename of |filename| as the module
82*9712c20fSFrederick Mayle   // name. Otherwise, uses |module_name_| as the module name.
83*9712c20fSFrederick Mayle   //
84*9712c20fSFrederick Mayle   // On success, return true; if there is a problem reading
85*9712c20fSFrederick Mayle   // |filename|, report it and return false.
86*9712c20fSFrederick Mayle   bool Read(const std::string& filename);
87*9712c20fSFrederick Mayle 
88*9712c20fSFrederick Mayle   // Prepare to read debugging information from |contents|. |contents| is
89*9712c20fSFrederick Mayle   // expected to be the data obtained from reading a fat file, or a Mach-O file.
90*9712c20fSFrederick Mayle   // |filename| is used to determine the object filename in the generated
91*9712c20fSFrederick Mayle   // output; there will not be an attempt to open this file as the data
92*9712c20fSFrederick Mayle   // is already expected to be in memory. On success, return true; if there is a
93*9712c20fSFrederick Mayle   // problem reading |contents|, report it and return false.
94*9712c20fSFrederick Mayle   bool ReadData(uint8_t* contents, size_t size, const std::string& filename);
95*9712c20fSFrederick Mayle 
96*9712c20fSFrederick Mayle   // If this dumper's file includes an object file for `info`, then select that
97*9712c20fSFrederick Mayle   // object file for dumping, and return true. Otherwise, return false, and
98*9712c20fSFrederick Mayle   // leave this dumper's selected architecture unchanged.
99*9712c20fSFrederick Mayle   //
100*9712c20fSFrederick Mayle   // By default, if this dumper's file contains only one object file, then
101*9712c20fSFrederick Mayle   // the dumper will dump those symbols; and if it contains more than one
102*9712c20fSFrederick Mayle   // object file, then the dumper will dump the object file whose
103*9712c20fSFrederick Mayle   // architecture matches that of this dumper program.
104*9712c20fSFrederick Mayle   bool SetArchitecture(const ArchInfo& info);
105*9712c20fSFrederick Mayle 
106*9712c20fSFrederick Mayle   // Return a pointer to an array of SuperFatArch structures describing the
107*9712c20fSFrederick Mayle   // object files contained in this dumper's file. Set *|count| to the number
108*9712c20fSFrederick Mayle   // of elements in the array. The returned array is owned by this DumpSymbols
109*9712c20fSFrederick Mayle   // instance.
110*9712c20fSFrederick Mayle   //
111*9712c20fSFrederick Mayle   // If there are no available architectures, this function
112*9712c20fSFrederick Mayle   // may return NULL.
AvailableArchitectures(size_t * count)113*9712c20fSFrederick Mayle   const SuperFatArch* AvailableArchitectures(size_t* count) {
114*9712c20fSFrederick Mayle     *count = object_files_.size();
115*9712c20fSFrederick Mayle     if (object_files_.size() > 0)
116*9712c20fSFrederick Mayle       return &object_files_[0];
117*9712c20fSFrederick Mayle     return NULL;
118*9712c20fSFrederick Mayle   }
119*9712c20fSFrederick Mayle 
120*9712c20fSFrederick Mayle   // Read the selected object file's debugging information, and write out the
121*9712c20fSFrederick Mayle   // header only to |stream|. Return true on success; if an error occurs, report
122*9712c20fSFrederick Mayle   // it and return false.
123*9712c20fSFrederick Mayle   bool WriteSymbolFileHeader(std::ostream& stream);
124*9712c20fSFrederick Mayle 
125*9712c20fSFrederick Mayle   // Read the selected object file's debugging information and store it in
126*9712c20fSFrederick Mayle   // `module`. The caller owns the resulting module object and must delete
127*9712c20fSFrederick Mayle   // it when finished.
128*9712c20fSFrederick Mayle   bool ReadSymbolData(Module** module);
129*9712c20fSFrederick Mayle 
130*9712c20fSFrederick Mayle   // Return an identifier string for the file this DumpSymbols is dumping.
131*9712c20fSFrederick Mayle   std::string Identifier();
132*9712c20fSFrederick Mayle 
133*9712c20fSFrederick Mayle  private:
134*9712c20fSFrederick Mayle   // Used internally.
135*9712c20fSFrederick Mayle   class DumperLineToModule;
136*9712c20fSFrederick Mayle   class DumperRangesHandler;
137*9712c20fSFrederick Mayle   class LoadCommandDumper;
138*9712c20fSFrederick Mayle 
139*9712c20fSFrederick Mayle   // This method behaves similarly to NXFindBestFatArch, but it supports
140*9712c20fSFrederick Mayle   // SuperFatArch.
141*9712c20fSFrederick Mayle   SuperFatArch* FindBestMatchForArchitecture(
142*9712c20fSFrederick Mayle       cpu_type_t cpu_type, cpu_subtype_t cpu_subtype);
143*9712c20fSFrederick Mayle 
144*9712c20fSFrederick Mayle   // Creates an empty module object.
145*9712c20fSFrederick Mayle   bool CreateEmptyModule(scoped_ptr<Module>& module);
146*9712c20fSFrederick Mayle 
147*9712c20fSFrederick Mayle   // Process the split dwarf file referenced by reader.
148*9712c20fSFrederick Mayle   void StartProcessSplitDwarf(google_breakpad::CompilationUnit* reader,
149*9712c20fSFrederick Mayle                               Module* module,
150*9712c20fSFrederick Mayle                               google_breakpad::Endianness endianness,
151*9712c20fSFrederick Mayle                               bool handle_inter_cu_refs,
152*9712c20fSFrederick Mayle                               bool handle_inline) const;
153*9712c20fSFrederick Mayle 
154*9712c20fSFrederick Mayle   // Read debugging information from |dwarf_sections|, which was taken from
155*9712c20fSFrederick Mayle   // |macho_reader|, and add it to |module|.
156*9712c20fSFrederick Mayle   void ReadDwarf(google_breakpad::Module* module,
157*9712c20fSFrederick Mayle                  const mach_o::Reader& macho_reader,
158*9712c20fSFrederick Mayle                  const mach_o::SectionMap& dwarf_sections,
159*9712c20fSFrederick Mayle                  bool handle_inter_cu_refs) const;
160*9712c20fSFrederick Mayle 
161*9712c20fSFrederick Mayle   // Read DWARF CFI or .eh_frame data from |section|, belonging to
162*9712c20fSFrederick Mayle   // |macho_reader|, and record it in |module|.  If |eh_frame| is true,
163*9712c20fSFrederick Mayle   // then the data is .eh_frame-format data; otherwise, it is standard DWARF
164*9712c20fSFrederick Mayle   // .debug_frame data. On success, return true; on failure, report
165*9712c20fSFrederick Mayle   // the problem and return false.
166*9712c20fSFrederick Mayle   bool ReadCFI(google_breakpad::Module* module,
167*9712c20fSFrederick Mayle                const mach_o::Reader& macho_reader,
168*9712c20fSFrederick Mayle                const mach_o::Section& section,
169*9712c20fSFrederick Mayle                bool eh_frame) const;
170*9712c20fSFrederick Mayle 
171*9712c20fSFrederick Mayle   // The selection of what type of symbol data to read/write.
172*9712c20fSFrederick Mayle   const SymbolData symbol_data_;
173*9712c20fSFrederick Mayle 
174*9712c20fSFrederick Mayle   // Whether to handle references between compilation units.
175*9712c20fSFrederick Mayle   const bool handle_inter_cu_refs_;
176*9712c20fSFrederick Mayle 
177*9712c20fSFrederick Mayle   // The name of the file this DumpSymbols will actually read debugging
178*9712c20fSFrederick Mayle   // information from. If the filename passed to Read refers to a dSYM bundle,
179*9712c20fSFrederick Mayle   // then this is the resource file within that bundle.
180*9712c20fSFrederick Mayle   std::string object_filename_;
181*9712c20fSFrederick Mayle 
182*9712c20fSFrederick Mayle   // The complete contents of object_filename_, mapped into memory.
183*9712c20fSFrederick Mayle   scoped_array<uint8_t> contents_;
184*9712c20fSFrederick Mayle 
185*9712c20fSFrederick Mayle   // The size of contents_.
186*9712c20fSFrederick Mayle   size_t size_;
187*9712c20fSFrederick Mayle 
188*9712c20fSFrederick Mayle   // Indicates which entry point to DumpSymbols was used, i.e. Read vs ReadData.
189*9712c20fSFrederick Mayle   // This is used to indicate that downstream code paths can/should also read
190*9712c20fSFrederick Mayle   // from disk or not.
191*9712c20fSFrederick Mayle   bool from_disk_;
192*9712c20fSFrederick Mayle 
193*9712c20fSFrederick Mayle   // A vector of SuperFatArch structures describing the object files
194*9712c20fSFrederick Mayle   // object_filename_ contains. If object_filename_ refers to a fat binary,
195*9712c20fSFrederick Mayle   // this may have more than one element; if it refers to a Mach-O file, this
196*9712c20fSFrederick Mayle   // has exactly one element.
197*9712c20fSFrederick Mayle   vector<SuperFatArch> object_files_;
198*9712c20fSFrederick Mayle 
199*9712c20fSFrederick Mayle   // The object file in object_files_ selected to dump, or NULL if
200*9712c20fSFrederick Mayle   // SetArchitecture hasn't been called yet.
201*9712c20fSFrederick Mayle   const SuperFatArch* selected_object_file_;
202*9712c20fSFrederick Mayle 
203*9712c20fSFrederick Mayle   // A string that identifies the selected object file, for use in error
204*9712c20fSFrederick Mayle   // messages.  This is usually object_filename_, but if that refers to a
205*9712c20fSFrederick Mayle   // fat binary, it includes an indication of the particular architecture
206*9712c20fSFrederick Mayle   // within that binary.
207*9712c20fSFrederick Mayle   string selected_object_name_;
208*9712c20fSFrederick Mayle 
209*9712c20fSFrederick Mayle   // Whether symbols sharing an address should be collapsed into a single entry
210*9712c20fSFrederick Mayle   // and marked with an `m` in the output.
211*9712c20fSFrederick Mayle   // See: https://crbug.com/google-breakpad/751 and docs at
212*9712c20fSFrederick Mayle   // docs/symbol_files.md#records-3
213*9712c20fSFrederick Mayle   bool enable_multiple_;
214*9712c20fSFrederick Mayle 
215*9712c20fSFrederick Mayle   // If non-empty, used as the module name. Otherwise, the basename of
216*9712c20fSFrederick Mayle   // |object_filename_| is used as the module name.
217*9712c20fSFrederick Mayle   const std::string module_name_;
218*9712c20fSFrederick Mayle 
219*9712c20fSFrederick Mayle   // If a Function and an Extern share the same address but have a different
220*9712c20fSFrederick Mayle   // name, prefer the name of the Extern.
221*9712c20fSFrederick Mayle   //
222*9712c20fSFrederick Mayle   // Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables),
223*9712c20fSFrederick Mayle   // as the Function's fully-qualified name will only be present in the STABS
224*9712c20fSFrederick Mayle   // (which are placed in the Extern), not in the DWARF symbols (which are
225*9712c20fSFrederick Mayle   // placed in the Function).
226*9712c20fSFrederick Mayle   bool prefer_extern_name_;
227*9712c20fSFrederick Mayle };
228*9712c20fSFrederick Mayle 
229*9712c20fSFrederick Mayle }  // namespace google_breakpad
230