xref: /aosp_15_r20/external/google-breakpad/src/common/stabs_reader.h (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // -*- mode: c++ -*-
2 
3 // Copyright 2010 Google LLC
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google LLC nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Original author: Jim Blandy <[email protected]> <[email protected]>
32 
33 // stabs_reader.h: Define StabsReader, a parser for STABS debugging
34 // information. A description of the STABS debugging format can be
35 // found at:
36 //
37 //    http://sourceware.org/gdb/current/onlinedocs/stabs_toc.html
38 //
39 // The comments here assume you understand the format.
40 //
41 // This parser can handle big-endian and little-endian data, and the symbol
42 // values may be either 32 or 64 bits long. It handles both STABS in
43 // sections (as used on Linux) and STABS appearing directly in an
44 // a.out-like symbol table (as used in Darwin OS X Mach-O files).
45 
46 #ifndef COMMON_STABS_READER_H__
47 #define COMMON_STABS_READER_H__
48 
49 #include <stddef.h>
50 #include <stdint.h>
51 
52 #ifdef HAVE_MACH_O_NLIST_H
53 #include <mach-o/nlist.h>
54 #elif defined(HAVE_A_OUT_H)
55 #include <a.out.h>
56 #endif
57 
58 #include <string>
59 #include <vector>
60 
61 #include "common/byte_cursor.h"
62 #include "common/using_std_string.h"
63 
64 namespace google_breakpad {
65 
66 class StabsHandler;
67 
68 class StabsReader {
69  public:
70   // Create a reader for the STABS debug information whose .stab section is
71   // being traversed by ITERATOR, and whose .stabstr section is referred to
72   // by STRINGS. The reader will call the member functions of HANDLER to
73   // report the information it finds, when the reader's 'Process' member
74   // function is called.
75   //
76   // BIG_ENDIAN should be true if the entries in the .stab section are in
77   // big-endian form, or false if they are in little-endian form.
78   //
79   // VALUE_SIZE should be either 4 or 8, indicating the size of the 'value'
80   // field in each entry in bytes.
81   //
82   // UNITIZED should be true if the STABS data is stored in units with
83   // N_UNDF headers. This is usually the case for STABS stored in sections,
84   // like .stab/.stabstr, and usually not the case for STABS stored in the
85   // actual symbol table; UNITIZED should be true when parsing Linux stabs,
86   // false when parsing Mac OS X STABS. For details, see:
87   // http://sourceware.org/gdb/current/onlinedocs/stabs/Stab-Section-Basics.html
88   //
89   // Note that, in ELF, the .stabstr section should be found using the
90   // 'sh_link' field of the .stab section header, not by name.
91   StabsReader(const uint8_t* stab,    size_t stab_size,
92               const uint8_t* stabstr, size_t stabstr_size,
93               bool big_endian, size_t value_size, bool unitized,
94               StabsHandler* handler);
95 
96   // Process the STABS data, calling the handler's member functions to
97   // report what we find.  While the handler functions return true,
98   // continue to process until we reach the end of the section.  If we
99   // processed the entire section and all handlers returned true,
100   // return true.  If any handler returned false, return false.
101   //
102   // This is only meant to be called once per StabsReader instance;
103   // resuming a prior processing pass that stopped abruptly isn't supported.
104   bool Process();
105 
106  private:
107 
108   // An class for walking arrays of STABS entries. This isolates the main
109   // STABS reader from the exact format (size; endianness) of the entries
110   // themselves.
111   class EntryIterator {
112    public:
113     // The contents of a STABS entry, adjusted for the host's endianness,
114     // word size, 'struct nlist' layout, and so on.
115     struct Entry {
116       // True if this iterator has reached the end of the entry array. When
117       // this is set, the other members of this structure are not valid.
118       bool at_end;
119 
120       // The number of this entry within the list.
121       size_t index;
122 
123       // The current entry's name offset. This is the offset within the
124       // current compilation unit's strings, as establish by the N_UNDF entries.
125       size_t name_offset;
126 
127       // The current entry's type, 'other' field, descriptor, and value.
128       unsigned char type;
129       unsigned char other;
130       short descriptor;
131       uint64_t value;
132     };
133 
134     // Create a EntryIterator walking the entries in BUFFER. Treat the
135     // entries as big-endian if BIG_ENDIAN is true, as little-endian
136     // otherwise. Assume each entry has a 'value' field whose size is
137     // VALUE_SIZE.
138     //
139     // This would not be terribly clean to extend to other format variations,
140     // but it's enough to handle Linux and Mac, and we'd like STABS to die
141     // anyway.
142     //
143     // For the record: on Linux, STABS entry values are always 32 bits,
144     // regardless of the architecture address size (don't ask me why); on
145     // Mac, they are 32 or 64 bits long. Oddly, the section header's entry
146     // size for a Linux ELF .stab section varies according to the ELF class
147     // from 12 to 20 even as the actual entries remain unchanged.
148     EntryIterator(const ByteBuffer* buffer, bool big_endian, size_t value_size);
149 
150     // Move to the next entry. This function's behavior is undefined if
151     // at_end() is true when it is called.
152     EntryIterator& operator++() { Fetch(); entry_.index++; return *this; }
153 
154     // Dereferencing this iterator produces a reference to an Entry structure
155     // that holds the current entry's values. The entry is owned by this
156     // EntryIterator, and will be invalidated at the next call to operator++.
157     const Entry& operator*() const { return entry_; }
158     const Entry* operator->() const { return &entry_; }
159 
160    private:
161     // Read the STABS entry at cursor_, and set entry_ appropriately.
162     void Fetch();
163 
164     // The size of entries' value field, in bytes.
165     size_t value_size_;
166 
167     // A byte cursor traversing buffer_.
168     ByteCursor cursor_;
169 
170     // Values for the entry this iterator refers to.
171     Entry entry_;
172   };
173 
174   // A source line, saved to be reported later.
175   struct Line {
176     uint64_t address;
177     const char* filename;
178     int number;
179   };
180 
181   // Return the name of the current symbol.
182   const char* SymbolString();
183 
184   // Process a compilation unit starting at symbol_.  Return true
185   // to continue processing, or false to abort.
186   bool ProcessCompilationUnit();
187 
188   // Process a function in current_source_file_ starting at symbol_.
189   // Return true to continue processing, or false to abort.
190   bool ProcessFunction();
191 
192   // Process an exported function symbol.
193   // Return true to continue processing, or false to abort.
194   bool ProcessExtern();
195 
196   // The STABS entries being parsed.
197   ByteBuffer entries_;
198 
199   // The string section to which the entries refer.
200   ByteBuffer strings_;
201 
202   // The iterator walking the STABS entries.
203   EntryIterator iterator_;
204 
205   // True if the data is "unitized"; see the explanation in the comment for
206   // StabsReader::StabsReader.
207   bool unitized_;
208 
209   StabsHandler* handler_;
210 
211   // The offset of the current compilation unit's strings within stabstr_.
212   size_t string_offset_;
213 
214   // The value string_offset_ should have for the next compilation unit,
215   // as established by N_UNDF entries.
216   size_t next_cu_string_offset_;
217 
218   // The current source file name.
219   const char* current_source_file_;
220 
221   // Mac OS X STABS place SLINE records before functions; we accumulate a
222   // vector of these until we see the FUN record, and then report them
223   // after the StartFunction call.
224   std::vector<Line> queued_lines_;
225 };
226 
227 // Consumer-provided callback structure for the STABS reader.  Clients
228 // of the STABS reader provide an instance of this structure.  The
229 // reader then invokes the member functions of that instance to report
230 // the information it finds.
231 //
232 // The default definitions of the member functions do nothing, and return
233 // true so processing will continue.
// Callback interface through which the STABS reader reports what it
// finds. Every callback except Warning has a default definition that
// does nothing and returns true; Warning is pure virtual, so concrete
// handlers must define it.
class StabsHandler {
 public:
  StabsHandler() { }
  virtual ~StabsHandler() { }

  // Some general notes about the handler callback functions:

  // Processing proceeds until the end of the .stab section, or until
  // one of these functions returns false.

  // The addresses given are as reported in the STABS info, without
  // regard for whether the module may be loaded at different
  // addresses at different times (a shared library, say).  When
  // processing STABS from an ELF shared library, the addresses given
  // all assume the library is loaded at its nominal load address.
  // They are *not* offsets from the nominal load address.  If you
  // want offsets, you must subtract off the library's nominal load
  // address.

  // The arguments to these functions named FILENAME are all
  // references to strings stored in the .stabstr section.  Because
  // both the Linux and Solaris linkers factor out duplicate strings
  // from the .stabstr section, the consumer can assume that if two
  // FILENAME values are different addresses, they represent different
  // file names.
  //
  // Thus, it's safe to use (say) std::map<char*, ...>, which does
  // string address comparisons, not string content comparisons.
  // Since all the strings are in same array of characters --- the
  // .stabstr section --- comparing their addresses produces
  // predictable, if not lexicographically meaningful, results.

  // Begin processing a compilation unit whose main source file is
  // named FILENAME, and whose base address is ADDRESS.  If
  // BUILD_DIRECTORY is non-NULL, it is the name of the build
  // directory in which the compilation occurred.
  //
  // Return true to continue processing, or false to stop.
  virtual bool StartCompilationUnit(const char* filename, uint64_t address,
                                    const char* build_directory) {
    return true;
  }

  // Finish processing the compilation unit.  If ADDRESS is non-zero,
  // it is the ending address of the compilation unit.  If ADDRESS is
  // zero, then the compilation unit's ending address is not
  // available, and the consumer must infer it by other means.
  //
  // Return true to continue processing, or false to stop.
  virtual bool EndCompilationUnit(uint64_t address) { return true; }

  // Begin processing a function named NAME, whose starting address is
  // ADDRESS.  This function belongs to the compilation unit that was
  // most recently started but not ended.
  //
  // Note that, unlike filenames, NAME is not a pointer into the
  // .stabstr section; this is because the name as it appears in the
  // STABS data is followed by type information.  The value passed to
  // StartFunction is the function name alone.
  //
  // In languages that use name mangling, like C++, NAME is mangled.
  //
  // Return true to continue processing, or false to stop.
  virtual bool StartFunction(const string& name, uint64_t address) {
    return true;
  }

  // Finish processing the function.  If ADDRESS is non-zero, it is
  // the ending address for the function.  If ADDRESS is zero, then
  // the function's ending address is not available, and the consumer
  // must infer it by other means.
  //
  // Return true to continue processing, or false to stop.
  virtual bool EndFunction(uint64_t address) { return true; }

  // Report that the code at ADDRESS is attributable to line NUMBER of
  // the source file named FILENAME.  The caller must infer the ending
  // address of the line.
  //
  // Return true to continue processing, or false to stop.
  virtual bool Line(uint64_t address, const char* filename, int number) {
    return true;
  }

  // Report that an exported function NAME is present at ADDRESS.
  // The size of the function is unknown.
  //
  // Return true to continue processing, or false to stop.
  virtual bool Extern(const string& name, uint64_t address) {
    return true;
  }

  // Report a warning.  FORMAT is a printf-like format string,
  // specifying how to format the subsequent arguments.  There is no
  // default definition: derived classes must provide one.
  virtual void Warning(const char* format, ...) = 0;
};
318 
319 } // namespace google_breakpad
320 
321 #endif  // COMMON_STABS_READER_H__
322