xref: /aosp_15_r20/external/google-breakpad/src/common/stabs_reader.cc (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // Copyright 2010 Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 // Original author: Jim Blandy <[email protected]> <[email protected]>
30 
31 // This file implements the google_breakpad::StabsReader class.
32 // See stabs_reader.h.
33 
34 #ifdef HAVE_CONFIG_H
35 #include <config.h>  // Must come first
36 #endif
37 
38 #include "common/stabs_reader.h"
39 
40 #include <assert.h>
41 #include <stab.h>
42 #include <string.h>
43 
44 #include <string>
45 
46 #include "common/using_std_string.h"
47 
48 using std::vector;
49 
50 namespace google_breakpad {
51 
EntryIterator(const ByteBuffer * buffer,bool big_endian,size_t value_size)52 StabsReader::EntryIterator::EntryIterator(const ByteBuffer* buffer,
53                                           bool big_endian, size_t value_size)
54     : value_size_(value_size), cursor_(buffer, big_endian) {
55   // Actually, we could handle weird sizes just fine, but they're
56   // probably mistakes --- expressed in bits, say.
57   assert(value_size == 4 || value_size == 8);
58   entry_.index = 0;
59   Fetch();
60 }
61 
Fetch()62 void StabsReader::EntryIterator::Fetch() {
63   cursor_
64       .Read(4, false, &entry_.name_offset)
65       .Read(1, false, &entry_.type)
66       .Read(1, false, &entry_.other)
67       .Read(2, false, &entry_.descriptor)
68       .Read(value_size_, false, &entry_.value);
69   entry_.at_end = !cursor_;
70 }
71 
StabsReader(const uint8_t * stab,size_t stab_size,const uint8_t * stabstr,size_t stabstr_size,bool big_endian,size_t value_size,bool unitized,StabsHandler * handler)72 StabsReader::StabsReader(const uint8_t* stab,    size_t stab_size,
73                          const uint8_t* stabstr, size_t stabstr_size,
74                          bool big_endian, size_t value_size, bool unitized,
75                          StabsHandler* handler)
76     : entries_(stab, stab_size),
77       strings_(stabstr, stabstr_size),
78       iterator_(&entries_, big_endian, value_size),
79       unitized_(unitized),
80       handler_(handler),
81       string_offset_(0),
82       next_cu_string_offset_(0),
83       current_source_file_(NULL) { }
84 
SymbolString()85 const char* StabsReader::SymbolString() {
86   ptrdiff_t offset = string_offset_ + iterator_->name_offset;
87   if (offset < 0 || (size_t) offset >= strings_.Size()) {
88     handler_->Warning("symbol %d: name offset outside the string section\n",
89                       iterator_->index);
90     // Return our null string, to keep our promise about all names being
91     // taken from the string section.
92     offset = 0;
93   }
94   return reinterpret_cast<const char*>(strings_.start + offset);
95 }
96 
Process()97 bool StabsReader::Process() {
98   while (!iterator_->at_end) {
99     if (iterator_->type == N_SO) {
100       if (! ProcessCompilationUnit())
101         return false;
102     } else if (iterator_->type == N_UNDF && unitized_) {
103       // In unitized STABS (including Linux STABS, and pretty much anything
104       // else that puts STABS data in sections), at the head of each
105       // compilation unit's entries there is an N_UNDF stab giving the
106       // number of symbols in the compilation unit, and the number of bytes
107       // that compilation unit's strings take up in the .stabstr section.
108       // Each CU's strings are separate; the n_strx values are offsets
109       // within the current CU's portion of the .stabstr section.
110       //
111       // As an optimization, the GNU linker combines all the
112       // compilation units into one, with a single N_UNDF at the
113       // beginning. However, other linkers, like Gold, do not perform
114       // this optimization.
115       string_offset_ = next_cu_string_offset_;
116       next_cu_string_offset_ = iterator_->value;
117       ++iterator_;
118     }
119 #if defined(HAVE_MACH_O_NLIST_H)
120     // Export symbols in Mach-O binaries look like this.
121     // This is necessary in order to be able to dump symbols
122     // from OS X system libraries.
123     else if ((iterator_->type & N_STAB) == 0 &&
124                (iterator_->type & N_TYPE) == N_SECT) {
125       ProcessExtern();
126     }
127 #endif
128     else {
129       ++iterator_;
130     }
131   }
132   return true;
133 }
134 
ProcessCompilationUnit()135 bool StabsReader::ProcessCompilationUnit() {
136   assert(!iterator_->at_end && iterator_->type == N_SO);
137 
138   // There may be an N_SO entry whose name ends with a slash,
139   // indicating the directory in which the compilation occurred.
140   // The build directory defaults to NULL.
141   const char* build_directory = NULL;
142   {
143     const char* name = SymbolString();
144     if (name[0] && name[strlen(name) - 1] == '/') {
145       build_directory = name;
146       ++iterator_;
147     }
148   }
149 
150   // We expect to see an N_SO entry with a filename next, indicating
151   // the start of the compilation unit.
152   {
153     if (iterator_->at_end || iterator_->type != N_SO)
154       return true;
155     const char* name = SymbolString();
156     if (name[0] == '\0') {
157       // This seems to be a stray end-of-compilation-unit marker;
158       // consume it, but don't report the end, since we didn't see a
159       // beginning.
160       ++iterator_;
161       return true;
162     }
163     current_source_file_ = name;
164   }
165 
166   if (! handler_->StartCompilationUnit(current_source_file_,
167                                        iterator_->value,
168                                        build_directory))
169     return false;
170 
171   ++iterator_;
172 
173   // The STABS documentation says that some compilers may emit
174   // additional N_SO entries with names immediately following the
175   // first, and that they should be ignored.  However, the original
176   // Breakpad STABS reader doesn't ignore them, so we won't either.
177 
178   // Process the body of the compilation unit, up to the next N_SO.
179   while (!iterator_->at_end && iterator_->type != N_SO) {
180     if (iterator_->type == N_FUN) {
181       if (! ProcessFunction())
182         return false;
183     } else if (iterator_->type == N_SLINE) {
184       // Mac OS X STABS place SLINE records before functions.
185       Line line;
186       // The value of an N_SLINE entry that appears outside a function is
187       // the absolute address of the line.
188       line.address = iterator_->value;
189       line.filename = current_source_file_;
190       // The n_desc of a N_SLINE entry is the line number.  It's a
191       // signed 16-bit field; line numbers from 32768 to 65535 are
192       // stored as n-65536.
193       line.number = (uint16_t) iterator_->descriptor;
194       queued_lines_.push_back(line);
195       ++iterator_;
196     } else if (iterator_->type == N_SOL) {
197       current_source_file_ = SymbolString();
198       ++iterator_;
199     } else {
200       // Ignore anything else.
201       ++iterator_;
202     }
203   }
204 
205   // An N_SO with an empty name indicates the end of the compilation
206   // unit.  Default to zero.
207   uint64_t ending_address = 0;
208   if (!iterator_->at_end) {
209     assert(iterator_->type == N_SO);
210     const char* name = SymbolString();
211     if (name[0] == '\0') {
212       ending_address = iterator_->value;
213       ++iterator_;
214     }
215   }
216 
217   if (! handler_->EndCompilationUnit(ending_address))
218     return false;
219 
220   queued_lines_.clear();
221 
222   return true;
223 }
224 
ProcessFunction()225 bool StabsReader::ProcessFunction() {
226   assert(!iterator_->at_end && iterator_->type == N_FUN);
227 
228   uint64_t function_address = iterator_->value;
229   // The STABS string for an N_FUN entry is the name of the function,
230   // followed by a colon, followed by type information for the
231   // function.  We want to pass the name alone to StartFunction.
232   const char* stab_string = SymbolString();
233   const char* name_end = strchr(stab_string, ':');
234   if (! name_end)
235     name_end = stab_string + strlen(stab_string);
236   string name(stab_string, name_end - stab_string);
237   if (! handler_->StartFunction(name, function_address))
238     return false;
239   ++iterator_;
240 
241   // If there were any SLINE records given before the function, report them now.
242   for (vector<Line>::const_iterator it = queued_lines_.begin();
243        it != queued_lines_.end(); it++) {
244     if (!handler_->Line(it->address, it->filename, it->number))
245       return false;
246   }
247   queued_lines_.clear();
248 
249   while (!iterator_->at_end) {
250     if (iterator_->type == N_SO || iterator_->type == N_FUN)
251       break;
252     else if (iterator_->type == N_SLINE) {
253       // The value of an N_SLINE entry is the offset of the line from
254       // the function's start address.
255       uint64_t line_address = function_address + iterator_->value;
256       // The n_desc of a N_SLINE entry is the line number.  It's a
257       // signed 16-bit field; line numbers from 32768 to 65535 are
258       // stored as n-65536.
259       uint16_t line_number = iterator_->descriptor;
260       if (! handler_->Line(line_address, current_source_file_, line_number))
261         return false;
262       ++iterator_;
263     } else if (iterator_->type == N_SOL) {
264       current_source_file_ = SymbolString();
265       ++iterator_;
266     } else
267       // Ignore anything else.
268       ++iterator_;
269   }
270 
271   // We've reached the end of the function. See if we can figure out its
272   // ending address.
273   uint64_t ending_address = 0;
274   if (!iterator_->at_end) {
275     assert(iterator_->type == N_SO || iterator_->type == N_FUN);
276     if (iterator_->type == N_FUN) {
277       const char* symbol_name = SymbolString();
278       if (symbol_name[0] == '\0') {
279         // An N_FUN entry with no name is a terminator for this function;
280         // its value is the function's size.
281         ending_address = function_address + iterator_->value;
282         ++iterator_;
283       } else {
284         // An N_FUN entry with a name is the next function, and we can take
285         // its value as our ending address. Don't advance the iterator, as
286         // we'll use this symbol to start the next function as well.
287         ending_address = iterator_->value;
288       }
289     } else {
290       // An N_SO entry could be an end-of-compilation-unit marker, or the
291       // start of the next compilation unit, but in either case, its value
292       // is our ending address. We don't advance the iterator;
293       // ProcessCompilationUnit will decide what to do with this symbol.
294       ending_address = iterator_->value;
295     }
296   }
297 
298   if (! handler_->EndFunction(ending_address))
299     return false;
300 
301   return true;
302 }
303 
ProcessExtern()304 bool StabsReader::ProcessExtern() {
305 #if defined(HAVE_MACH_O_NLIST_H)
306   assert(!iterator_->at_end &&
307          (iterator_->type & N_STAB) == 0 &&
308          (iterator_->type & N_TYPE) == N_SECT);
309 #endif
310 
311   // TODO(mark): only do symbols in the text section?
312   if (!handler_->Extern(SymbolString(), iterator_->value))
313     return false;
314 
315   ++iterator_;
316   return true;
317 }
318 
319 } // namespace google_breakpad
320