xref: /aosp_15_r20/external/google-breakpad/src/common/module.h (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // -*- mode: c++ -*-
2 
3 // Copyright 2010 Google LLC
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google LLC nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Original author: Jim Blandy <[email protected]> <[email protected]>
32 
33 // module.h: Define google_breakpad::Module. A Module holds debugging
34 // information, and can write that information out as a Breakpad
35 // symbol file.
36 
37 #ifndef COMMON_LINUX_MODULE_H__
38 #define COMMON_LINUX_MODULE_H__
39 
40 #include <functional>
41 #include <iostream>
42 #include <limits>
43 #include <map>
44 #include <memory>
45 #include <set>
46 #include <string>
47 #include <vector>
48 
49 #include "common/string_view.h"
50 #include "common/symbol_data.h"
51 #include "common/unordered.h"
52 #include "common/using_std_string.h"
53 #include "google_breakpad/common/breakpad_types.h"
54 
55 namespace google_breakpad {
56 
57 using std::set;
58 using std::vector;
59 using std::map;
60 
61 // A Module represents the contents of a module, and supports methods
62 // for adding information produced by parsing STABS or DWARF data
63 // --- possibly both from the same file --- and then writing out the
64 // unified contents as a Breakpad-format symbol file.
65 class Module {
66  public:
67   // The type of addresses and sizes in a symbol table.
68   typedef uint64_t Address;
69   static constexpr uint64_t kMaxAddress = std::numeric_limits<Address>::max();
70   struct File;
71   struct Function;
72   struct InlineOrigin;
73   struct Inline;
74   struct Line;
75   struct Extern;
76 
77   // Addresses appearing in File, Function, and Line structures are
78   // absolute, not relative to the module's load address.  That
79   // is, if the module were loaded at its nominal load address, the
80   // addresses would be correct.
81 
82   // A source file.
83   struct File {
FileFile84     explicit File(const string& name_input) : name(name_input), source_id(0) {}
85 
86     // The name of the source file.
87     const string name;
88 
89     // The file's source id.  The Write member function clears this
90     // field and assigns source ids a fresh, so any value placed here
91     // before calling Write will be lost.
92     int source_id;
93   };
94 
95   // An address range.
96   struct Range {
RangeRange97     Range(const Address address_input, const Address size_input) :
98         address(address_input), size(size_input) { }
99 
100     Address address;
101     Address size;
102   };
103 
104   // A function.
105   struct Function {
FunctionFunction106     Function(StringView name_input, const Address& address_input) :
107         name(name_input), address(address_input), parameter_size(0) {}
108 
109     // For sorting by address.  (Not style-guide compliant, but it's
110     // stupid not to put this in the struct.)
CompareByAddressFunction111     static bool CompareByAddress(const Function* x, const Function* y) {
112       return x->address < y->address;
113     }
114 
115     // The function's name.
116     StringView name;
117 
118     // The start address and the address ranges covered by the function.
119     const Address address;
120     vector<Range> ranges;
121 
122     // The function's parameter size.
123     Address parameter_size;
124 
125     // Source lines belonging to this function, sorted by increasing
126     // address.
127     vector<Line> lines;
128 
129     // Inlined call sites belonging to this functions.
130     vector<std::unique_ptr<Inline>> inlines;
131 
132     // If this symbol has been folded with other symbols in the linked binary.
133     bool is_multiple = false;
134 
135     // If the function's name should be filled out from a matching Extern,
136     // should they not match.
137     bool prefer_extern_name = false;
138   };
139 
140   struct InlineOrigin {
InlineOriginInlineOrigin141     explicit InlineOrigin(StringView name) : id(-1), name(name) {}
142 
143     // A unique id for each InlineOrigin object. INLINE records use the id to
144     // refer to its INLINE_ORIGIN record.
145     int id;
146 
147     // The inlined function's name.
148     StringView name;
149   };
150 
151   // A inlined call site.
152   struct Inline {
InlineInline153     Inline(InlineOrigin* origin,
154            const vector<Range>& ranges,
155            int call_site_line,
156            int call_site_file_id,
157            int inline_nest_level,
158            vector<std::unique_ptr<Inline>> child_inlines)
159         : origin(origin),
160           ranges(ranges),
161           call_site_line(call_site_line),
162           call_site_file_id(call_site_file_id),
163           call_site_file(nullptr),
164           inline_nest_level(inline_nest_level),
165           child_inlines(std::move(child_inlines)) {}
166 
167     InlineOrigin* origin;
168 
169     // The list of addresses and sizes.
170     vector<Range> ranges;
171 
172     int call_site_line;
173 
174     // The id is only meanful inside a CU. It's only used for looking up real
175     // File* after scanning a CU.
176     int call_site_file_id;
177 
178     File* call_site_file;
179 
180     int inline_nest_level;
181 
182     // A list of inlines which are children of this inline.
183     vector<std::unique_ptr<Inline>> child_inlines;
184 
getCallSiteFileIDInline185     int getCallSiteFileID() const {
186       return call_site_file ? call_site_file->source_id : -1;
187     }
188 
InlineDFSInline189     static void InlineDFS(
190         vector<std::unique_ptr<Module::Inline>>& inlines,
191         std::function<void(std::unique_ptr<Module::Inline>&)> const& forEach) {
192       for (std::unique_ptr<Module::Inline>& in : inlines) {
193         forEach(in);
194         InlineDFS(in->child_inlines, forEach);
195       }
196     }
197   };
198 
199   typedef map<uint64_t, InlineOrigin*> InlineOriginByOffset;
200 
201   class InlineOriginMap {
202    public:
203     // Add INLINE ORIGIN to the module. Return a pointer to origin .
204     InlineOrigin* GetOrCreateInlineOrigin(uint64_t offset, StringView name);
205 
206     // offset is the offset of a DW_TAG_subprogram. specification_offset is the
207     // value of its DW_AT_specification or equals to offset if
208     // DW_AT_specification doesn't exist in that DIE.
209     void SetReference(uint64_t offset, uint64_t specification_offset);
210 
~InlineOriginMap()211     ~InlineOriginMap() {
212       for (const auto& iter : inline_origins_) {
213         delete iter.second;
214       }
215     }
216 
217    private:
218     // A map from a DW_TAG_subprogram's offset to the DW_TAG_subprogram.
219     InlineOriginByOffset inline_origins_;
220 
221     // A map from a DW_TAG_subprogram's offset to the offset of its
222     // specification or abstract origin subprogram. The set of values in this
223     // map should always be the same set of keys in inline_origins_.
224     map<uint64_t, uint64_t> references_;
225   };
226 
227   map<std::string, InlineOriginMap> inline_origin_maps;
228 
229   // A source line.
230   struct Line {
231     // For sorting by address.  (Not style-guide compliant, but it's
232     // stupid not to put this in the struct.)
CompareByAddressLine233     static bool CompareByAddress(const Module::Line& x, const Module::Line& y) {
234       return x.address < y.address;
235     }
236 
237     Address address, size;    // The address and size of the line's code.
238     File* file;                // The source file.
239     int number;                // The source line number.
240   };
241 
242   // An exported symbol.
243   struct Extern {
ExternExtern244     explicit Extern(const Address& address_input) : address(address_input) {}
245     const Address address;
246     string name;
247     // If this symbol has been folded with other symbols in the linked binary.
248     bool is_multiple = false;
249   };
250 
251   // A map from register names to postfix expressions that recover
252   // their values. This can represent a complete set of rules to
253   // follow at some address, or a set of changes to be applied to an
254   // extant set of rules.
255   typedef map<string, string> RuleMap;
256 
257   // A map from addresses to RuleMaps, representing changes that take
258   // effect at given addresses.
259   typedef map<Address, RuleMap> RuleChangeMap;
260 
261   // A range of 'STACK CFI' stack walking information. An instance of
262   // this structure corresponds to a 'STACK CFI INIT' record and the
263   // subsequent 'STACK CFI' records that fall within its range.
264   struct StackFrameEntry {
265     // The starting address and number of bytes of machine code this
266     // entry covers.
267     Address address, size;
268 
269     // The initial register recovery rules, in force at the starting
270     // address.
271     RuleMap initial_rules;
272 
273     // A map from addresses to rule changes. To find the rules in
274     // force at a given address, start with initial_rules, and then
275     // apply the changes given in this map for all addresses up to and
276     // including the address you're interested in.
277     RuleChangeMap rule_changes;
278   };
279 
280   struct FunctionCompare {
operatorFunctionCompare281     bool operator() (const Function* lhs, const Function* rhs) const {
282       if (lhs->address == rhs->address)
283         return lhs->name < rhs->name;
284       return lhs->address < rhs->address;
285     }
286   };
287 
288   struct InlineOriginCompare {
operatorInlineOriginCompare289     bool operator()(const InlineOrigin* lhs, const InlineOrigin* rhs) const {
290       return lhs->name < rhs->name;
291     }
292   };
293 
294   struct ExternCompare {
295     // Defining is_transparent allows
296     // std::set<std::unique_ptr<Extern>, ExternCompare>::find() to be called
297     // with an Extern* and have set use the overloads below.
298     using is_transparent = void;
operatorExternCompare299     bool operator() (const std::unique_ptr<Extern>& lhs,
300                      const std::unique_ptr<Extern>& rhs) const {
301       return lhs->address < rhs->address;
302     }
operatorExternCompare303     bool operator() (const Extern* lhs, const std::unique_ptr<Extern>& rhs) const {
304       return lhs->address < rhs->address;
305     }
operatorExternCompare306     bool operator() (const std::unique_ptr<Extern>& lhs, const Extern* rhs) const {
307       return lhs->address < rhs->address;
308     }
309   };
310 
311   // Create a new module with the given name, operating system,
312   // architecture, and ID string.
313   // NB: `enable_multiple_field` is temporary while transitioning to enabling
314   // writing the multiple field permanently.
315   Module(const string& name,
316          const string& os,
317          const string& architecture,
318          const string& id,
319          const string& code_id = "",
320          bool enable_multiple_field = false,
321          bool prefer_extern_name = false);
322   ~Module();
323 
324   // Set the module's load address to LOAD_ADDRESS; addresses given
325   // for functions and lines will be written to the Breakpad symbol
326   // file as offsets from this address.  Construction initializes this
327   // module's load address to zero: addresses written to the symbol
328   // file will be the same as they appear in the Function, Line, and
329   // StackFrameEntry structures.
330   //
331   // Note that this member function has no effect on addresses stored
332   // in the data added to this module; the Write member function
333   // simply subtracts off the load address from addresses before it
334   // prints them. Only the last load address given before calling
335   // Write is used.
336   void SetLoadAddress(Address load_address);
337 
338   // Sets address filtering on elements added to the module.  This allows
339   // libraries with extraneous debug symbols to generate symbol files containing
340   // only relevant symbols.  For example, an LLD-generated partition library may
341   // contain debug information pertaining to all partitions derived from a
342   // single "combined" library.  Filtering applies only to elements added after
343   // this method is called.
344   void SetAddressRanges(const vector<Range>& ranges);
345 
346   // Add FUNCTION to the module. FUNCTION's name must not be empty.
347   // This module owns all Function objects added with this function:
348   // destroying the module destroys them as well.
349   // Return false if the function is duplicate and needs to be freed.
350   bool AddFunction(Function* function);
351 
352   // Add STACK_FRAME_ENTRY to the module.
353   // This module owns all StackFrameEntry objects added with this
354   // function: destroying the module destroys them as well.
355   void AddStackFrameEntry(std::unique_ptr<StackFrameEntry> stack_frame_entry);
356 
357   // Add EXT, an exported (PUBLIC) symbol, to the module.
358   // This module owns all Extern objects added with this function:
359   // destroying the module destroys them as well.
360   void AddExtern(std::unique_ptr<Extern> ext);
361 
362   // If this module has a file named NAME, return a pointer to it. If
363   // it has none, then create one and return a pointer to the new
364   // file. This module owns all File objects created using these
365   // functions; destroying the module destroys them as well.
366   File* FindFile(const string& name);
367   File* FindFile(const char* name);
368 
369   // If this module has a file named NAME, return a pointer to it.
370   // Otherwise, return NULL.
371   File* FindExistingFile(const string& name);
372 
373   // Insert pointers to the functions added to this module at I in
374   // VEC. The pointed-to Functions are still owned by this module.
375   // (Since this is effectively a copy of the function list, this is
376   // mostly useful for testing; other uses should probably get a more
377   // appropriate interface.)
378   void GetFunctions(vector<Function*>* vec, vector<Function*>::iterator i);
379 
380   // Insert pointers to the externs added to this module at I in
381   // VEC. The pointed-to Externs are still owned by this module.
382   // (Since this is effectively a copy of the extern list, this is
383   // mostly useful for testing; other uses should probably get a more
384   // appropriate interface.)
385   void GetExterns(vector<Extern*>* vec, vector<Extern*>::iterator i);
386 
387   // Clear VEC and fill it with pointers to the Files added to this
388   // module, sorted by name. The pointed-to Files are still owned by
389   // this module. (Since this is effectively a copy of the file list,
390   // this is mostly useful for testing; other uses should probably get
391   // a more appropriate interface.)
392   void GetFiles(vector<File*>* vec);
393 
394   // Clear VEC and fill it with pointers to the StackFrameEntry
395   // objects that have been added to this module. (Since this is
396   // effectively a copy of the stack frame entry list, this is mostly
397   // useful for testing; other uses should probably get
398   // a more appropriate interface.)
399   void GetStackFrameEntries(vector<StackFrameEntry*>* vec) const;
400 
401   // Find those files in this module that are actually referred to by
402   // functions' line number data, and assign them source id numbers.
403   // Set the source id numbers for all other files --- unused by the
404   // source line data --- to -1.  We do this before writing out the
405   // symbol file, at which point we omit any unused files.
406   void AssignSourceIds();
407 
408   // This function should be called before AssignSourceIds() to get the set of
409   // valid InlineOrigins*.
410   void CreateInlineOrigins(
411       set<InlineOrigin*, InlineOriginCompare>& inline_origins);
412 
413   // Call AssignSourceIds, and write this module to STREAM in the
414   // breakpad symbol format. Return true if all goes well, or false if
415   // an error occurs. This method writes out:
416   // - a header based on the values given to the constructor,
417   // If symbol_data is not CFI then:
418   // - the source files added via FindFile,
419   // - the functions added via AddFunction, each with its lines,
420   // - all public records,
421   // If symbol_data is CFI then:
422   // - all CFI records.
423   // Addresses in the output are all relative to the load address
424   // established by SetLoadAddress.
425   bool Write(std::ostream& stream, SymbolData symbol_data);
426 
427   // Place the name in the global set of strings. Return a StringView points to
428   // a string inside the pool.
AddStringToPool(const string & str)429   StringView AddStringToPool(const string& str) {
430     auto result = common_strings_.insert(str);
431     return *(result.first);
432   }
433 
name()434   string name() const { return name_; }
os()435   string os() const { return os_; }
architecture()436   string architecture() const { return architecture_; }
identifier()437   string identifier() const { return id_; }
code_identifier()438   string code_identifier() const { return code_id_; }
439 
440  private:
441   // Report an error that has occurred writing the symbol file, using
442   // errno to find the appropriate cause.  Return false.
443   static bool ReportError();
444 
445   // Write RULE_MAP to STREAM, in the form appropriate for 'STACK CFI'
446   // records, without a final newline. Return true if all goes well;
447   // if an error occurs, return false, and leave errno set.
448   static bool WriteRuleMap(const RuleMap& rule_map, std::ostream& stream);
449 
450   // Returns true if the specified address resides within a specified address
451   // range, or if no ranges have been specified.
452   bool AddressIsInModule(Address address) const;
453 
454   // Module header entries.
455   string name_, os_, architecture_, id_, code_id_;
456 
457   // The module's nominal load address.  Addresses for functions and
458   // lines are absolute, assuming the module is loaded at this
459   // address.
460   Address load_address_;
461 
462   // The set of valid address ranges of the module.  If specified, attempts to
463   // add elements residing outside these ranges will be silently filtered.
464   vector<Range> address_ranges_;
465 
466   // Relation for maps whose keys are strings shared with some other
467   // structure.
468   struct CompareStringPtrs {
operatorCompareStringPtrs469     bool operator()(const string* x, const string* y) const { return *x < *y; }
470   };
471 
472   // A map from filenames to File structures.  The map's keys are
473   // pointers to the Files' names.
474   typedef map<const string*, File*, CompareStringPtrs> FileByNameMap;
475 
476   // A set containing Function structures, sorted by address.
477   typedef set<Function*, FunctionCompare> FunctionSet;
478 
479   // A set containing Extern structures, sorted by address.
480   typedef set<std::unique_ptr<Extern>, ExternCompare> ExternSet;
481 
482   // The module owns all the files and functions that have been added
483   // to it; destroying the module frees the Files and Functions these
484   // point to.
485   FileByNameMap files_;    // This module's source files.
486   FunctionSet functions_;  // This module's functions.
487   // Used to quickly look up whether a function exists at a particular address.
488   unordered_set<Address> function_addresses_;
489 
490   // The module owns all the call frame info entries that have been
491   // added to it.
492   vector<std::unique_ptr<StackFrameEntry>> stack_frame_entries_;
493 
494   // The module owns all the externs that have been added to it;
495   // destroying the module frees the Externs these point to.
496   ExternSet externs_;
497 
498   unordered_set<string> common_strings_;
499 
500   // Whether symbols sharing an address should be collapsed into a single entry
501   // and marked with an `m` in the output. See
502   // https://bugs.chromium.org/p/google-breakpad/issues/detail?id=751 and docs
503   // at
504   // https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/symbol_files.md#records-3
505   bool enable_multiple_field_;
506 
507   // If a Function and an Extern share the same address but have a different
508   // name, prefer the name of the Extern.
509   //
510   // Use this when dumping Mach-O .dSYMs built with -gmlt (Minimum Line Tables),
511   // as the Function's fully-qualified name will only be present in the STABS
512   // (which are placed in the Extern), not in the DWARF symbols (which are
513   // placed in the Function).
514   bool prefer_extern_name_;
515 };
516 
517 }  // namespace google_breakpad
518 
519 #endif  // COMMON_LINUX_MODULE_H__
520