xref: /aosp_15_r20/external/google-breakpad/src/common/module.cc (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // Copyright 2011 Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
30 
31 // module.cc: Implement google_breakpad::Module.  See module.h.
32 
33 #ifdef HAVE_CONFIG_H
34 #include <config.h>  // Must come first
35 #endif
36 
37 #include "common/module.h"
38 #include "common/string_view.h"
39 
40 #include <assert.h>
41 #include <errno.h>
42 #include <stdio.h>
43 #include <string.h>
44 
45 #include <functional>
46 #include <iostream>
47 #include <memory>
48 #include <utility>
49 
50 namespace google_breakpad {
51 
52 using std::dec;
53 using std::hex;
54 using std::unique_ptr;
55 
GetOrCreateInlineOrigin(uint64_t offset,StringView name)56 Module::InlineOrigin* Module::InlineOriginMap::GetOrCreateInlineOrigin(
57     uint64_t offset,
58     StringView name) {
59   uint64_t specification_offset = references_[offset];
60   // Find the root offset.
61   auto iter = references_.find(specification_offset);
62   while (iter != references_.end() &&
63          specification_offset != references_[specification_offset]) {
64     specification_offset = references_[specification_offset];
65     iter = references_.find(specification_offset);
66   }
67   if (inline_origins_.find(specification_offset) != inline_origins_.end()) {
68     if (inline_origins_[specification_offset]->name == "<name omitted>") {
69       inline_origins_[specification_offset]->name = name;
70     }
71     return inline_origins_[specification_offset];
72   }
73   inline_origins_[specification_offset] = new Module::InlineOrigin(name);
74   return inline_origins_[specification_offset];
75 }
76 
SetReference(uint64_t offset,uint64_t specification_offset)77 void Module::InlineOriginMap::SetReference(uint64_t offset,
78                                            uint64_t specification_offset) {
79   // If we haven't seen this doesn't exist in reference map, always add it.
80   if (references_.find(offset) == references_.end()) {
81     references_[offset] = specification_offset;
82     return;
83   }
84   // If offset equals specification_offset and offset exists in
85   // references_, there is no need to update the references_ map.
86   // This early return is necessary because the call to erase in following if
87   // will remove the entry of specification_offset in inline_origins_. If
88   // specification_offset equals to references_[offset], it might be
89   // duplicate debug info.
90   if (offset == specification_offset ||
91       specification_offset == references_[offset])
92     return;
93 
94   // Fix up mapping in inline_origins_.
95   auto remove = inline_origins_.find(references_[offset]);
96   if (remove != inline_origins_.end()) {
97     inline_origins_[specification_offset] = std::move(remove->second);
98     inline_origins_.erase(remove);
99   }
100   references_[offset] = specification_offset;
101 }
102 
Module(const string & name,const string & os,const string & architecture,const string & id,const string & code_id,bool enable_multiple_field,bool prefer_extern_name)103 Module::Module(const string& name,
104                const string& os,
105                const string& architecture,
106                const string& id,
107                const string& code_id /* = "" */,
108                bool enable_multiple_field /* = false*/,
109                bool prefer_extern_name /* = false*/)
110     : name_(name),
111       os_(os),
112       architecture_(architecture),
113       id_(id),
114       code_id_(code_id),
115       load_address_(0),
116       enable_multiple_field_(enable_multiple_field),
117       prefer_extern_name_(prefer_extern_name) {}
118 
~Module()119 Module::~Module() {
120   for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it)
121     delete it->second;
122   for (FunctionSet::iterator it = functions_.begin();
123        it != functions_.end(); ++it) {
124     delete *it;
125   }
126 }
127 
SetLoadAddress(Address address)128 void Module::SetLoadAddress(Address address) {
129   load_address_ = address;
130 }
131 
SetAddressRanges(const vector<Range> & ranges)132 void Module::SetAddressRanges(const vector<Range>& ranges) {
133   address_ranges_ = ranges;
134 }
135 
AddFunction(Function * function)136 bool Module::AddFunction(Function* function) {
137   // FUNC lines must not hold an empty name, so catch the problem early if
138   // callers try to add one.
139   assert(!function->name.empty());
140 
141   if (!AddressIsInModule(function->address)) {
142     return false;
143   }
144 
145   // FUNCs are better than PUBLICs as they come with sizes, so remove an extern
146   // with the same address if present.
147   Extern ext(function->address);
148   ExternSet::iterator it_ext = externs_.find(&ext);
149   if (it_ext == externs_.end() &&
150       architecture_ == "arm" &&
151       (function->address & 0x1) == 0) {
152     // ARM THUMB functions have bit 0 set. ARM64 does not have THUMB.
153     Extern arm_thumb_ext(function->address | 0x1);
154     it_ext = externs_.find(&arm_thumb_ext);
155   }
156   if (it_ext != externs_.end()) {
157     Extern* found_ext = it_ext->get();
158     bool name_mismatch = found_ext->name != function->name;
159     if (enable_multiple_field_) {
160       bool is_multiple_based_on_name;
161       // In the case of a .dSYM built with -gmlt, the external name will be the
162       // fully-qualified symbol name, but the function name will be the partial
163       // name (or omitted).
164       //
165       // Don't mark multiple in this case.
166       if (name_mismatch &&
167           (function->name == "<name omitted>" ||
168            found_ext->name.find(function->name.str()) != string::npos)) {
169         is_multiple_based_on_name = false;
170       } else {
171         is_multiple_based_on_name = name_mismatch;
172       }
173       // If the PUBLIC is for the same symbol as the FUNC, don't mark multiple.
174       function->is_multiple |=
175           is_multiple_based_on_name || found_ext->is_multiple;
176     }
177     if (name_mismatch && prefer_extern_name_) {
178       function->name = AddStringToPool(it_ext->get()->name);
179     }
180     externs_.erase(it_ext);
181   }
182 #if _DEBUG
183   {
184     // There should be no other PUBLIC symbols that overlap with the function.
185     for (const Range& range : function->ranges) {
186       Extern debug_ext(range.address);
187       ExternSet::iterator it_debug = externs_.lower_bound(&ext);
188       assert(it_debug == externs_.end() ||
189              (*it_debug)->address >= range.address + range.size);
190     }
191   }
192 #endif
193   if (enable_multiple_field_ && function_addresses_.count(function->address)) {
194     FunctionSet::iterator existing_function = std::find_if(
195         functions_.begin(), functions_.end(),
196         [&](Function* other) { return other->address == function->address; });
197     assert(existing_function != functions_.end());
198     (*existing_function)->is_multiple = true;
199     // Free the duplicate that was not inserted because this Module
200     // now owns it.
201     return false;
202   }
203   function_addresses_.emplace(function->address);
204   std::pair<FunctionSet::iterator, bool> ret = functions_.insert(function);
205   if (!ret.second && (*ret.first != function)) {
206     // Free the duplicate that was not inserted because this Module
207     // now owns it.
208     return false;
209   }
210   return true;
211 }
212 
AddStackFrameEntry(std::unique_ptr<StackFrameEntry> stack_frame_entry)213 void Module::AddStackFrameEntry(std::unique_ptr<StackFrameEntry> stack_frame_entry) {
214   if (!AddressIsInModule(stack_frame_entry->address)) {
215     return;
216   }
217 
218   stack_frame_entries_.push_back(std::move(stack_frame_entry));
219 }
220 
AddExtern(std::unique_ptr<Extern> ext)221 void Module::AddExtern(std::unique_ptr<Extern> ext) {
222   if (!AddressIsInModule(ext->address)) {
223     return;
224   }
225 
226   std::pair<ExternSet::iterator,bool> ret = externs_.emplace(std::move(ext));
227   if (!ret.second && enable_multiple_field_) {
228     (*ret.first)->is_multiple = true;
229   }
230 }
231 
GetFunctions(vector<Function * > * vec,vector<Function * >::iterator i)232 void Module::GetFunctions(vector<Function*>* vec,
233                           vector<Function*>::iterator i) {
234   vec->insert(i, functions_.begin(), functions_.end());
235 }
236 
GetExterns(vector<Extern * > * vec,vector<Extern * >::iterator i)237 void Module::GetExterns(vector<Extern*>* vec,
238                         vector<Extern*>::iterator i) {
239   auto pos = vec->insert(i, externs_.size(), nullptr);
240   for (const std::unique_ptr<Extern>& ext : externs_) {
241     *pos = ext.get();
242     ++pos;
243   }
244 }
245 
FindFile(const string & name)246 Module::File* Module::FindFile(const string& name) {
247   // A tricky bit here.  The key of each map entry needs to be a
248   // pointer to the entry's File's name string.  This means that we
249   // can't do the initial lookup with any operation that would create
250   // an empty entry for us if the name isn't found (like, say,
251   // operator[] or insert do), because such a created entry's key will
252   // be a pointer the string passed as our argument.  Since the key of
253   // a map's value type is const, we can't fix it up once we've
254   // created our file.  lower_bound does the lookup without doing an
255   // insertion, and returns a good hint iterator to pass to insert.
256   // Our "destiny" is where we belong, whether we're there or not now.
257   FileByNameMap::iterator destiny = files_.lower_bound(&name);
258   if (destiny == files_.end()
259       || *destiny->first != name) {  // Repeated string comparison, boo hoo.
260     File* file = new File(name);
261     file->source_id = -1;
262     destiny = files_.insert(destiny,
263                             FileByNameMap::value_type(&file->name, file));
264   }
265   return destiny->second;
266 }
267 
FindFile(const char * name)268 Module::File* Module::FindFile(const char* name) {
269   string name_string = name;
270   return FindFile(name_string);
271 }
272 
FindExistingFile(const string & name)273 Module::File* Module::FindExistingFile(const string& name) {
274   FileByNameMap::iterator it = files_.find(&name);
275   return (it == files_.end()) ? NULL : it->second;
276 }
277 
GetFiles(vector<File * > * vec)278 void Module::GetFiles(vector<File*>* vec) {
279   vec->clear();
280   for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it)
281     vec->push_back(it->second);
282 }
283 
GetStackFrameEntries(vector<StackFrameEntry * > * vec) const284 void Module::GetStackFrameEntries(vector<StackFrameEntry*>* vec) const {
285   vec->clear();
286   vec->reserve(stack_frame_entries_.size());
287   for (const auto& ent : stack_frame_entries_) {
288     vec->push_back(ent.get());
289   }
290 }
291 
AssignSourceIds()292 void Module::AssignSourceIds() {
293   // First, give every source file an id of -1.
294   for (FileByNameMap::iterator file_it = files_.begin();
295        file_it != files_.end(); ++file_it) {
296     file_it->second->source_id = -1;
297   }
298 
299   // Next, mark all files actually cited by our functions' line number
300   // info, by setting each one's source id to zero.
301   for (FunctionSet::const_iterator func_it = functions_.begin();
302        func_it != functions_.end(); ++func_it) {
303     Function* func = *func_it;
304     for (vector<Line>::iterator line_it = func->lines.begin();
305          line_it != func->lines.end(); ++line_it)
306       line_it->file->source_id = 0;
307   }
308 
309   // Also mark all files cited by inline callsite by setting each one's source
310   // id to zero.
311   auto markInlineFiles = [](unique_ptr<Inline>& in) {
312     // There are some artificial inline functions which don't belong to
313     // any file. Those will have file id -1.
314     if (in->call_site_file) {
315       in->call_site_file->source_id = 0;
316     }
317   };
318   for (auto func : functions_) {
319     Inline::InlineDFS(func->inlines, markInlineFiles);
320   }
321 
322   // Finally, assign source ids to those files that have been marked.
323   // We could have just assigned source id numbers while traversing
324   // the line numbers, but doing it this way numbers the files in
325   // lexicographical order by name, which is neat.
326   int next_source_id = 0;
327   for (FileByNameMap::iterator file_it = files_.begin();
328        file_it != files_.end(); ++file_it) {
329     if (!file_it->second->source_id)
330       file_it->second->source_id = next_source_id++;
331   }
332 }
333 
CreateInlineOrigins(set<InlineOrigin *,InlineOriginCompare> & inline_origins)334 void Module::CreateInlineOrigins(
335     set<InlineOrigin*, InlineOriginCompare>& inline_origins) {
336   // Only add origins that have file and deduplicate origins with same name and
337   // file id by doing a DFS.
338   auto addInlineOrigins = [&](unique_ptr<Inline>& in) {
339     auto it = inline_origins.find(in->origin);
340     if (it == inline_origins.end())
341       inline_origins.insert(in->origin);
342     else
343       in->origin = *it;
344   };
345   for (Function* func : functions_)
346     Module::Inline::InlineDFS(func->inlines, addInlineOrigins);
347   int next_id = 0;
348   for (InlineOrigin* origin : inline_origins) {
349     origin->id = next_id++;
350   }
351 }
352 
ReportError()353 bool Module::ReportError() {
354   fprintf(stderr, "error writing symbol file: %s\n",
355           strerror(errno));
356   return false;
357 }
358 
WriteRuleMap(const RuleMap & rule_map,std::ostream & stream)359 bool Module::WriteRuleMap(const RuleMap& rule_map, std::ostream& stream) {
360   for (RuleMap::const_iterator it = rule_map.begin();
361        it != rule_map.end(); ++it) {
362     if (it != rule_map.begin())
363       stream << ' ';
364     stream << it->first << ": " << it->second;
365   }
366   return stream.good();
367 }
368 
AddressIsInModule(Address address) const369 bool Module::AddressIsInModule(Address address) const {
370   if (address_ranges_.empty()) {
371     return true;
372   }
373   for (const auto& segment : address_ranges_) {
374     if (address >= segment.address &&
375         address < segment.address + segment.size) {
376       return true;
377     }
378   }
379   return false;
380 }
381 
Write(std::ostream & stream,SymbolData symbol_data)382 bool Module::Write(std::ostream& stream, SymbolData symbol_data) {
383   stream << "MODULE " << os_ << " " << architecture_ << " "
384          << id_ << " " << name_ << "\n";
385   if (!stream.good())
386     return ReportError();
387 
388   if (!code_id_.empty()) {
389     stream << "INFO CODE_ID " << code_id_ << "\n";
390   }
391 
392   if (symbol_data & SYMBOLS_AND_FILES) {
393     // Get all referenced inline origins.
394     set<InlineOrigin*, InlineOriginCompare> inline_origins;
395     CreateInlineOrigins(inline_origins);
396     AssignSourceIds();
397 
398     // Write out files.
399     for (FileByNameMap::iterator file_it = files_.begin();
400          file_it != files_.end(); ++file_it) {
401       File* file = file_it->second;
402       if (file->source_id >= 0) {
403         stream << "FILE " << file->source_id << " " <<  file->name << "\n";
404         if (!stream.good())
405           return ReportError();
406       }
407     }
408     // Write out inline origins.
409     for (InlineOrigin* origin : inline_origins) {
410       stream << "INLINE_ORIGIN " << origin->id << " " << origin->name << "\n";
411       if (!stream.good())
412         return ReportError();
413     }
414 
415     // Write out functions and their inlines and lines.
416     for (FunctionSet::const_iterator func_it = functions_.begin();
417          func_it != functions_.end(); ++func_it) {
418       Function* func = *func_it;
419       vector<Line>::iterator line_it = func->lines.begin();
420       for (auto range_it = func->ranges.cbegin();
421            range_it != func->ranges.cend(); ++range_it) {
422         stream << "FUNC " << (func->is_multiple ? "m " : "") << hex
423                << (range_it->address - load_address_) << " " << range_it->size
424                << " " << func->parameter_size << " " << func->name << dec
425                << "\n";
426 
427         if (!stream.good())
428           return ReportError();
429 
430         // Write out inlines.
431         auto write_inline = [&](unique_ptr<Inline>& in) {
432           stream << "INLINE ";
433           stream << in->inline_nest_level << " " << in->call_site_line << " "
434                  << in->getCallSiteFileID() << " " << in->origin->id << hex;
435           for (const Range& r : in->ranges)
436             stream << " " << (r.address - load_address_) << " " << r.size;
437           stream << dec << "\n";
438         };
439         Module::Inline::InlineDFS(func->inlines, write_inline);
440         if (!stream.good())
441           return ReportError();
442 
443         while ((line_it != func->lines.end()) &&
444                (line_it->address >= range_it->address) &&
445                (line_it->address < (range_it->address + range_it->size))) {
446           stream << hex
447                  << (line_it->address - load_address_) << " "
448                  << line_it->size << " "
449                  << dec
450                  << line_it->number << " "
451                  << line_it->file->source_id << "\n";
452 
453           if (!stream.good())
454             return ReportError();
455 
456           ++line_it;
457         }
458       }
459     }
460 
461     // Write out 'PUBLIC' records.
462     for (ExternSet::const_iterator extern_it = externs_.begin();
463          extern_it != externs_.end(); ++extern_it) {
464       Extern* ext = extern_it->get();
465       stream << "PUBLIC " << (ext->is_multiple ? "m " : "") << hex
466              << (ext->address - load_address_) << " 0 " << ext->name << dec
467              << "\n";
468     }
469   }
470 
471   if (symbol_data & CFI) {
472     // Write out 'STACK CFI INIT' and 'STACK CFI' records.
473     for (auto frame_it = stack_frame_entries_.begin();
474          frame_it != stack_frame_entries_.end(); ++frame_it) {
475       StackFrameEntry* entry = frame_it->get();
476       stream << "STACK CFI INIT " << hex
477              << (entry->address - load_address_) << " "
478              << entry->size << " " << dec;
479       if (!stream.good()
480           || !WriteRuleMap(entry->initial_rules, stream))
481         return ReportError();
482 
483       stream << "\n";
484 
485       // Write out this entry's delta rules as 'STACK CFI' records.
486       for (RuleChangeMap::const_iterator delta_it = entry->rule_changes.begin();
487            delta_it != entry->rule_changes.end(); ++delta_it) {
488         stream << "STACK CFI " << hex
489                << (delta_it->first - load_address_) << " " << dec;
490         if (!stream.good()
491             || !WriteRuleMap(delta_it->second, stream))
492           return ReportError();
493 
494         stream << "\n";
495       }
496     }
497   }
498 
499   return true;
500 }
501 
502 }  // namespace google_breakpad
503