1 // Copyright 2011 Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 // * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 // Original author: Jim Blandy <[email protected]> <[email protected]>
30
31 // module.cc: Implement google_breakpad::Module. See module.h.
32
33 #ifdef HAVE_CONFIG_H
34 #include <config.h> // Must come first
35 #endif
36
37 #include "common/module.h"
38 #include "common/string_view.h"
39
40 #include <assert.h>
41 #include <errno.h>
42 #include <stdio.h>
43 #include <string.h>
44
#include <algorithm>
#include <functional>
#include <iostream>
#include <memory>
#include <utility>
49
50 namespace google_breakpad {
51
52 using std::dec;
53 using std::hex;
54 using std::unique_ptr;
55
GetOrCreateInlineOrigin(uint64_t offset,StringView name)56 Module::InlineOrigin* Module::InlineOriginMap::GetOrCreateInlineOrigin(
57 uint64_t offset,
58 StringView name) {
59 uint64_t specification_offset = references_[offset];
60 // Find the root offset.
61 auto iter = references_.find(specification_offset);
62 while (iter != references_.end() &&
63 specification_offset != references_[specification_offset]) {
64 specification_offset = references_[specification_offset];
65 iter = references_.find(specification_offset);
66 }
67 if (inline_origins_.find(specification_offset) != inline_origins_.end()) {
68 if (inline_origins_[specification_offset]->name == "<name omitted>") {
69 inline_origins_[specification_offset]->name = name;
70 }
71 return inline_origins_[specification_offset];
72 }
73 inline_origins_[specification_offset] = new Module::InlineOrigin(name);
74 return inline_origins_[specification_offset];
75 }
76
SetReference(uint64_t offset,uint64_t specification_offset)77 void Module::InlineOriginMap::SetReference(uint64_t offset,
78 uint64_t specification_offset) {
79 // If we haven't seen this doesn't exist in reference map, always add it.
80 if (references_.find(offset) == references_.end()) {
81 references_[offset] = specification_offset;
82 return;
83 }
84 // If offset equals specification_offset and offset exists in
85 // references_, there is no need to update the references_ map.
86 // This early return is necessary because the call to erase in following if
87 // will remove the entry of specification_offset in inline_origins_. If
88 // specification_offset equals to references_[offset], it might be
89 // duplicate debug info.
90 if (offset == specification_offset ||
91 specification_offset == references_[offset])
92 return;
93
94 // Fix up mapping in inline_origins_.
95 auto remove = inline_origins_.find(references_[offset]);
96 if (remove != inline_origins_.end()) {
97 inline_origins_[specification_offset] = std::move(remove->second);
98 inline_origins_.erase(remove);
99 }
100 references_[offset] = specification_offset;
101 }
102
Module(const string & name,const string & os,const string & architecture,const string & id,const string & code_id,bool enable_multiple_field,bool prefer_extern_name)103 Module::Module(const string& name,
104 const string& os,
105 const string& architecture,
106 const string& id,
107 const string& code_id /* = "" */,
108 bool enable_multiple_field /* = false*/,
109 bool prefer_extern_name /* = false*/)
110 : name_(name),
111 os_(os),
112 architecture_(architecture),
113 id_(id),
114 code_id_(code_id),
115 load_address_(0),
116 enable_multiple_field_(enable_multiple_field),
117 prefer_extern_name_(prefer_extern_name) {}
118
~Module()119 Module::~Module() {
120 for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it)
121 delete it->second;
122 for (FunctionSet::iterator it = functions_.begin();
123 it != functions_.end(); ++it) {
124 delete *it;
125 }
126 }
127
SetLoadAddress(Address address)128 void Module::SetLoadAddress(Address address) {
129 load_address_ = address;
130 }
131
SetAddressRanges(const vector<Range> & ranges)132 void Module::SetAddressRanges(const vector<Range>& ranges) {
133 address_ranges_ = ranges;
134 }
135
AddFunction(Function * function)136 bool Module::AddFunction(Function* function) {
137 // FUNC lines must not hold an empty name, so catch the problem early if
138 // callers try to add one.
139 assert(!function->name.empty());
140
141 if (!AddressIsInModule(function->address)) {
142 return false;
143 }
144
145 // FUNCs are better than PUBLICs as they come with sizes, so remove an extern
146 // with the same address if present.
147 Extern ext(function->address);
148 ExternSet::iterator it_ext = externs_.find(&ext);
149 if (it_ext == externs_.end() &&
150 architecture_ == "arm" &&
151 (function->address & 0x1) == 0) {
152 // ARM THUMB functions have bit 0 set. ARM64 does not have THUMB.
153 Extern arm_thumb_ext(function->address | 0x1);
154 it_ext = externs_.find(&arm_thumb_ext);
155 }
156 if (it_ext != externs_.end()) {
157 Extern* found_ext = it_ext->get();
158 bool name_mismatch = found_ext->name != function->name;
159 if (enable_multiple_field_) {
160 bool is_multiple_based_on_name;
161 // In the case of a .dSYM built with -gmlt, the external name will be the
162 // fully-qualified symbol name, but the function name will be the partial
163 // name (or omitted).
164 //
165 // Don't mark multiple in this case.
166 if (name_mismatch &&
167 (function->name == "<name omitted>" ||
168 found_ext->name.find(function->name.str()) != string::npos)) {
169 is_multiple_based_on_name = false;
170 } else {
171 is_multiple_based_on_name = name_mismatch;
172 }
173 // If the PUBLIC is for the same symbol as the FUNC, don't mark multiple.
174 function->is_multiple |=
175 is_multiple_based_on_name || found_ext->is_multiple;
176 }
177 if (name_mismatch && prefer_extern_name_) {
178 function->name = AddStringToPool(it_ext->get()->name);
179 }
180 externs_.erase(it_ext);
181 }
182 #if _DEBUG
183 {
184 // There should be no other PUBLIC symbols that overlap with the function.
185 for (const Range& range : function->ranges) {
186 Extern debug_ext(range.address);
187 ExternSet::iterator it_debug = externs_.lower_bound(&ext);
188 assert(it_debug == externs_.end() ||
189 (*it_debug)->address >= range.address + range.size);
190 }
191 }
192 #endif
193 if (enable_multiple_field_ && function_addresses_.count(function->address)) {
194 FunctionSet::iterator existing_function = std::find_if(
195 functions_.begin(), functions_.end(),
196 [&](Function* other) { return other->address == function->address; });
197 assert(existing_function != functions_.end());
198 (*existing_function)->is_multiple = true;
199 // Free the duplicate that was not inserted because this Module
200 // now owns it.
201 return false;
202 }
203 function_addresses_.emplace(function->address);
204 std::pair<FunctionSet::iterator, bool> ret = functions_.insert(function);
205 if (!ret.second && (*ret.first != function)) {
206 // Free the duplicate that was not inserted because this Module
207 // now owns it.
208 return false;
209 }
210 return true;
211 }
212
AddStackFrameEntry(std::unique_ptr<StackFrameEntry> stack_frame_entry)213 void Module::AddStackFrameEntry(std::unique_ptr<StackFrameEntry> stack_frame_entry) {
214 if (!AddressIsInModule(stack_frame_entry->address)) {
215 return;
216 }
217
218 stack_frame_entries_.push_back(std::move(stack_frame_entry));
219 }
220
AddExtern(std::unique_ptr<Extern> ext)221 void Module::AddExtern(std::unique_ptr<Extern> ext) {
222 if (!AddressIsInModule(ext->address)) {
223 return;
224 }
225
226 std::pair<ExternSet::iterator,bool> ret = externs_.emplace(std::move(ext));
227 if (!ret.second && enable_multiple_field_) {
228 (*ret.first)->is_multiple = true;
229 }
230 }
231
GetFunctions(vector<Function * > * vec,vector<Function * >::iterator i)232 void Module::GetFunctions(vector<Function*>* vec,
233 vector<Function*>::iterator i) {
234 vec->insert(i, functions_.begin(), functions_.end());
235 }
236
GetExterns(vector<Extern * > * vec,vector<Extern * >::iterator i)237 void Module::GetExterns(vector<Extern*>* vec,
238 vector<Extern*>::iterator i) {
239 auto pos = vec->insert(i, externs_.size(), nullptr);
240 for (const std::unique_ptr<Extern>& ext : externs_) {
241 *pos = ext.get();
242 ++pos;
243 }
244 }
245
FindFile(const string & name)246 Module::File* Module::FindFile(const string& name) {
247 // A tricky bit here. The key of each map entry needs to be a
248 // pointer to the entry's File's name string. This means that we
249 // can't do the initial lookup with any operation that would create
250 // an empty entry for us if the name isn't found (like, say,
251 // operator[] or insert do), because such a created entry's key will
252 // be a pointer the string passed as our argument. Since the key of
253 // a map's value type is const, we can't fix it up once we've
254 // created our file. lower_bound does the lookup without doing an
255 // insertion, and returns a good hint iterator to pass to insert.
256 // Our "destiny" is where we belong, whether we're there or not now.
257 FileByNameMap::iterator destiny = files_.lower_bound(&name);
258 if (destiny == files_.end()
259 || *destiny->first != name) { // Repeated string comparison, boo hoo.
260 File* file = new File(name);
261 file->source_id = -1;
262 destiny = files_.insert(destiny,
263 FileByNameMap::value_type(&file->name, file));
264 }
265 return destiny->second;
266 }
267
FindFile(const char * name)268 Module::File* Module::FindFile(const char* name) {
269 string name_string = name;
270 return FindFile(name_string);
271 }
272
FindExistingFile(const string & name)273 Module::File* Module::FindExistingFile(const string& name) {
274 FileByNameMap::iterator it = files_.find(&name);
275 return (it == files_.end()) ? NULL : it->second;
276 }
277
GetFiles(vector<File * > * vec)278 void Module::GetFiles(vector<File*>* vec) {
279 vec->clear();
280 for (FileByNameMap::iterator it = files_.begin(); it != files_.end(); ++it)
281 vec->push_back(it->second);
282 }
283
GetStackFrameEntries(vector<StackFrameEntry * > * vec) const284 void Module::GetStackFrameEntries(vector<StackFrameEntry*>* vec) const {
285 vec->clear();
286 vec->reserve(stack_frame_entries_.size());
287 for (const auto& ent : stack_frame_entries_) {
288 vec->push_back(ent.get());
289 }
290 }
291
AssignSourceIds()292 void Module::AssignSourceIds() {
293 // First, give every source file an id of -1.
294 for (FileByNameMap::iterator file_it = files_.begin();
295 file_it != files_.end(); ++file_it) {
296 file_it->second->source_id = -1;
297 }
298
299 // Next, mark all files actually cited by our functions' line number
300 // info, by setting each one's source id to zero.
301 for (FunctionSet::const_iterator func_it = functions_.begin();
302 func_it != functions_.end(); ++func_it) {
303 Function* func = *func_it;
304 for (vector<Line>::iterator line_it = func->lines.begin();
305 line_it != func->lines.end(); ++line_it)
306 line_it->file->source_id = 0;
307 }
308
309 // Also mark all files cited by inline callsite by setting each one's source
310 // id to zero.
311 auto markInlineFiles = [](unique_ptr<Inline>& in) {
312 // There are some artificial inline functions which don't belong to
313 // any file. Those will have file id -1.
314 if (in->call_site_file) {
315 in->call_site_file->source_id = 0;
316 }
317 };
318 for (auto func : functions_) {
319 Inline::InlineDFS(func->inlines, markInlineFiles);
320 }
321
322 // Finally, assign source ids to those files that have been marked.
323 // We could have just assigned source id numbers while traversing
324 // the line numbers, but doing it this way numbers the files in
325 // lexicographical order by name, which is neat.
326 int next_source_id = 0;
327 for (FileByNameMap::iterator file_it = files_.begin();
328 file_it != files_.end(); ++file_it) {
329 if (!file_it->second->source_id)
330 file_it->second->source_id = next_source_id++;
331 }
332 }
333
CreateInlineOrigins(set<InlineOrigin *,InlineOriginCompare> & inline_origins)334 void Module::CreateInlineOrigins(
335 set<InlineOrigin*, InlineOriginCompare>& inline_origins) {
336 // Only add origins that have file and deduplicate origins with same name and
337 // file id by doing a DFS.
338 auto addInlineOrigins = [&](unique_ptr<Inline>& in) {
339 auto it = inline_origins.find(in->origin);
340 if (it == inline_origins.end())
341 inline_origins.insert(in->origin);
342 else
343 in->origin = *it;
344 };
345 for (Function* func : functions_)
346 Module::Inline::InlineDFS(func->inlines, addInlineOrigins);
347 int next_id = 0;
348 for (InlineOrigin* origin : inline_origins) {
349 origin->id = next_id++;
350 }
351 }
352
ReportError()353 bool Module::ReportError() {
354 fprintf(stderr, "error writing symbol file: %s\n",
355 strerror(errno));
356 return false;
357 }
358
WriteRuleMap(const RuleMap & rule_map,std::ostream & stream)359 bool Module::WriteRuleMap(const RuleMap& rule_map, std::ostream& stream) {
360 for (RuleMap::const_iterator it = rule_map.begin();
361 it != rule_map.end(); ++it) {
362 if (it != rule_map.begin())
363 stream << ' ';
364 stream << it->first << ": " << it->second;
365 }
366 return stream.good();
367 }
368
AddressIsInModule(Address address) const369 bool Module::AddressIsInModule(Address address) const {
370 if (address_ranges_.empty()) {
371 return true;
372 }
373 for (const auto& segment : address_ranges_) {
374 if (address >= segment.address &&
375 address < segment.address + segment.size) {
376 return true;
377 }
378 }
379 return false;
380 }
381
Write(std::ostream & stream,SymbolData symbol_data)382 bool Module::Write(std::ostream& stream, SymbolData symbol_data) {
383 stream << "MODULE " << os_ << " " << architecture_ << " "
384 << id_ << " " << name_ << "\n";
385 if (!stream.good())
386 return ReportError();
387
388 if (!code_id_.empty()) {
389 stream << "INFO CODE_ID " << code_id_ << "\n";
390 }
391
392 if (symbol_data & SYMBOLS_AND_FILES) {
393 // Get all referenced inline origins.
394 set<InlineOrigin*, InlineOriginCompare> inline_origins;
395 CreateInlineOrigins(inline_origins);
396 AssignSourceIds();
397
398 // Write out files.
399 for (FileByNameMap::iterator file_it = files_.begin();
400 file_it != files_.end(); ++file_it) {
401 File* file = file_it->second;
402 if (file->source_id >= 0) {
403 stream << "FILE " << file->source_id << " " << file->name << "\n";
404 if (!stream.good())
405 return ReportError();
406 }
407 }
408 // Write out inline origins.
409 for (InlineOrigin* origin : inline_origins) {
410 stream << "INLINE_ORIGIN " << origin->id << " " << origin->name << "\n";
411 if (!stream.good())
412 return ReportError();
413 }
414
415 // Write out functions and their inlines and lines.
416 for (FunctionSet::const_iterator func_it = functions_.begin();
417 func_it != functions_.end(); ++func_it) {
418 Function* func = *func_it;
419 vector<Line>::iterator line_it = func->lines.begin();
420 for (auto range_it = func->ranges.cbegin();
421 range_it != func->ranges.cend(); ++range_it) {
422 stream << "FUNC " << (func->is_multiple ? "m " : "") << hex
423 << (range_it->address - load_address_) << " " << range_it->size
424 << " " << func->parameter_size << " " << func->name << dec
425 << "\n";
426
427 if (!stream.good())
428 return ReportError();
429
430 // Write out inlines.
431 auto write_inline = [&](unique_ptr<Inline>& in) {
432 stream << "INLINE ";
433 stream << in->inline_nest_level << " " << in->call_site_line << " "
434 << in->getCallSiteFileID() << " " << in->origin->id << hex;
435 for (const Range& r : in->ranges)
436 stream << " " << (r.address - load_address_) << " " << r.size;
437 stream << dec << "\n";
438 };
439 Module::Inline::InlineDFS(func->inlines, write_inline);
440 if (!stream.good())
441 return ReportError();
442
443 while ((line_it != func->lines.end()) &&
444 (line_it->address >= range_it->address) &&
445 (line_it->address < (range_it->address + range_it->size))) {
446 stream << hex
447 << (line_it->address - load_address_) << " "
448 << line_it->size << " "
449 << dec
450 << line_it->number << " "
451 << line_it->file->source_id << "\n";
452
453 if (!stream.good())
454 return ReportError();
455
456 ++line_it;
457 }
458 }
459 }
460
461 // Write out 'PUBLIC' records.
462 for (ExternSet::const_iterator extern_it = externs_.begin();
463 extern_it != externs_.end(); ++extern_it) {
464 Extern* ext = extern_it->get();
465 stream << "PUBLIC " << (ext->is_multiple ? "m " : "") << hex
466 << (ext->address - load_address_) << " 0 " << ext->name << dec
467 << "\n";
468 }
469 }
470
471 if (symbol_data & CFI) {
472 // Write out 'STACK CFI INIT' and 'STACK CFI' records.
473 for (auto frame_it = stack_frame_entries_.begin();
474 frame_it != stack_frame_entries_.end(); ++frame_it) {
475 StackFrameEntry* entry = frame_it->get();
476 stream << "STACK CFI INIT " << hex
477 << (entry->address - load_address_) << " "
478 << entry->size << " " << dec;
479 if (!stream.good()
480 || !WriteRuleMap(entry->initial_rules, stream))
481 return ReportError();
482
483 stream << "\n";
484
485 // Write out this entry's delta rules as 'STACK CFI' records.
486 for (RuleChangeMap::const_iterator delta_it = entry->rule_changes.begin();
487 delta_it != entry->rule_changes.end(); ++delta_it) {
488 stream << "STACK CFI " << hex
489 << (delta_it->first - load_address_) << " " << dec;
490 if (!stream.good()
491 || !WriteRuleMap(delta_it->second, stream))
492 return ReportError();
493
494 stream << "\n";
495 }
496 }
497 }
498
499 return true;
500 }
501
502 } // namespace google_breakpad
503