xref: /aosp_15_r20/external/stg/elf_reader.cc (revision 9e3b08ae94a55201065475453d799e8b1378bea6)
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2022 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License.  You may obtain a copy of the License at
9 //
10 //     https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Aleksei Vetrov
19 
20 #include "elf_reader.h"
21 
22 #include <cstddef>
23 #include <map>
24 #include <memory>
25 #include <optional>
26 #include <string>
27 #include <string_view>
28 #include <utility>
29 #include <vector>
30 
31 #include "dwarf_processor.h"
32 #include "dwarf_wrappers.h"
33 #include "elf_dwarf_handle.h"
34 #include "elf_loader.h"
35 #include "error.h"
36 #include "filter.h"
37 #include "graph.h"
38 #include "reader_options.h"
39 #include "runtime.h"
40 #include "type_normalisation.h"
41 #include "type_resolution.h"
42 #include "unification.h"
43 
44 namespace stg {
45 namespace elf {
46 namespace internal {
47 
48 namespace {
49 
// Looks up `key` in the associative container `map`.
//
// Returns a copy of the mapped value wrapped in an optional when the key is
// present, and an empty optional otherwise.
template <typename M, typename K>
std::optional<typename M::mapped_type> MaybeGet(const M& map, const K& key) {
  std::optional<typename M::mapped_type> found;
  if (const auto entry = map.find(key); entry != map.end()) {
    found.emplace(entry->second);
  }
  return found;
}
58 
59 }  // namespace
60 
ConvertSymbolType(SymbolTableEntry::SymbolType symbol_type)61 ElfSymbol::SymbolType ConvertSymbolType(
62     SymbolTableEntry::SymbolType symbol_type) {
63   switch (symbol_type) {
64     case SymbolTableEntry::SymbolType::NOTYPE:
65       return ElfSymbol::SymbolType::NOTYPE;
66     case SymbolTableEntry::SymbolType::OBJECT:
67       return ElfSymbol::SymbolType::OBJECT;
68     case SymbolTableEntry::SymbolType::FUNCTION:
69       return ElfSymbol::SymbolType::FUNCTION;
70     case SymbolTableEntry::SymbolType::COMMON:
71       return ElfSymbol::SymbolType::COMMON;
72     case SymbolTableEntry::SymbolType::TLS:
73       return ElfSymbol::SymbolType::TLS;
74     case SymbolTableEntry::SymbolType::GNU_IFUNC:
75       return ElfSymbol::SymbolType::GNU_IFUNC;
76     default:
77       Die() << "Unsupported ELF symbol type: " << symbol_type;
78   }
79 }
80 
GetKsymtabSymbols(const SymbolTable & symbols)81 SymbolNameList GetKsymtabSymbols(const SymbolTable& symbols) {
82   constexpr std::string_view kKsymtabPrefix = "__ksymtab_";
83   SymbolNameList result;
84   result.reserve(symbols.size() / 2);
85   for (const auto& symbol : symbols) {
86     if (symbol.name.substr(0, kKsymtabPrefix.size()) == kKsymtabPrefix) {
87       result.emplace(symbol.name.substr(kKsymtabPrefix.size()));
88     }
89   }
90   return result;
91 }
92 
GetCRCValuesMap(const SymbolTable & symbols,const ElfLoader & elf)93 CRCValuesMap GetCRCValuesMap(const SymbolTable& symbols, const ElfLoader& elf) {
94   constexpr std::string_view kCRCPrefix = "__crc_";
95 
96   CRCValuesMap crc_values;
97 
98   for (const auto& symbol : symbols) {
99     const std::string_view name = symbol.name;
100     if (name.substr(0, kCRCPrefix.size()) == kCRCPrefix) {
101       const std::string_view name_suffix = name.substr(kCRCPrefix.size());
102       if (!crc_values.emplace(name_suffix, elf.GetElfSymbolCRC(symbol))
103                .second) {
104         Die() << "Multiple CRC values for symbol '" << name_suffix << '\'';
105       }
106     }
107   }
108 
109   return crc_values;
110 }
111 
GetNamespacesMap(const SymbolTable & symbols,const ElfLoader & elf)112 NamespacesMap GetNamespacesMap(const SymbolTable& symbols,
113                                const ElfLoader& elf) {
114   constexpr std::string_view kNSPrefix = "__kstrtabns_";
115 
116   NamespacesMap namespaces;
117 
118   for (const auto& symbol : symbols) {
119     const std::string_view name = symbol.name;
120     if (name.substr(0, kNSPrefix.size()) == kNSPrefix) {
121       const std::string_view name_suffix = name.substr(kNSPrefix.size());
122       const std::string_view ns = elf.GetElfSymbolNamespace(symbol);
123       if (ns.empty()) {
124         // The global namespace is explicitly represented as the empty string,
125         // but the common interpretation is that such symbols lack an export
126         // namespace.
127         continue;
128       }
129       if (!namespaces.emplace(name_suffix, ns).second) {
130         Die() << "Multiple namespaces for symbol '" << name_suffix << '\'';
131       }
132     }
133   }
134 
135   return namespaces;
136 }
137 
GetCFIAddressMap(const SymbolTable & symbols,const ElfLoader & elf)138 AddressMap GetCFIAddressMap(const SymbolTable& symbols, const ElfLoader& elf) {
139   AddressMap name_to_address;
140   for (const auto& symbol : symbols) {
141     const std::string_view name_prefix = UnwrapCFISymbolName(symbol.name);
142     const size_t address = elf.GetAbsoluteAddress(symbol);
143     if (!name_to_address.emplace(name_prefix, address).second) {
144       Die() << "Multiple CFI symbols referring to symbol '" << name_prefix
145             << '\'';
146     }
147   }
148   return name_to_address;
149 }
150 
IsPublicFunctionOrVariable(const SymbolTableEntry & symbol)151 bool IsPublicFunctionOrVariable(const SymbolTableEntry& symbol) {
152   const auto symbol_type = symbol.symbol_type;
153   // Reject symbols that are not functions or variables.
154   if (symbol_type != SymbolTableEntry::SymbolType::FUNCTION &&
155       symbol_type != SymbolTableEntry::SymbolType::OBJECT &&
156       symbol_type != SymbolTableEntry::SymbolType::TLS &&
157       symbol_type != SymbolTableEntry::SymbolType::GNU_IFUNC) {
158     return false;
159   }
160 
161   // Function or variable of ValueType::ABSOLUTE is not expected in any binary,
162   // but GNU `ld` adds object of such type for every version name defined in
163   // file. Such symbol should be rejected, because in fact it is not variable.
164   if (symbol.value_type == SymbolTableEntry::ValueType::ABSOLUTE) {
165     Check(symbol_type == SymbolTableEntry::SymbolType::OBJECT)
166         << "Unexpected function or variable with ABSOLUTE value type";
167     return false;
168   }
169 
170   // Undefined symbol is dependency of the binary but is not part of ABI
171   // provided by binary and should be rejected.
172   if (symbol.value_type == SymbolTableEntry::ValueType::UNDEFINED) {
173     return false;
174   }
175 
176   // Local symbol is not visible outside the binary, so it is not public
177   // and should be rejected.
178   if (symbol.binding == SymbolTableEntry::Binding::LOCAL) {
179     return false;
180   }
181 
182   // "Hidden" and "internal" visibility values mean that symbol is not public
183   // and should be rejected.
184   if (symbol.visibility == SymbolTableEntry::Visibility::HIDDEN ||
185       symbol.visibility == SymbolTableEntry::Visibility::INTERNAL) {
186     return false;
187   }
188 
189   return true;
190 }
191 
IsLinuxKernelFunctionOrVariable(const SymbolNameList & ksymtab,const SymbolTableEntry & symbol)192 bool IsLinuxKernelFunctionOrVariable(const SymbolNameList& ksymtab,
193                                      const SymbolTableEntry& symbol) {
194   // We use symbol name extracted from __ksymtab_ symbols as a proxy for the
195   // real symbol in the ksymtab. Such names can still be duplicated by LOCAL
196   // symbols so drop them to avoid false matches.
197   if (symbol.binding == SymbolTableEntry::Binding::LOCAL) {
198     return false;
199   }
200   // TODO: handle undefined ksymtab symbols
201   return ksymtab.contains(symbol.name);
202 }
203 
204 namespace {
205 
206 class Reader {
207  public:
Reader(Runtime & runtime,Graph & graph,ElfDwarfHandle & elf_dwarf_handle,ReadOptions options,const std::unique_ptr<Filter> & file_filter)208   Reader(Runtime& runtime, Graph& graph, ElfDwarfHandle& elf_dwarf_handle,
209          ReadOptions options, const std::unique_ptr<Filter>& file_filter)
210       : graph_(graph),
211         elf_dwarf_handle_(elf_dwarf_handle),
212         elf_(elf_dwarf_handle_.GetElf()),
213         options_(options),
214         file_filter_(file_filter),
215         runtime_(runtime) {}
216 
217   Id Read();
218 
219  private:
220   using SymbolIndex =
221       std::map<std::pair<dwarf::Address, std::string>, std::vector<size_t>>;
222 
223   void GetLinuxKernelSymbols(
224       const std::vector<SymbolTableEntry>& all_symbols,
225       std::vector<std::pair<ElfSymbol, size_t>>& symbols) const;
226   void GetUserspaceSymbols(
227       const std::vector<SymbolTableEntry>& all_symbols,
228       std::vector<std::pair<ElfSymbol, size_t>>& symbols) const;
229 
BuildRoot(const std::vector<std::pair<ElfSymbol,size_t>> & symbols)230   Id BuildRoot(const std::vector<std::pair<ElfSymbol, size_t>>& symbols) {
231     // On destruction, the unification object will remove or rewrite each graph
232     // node for which it has a mapping.
233     //
234     // Graph rewriting is expensive so an important optimisation is to restrict
235     // the nodes in consideration to the ones allocated by the DWARF processor
236     // here and any symbol or type roots that follow. This is done by setting
237     // the starting node ID to be the current graph limit.
238     Unification unification(runtime_, graph_, graph_.Limit());
239 
240     const dwarf::Types types =
241         dwarf::Process(elf_dwarf_handle_.GetDwarf(),
242                        elf_.IsLittleEndianBinary(), file_filter_, graph_);
243 
244     // A less important optimisation is avoiding copying the mapping array as it
245     // is populated. This is done by reserving space to the new graph limit.
246     unification.Reserve(graph_.Limit());
247 
248     // fill address to id
249     //
250     // In general, we want to handle as many of the following cases as possible.
251     // In practice, determining the correct ELF-DWARF match may be impossible.
252     //
253     // * compiler-driven aliasing - multiple symbols with same address
254     // * zero-size symbol false aliasing - multiple symbols and types with same
255     //   address
256     // * weak/strong linkage symbols - multiple symbols and types with same
257     //   address
258     // * assembly symbols - multiple declarations but no definition and no
259     //   address in DWARF.
260     SymbolIndex address_name_to_index;
261     for (size_t i = 0; i < types.symbols.size(); ++i) {
262       const auto& symbol = types.symbols[i];
263       address_name_to_index[{symbol.address, symbol.linkage_name}].push_back(i);
264     }
265 
266     std::map<std::string, Id> symbols_map;
267     for (auto [symbol, address] : symbols) {
268       // TODO: add VersionInfoToString to SymbolKey name
269       // TODO: check for uniqueness of SymbolKey in map after
270       // support for version info
271       MaybeAddTypeInfo(address_name_to_index, types.symbols, address, symbol,
272                        unification);
273       symbols_map.emplace(VersionedSymbolName(symbol),
274                           graph_.Add<ElfSymbol>(symbol));
275     }
276 
277     std::map<std::string, Id> types_map;
278     if (options_.Test(ReadOptions::TYPE_ROOTS)) {
279       const InterfaceKey get_key(graph_);
280       for (const auto id : types.named_type_ids) {
281         const auto [it, inserted] = types_map.emplace(get_key(id), id);
282         if (!inserted && !unification.Unify(id, it->second)) {
283           Die() << "found conflicting interface type: " << it->first;
284         }
285       }
286     }
287 
288     Id root = graph_.Add<Interface>(
289         std::move(symbols_map), std::move(types_map));
290 
291     // Use all named types and DWARF declarations as roots for type resolution.
292     std::vector<Id> roots;
293     roots.reserve(types.named_type_ids.size() + types.symbols.size() + 1);
294     for (const auto& symbol : types.symbols) {
295       roots.push_back(symbol.type_id);
296     }
297     for (const auto id : types.named_type_ids) {
298       roots.push_back(id);
299     }
300     roots.push_back(root);
301 
302     stg::ResolveTypes(runtime_, graph_, unification, {roots});
303 
304     unification.Update(root);
305     return root;
306   }
307 
IsEqual(Unification & unification,const dwarf::Types::Symbol & lhs,const dwarf::Types::Symbol & rhs)308   static bool IsEqual(Unification& unification,
309                       const dwarf::Types::Symbol& lhs,
310                       const dwarf::Types::Symbol& rhs) {
311     return lhs.scoped_name == rhs.scoped_name
312         && lhs.linkage_name == rhs.linkage_name
313         && lhs.address == rhs.address
314         && unification.Unify(lhs.type_id, rhs.type_id);
315   }
316 
SymbolTableEntryToElfSymbol(const CRCValuesMap & crc_values,const NamespacesMap & namespaces,const SymbolTableEntry & symbol)317   static ElfSymbol SymbolTableEntryToElfSymbol(
318       const CRCValuesMap& crc_values, const NamespacesMap& namespaces,
319       const SymbolTableEntry& symbol) {
320     return {
321         /* symbol_name = */ std::string(symbol.name),
322         /* version_info = */ std::nullopt,
323         /* is_defined = */
324         symbol.value_type != SymbolTableEntry::ValueType::UNDEFINED,
325         /* symbol_type = */ ConvertSymbolType(symbol.symbol_type),
326         /* binding = */ symbol.binding,
327         /* visibility = */ symbol.visibility,
328         /* crc = */ MaybeGet(crc_values, std::string(symbol.name)),
329         /* ns = */ MaybeGet(namespaces, std::string(symbol.name)),
330         /* type_id = */ std::nullopt,
331         /* full_name = */ std::nullopt};
332   }
333 
MaybeAddTypeInfo(const SymbolIndex & address_name_to_index,const std::vector<dwarf::Types::Symbol> & dwarf_symbols,size_t address_value,ElfSymbol & node,Unification & unification)334   static void MaybeAddTypeInfo(
335       const SymbolIndex& address_name_to_index,
336       const std::vector<dwarf::Types::Symbol>& dwarf_symbols,
337       size_t address_value, ElfSymbol& node, Unification& unification) {
338     // TLS symbols address may be incorrect because of unsupported
339     // relocations. Resetting it to zero the same way as it is done in
340     // dwarf::Entry::GetAddressFromLocation.
341     // TODO: match TLS variables by address
342     const dwarf::Address address =
343         node.symbol_type == ElfSymbol::SymbolType::TLS
344             ? dwarf::Address{dwarf::Address::Kind::TLS, 0}
345             : dwarf::Address{dwarf::Address::Kind::ADDRESS, address_value};
346     // try to find the first symbol with given address
347     const auto start_it = address_name_to_index.lower_bound(
348         std::make_pair(address, std::string()));
349     auto best_symbols_it = address_name_to_index.end();
350     bool matched_by_name = false;
351     size_t candidates = 0;
352     for (auto it = start_it;
353          it != address_name_to_index.end() && it->first.first == address;
354          ++it) {
355       ++candidates;
356       // We have at least matching addresses.
357       if (it->first.second == node.symbol_name) {
358         // If we have also matching names we can stop looking further.
359         matched_by_name = true;
360         best_symbols_it = it;
361         break;
362       }
363       if (best_symbols_it == address_name_to_index.end()) {
364         // Otherwise keep the first match.
365         best_symbols_it = it;
366       }
367     }
368     if (best_symbols_it != address_name_to_index.end()) {
369       const auto& best_symbols = best_symbols_it->second;
370       Check(!best_symbols.empty()) << "best_symbols.empty()";
371       const auto& best_symbol = dwarf_symbols[best_symbols[0]];
372       for (size_t i = 1; i < best_symbols.size(); ++i) {
373         const auto& other = dwarf_symbols[best_symbols[i]];
374         // TODO: allow "compatible" duplicates, for example
375         // "void foo(int bar)" vs "void foo(const int bar)"
376         if (!IsEqual(unification, best_symbol, other)) {
377           Die() << "Duplicate DWARF symbol: address="
378                 << best_symbols_it->first.first
379                 << ", name=" << best_symbols_it->first.second;
380         }
381       }
382       if (best_symbol.scoped_name.empty()) {
383         Die() << "Anonymous DWARF symbol: address="
384               << best_symbols_it->first.first
385               << ", name=" << best_symbols_it->first.second;
386       }
387       // There may be multiple DWARF symbols with same address (zero-length
388       // arrays), or ELF symbol has different name from DWARF symbol (aliases).
389       // But if we have both situations at once, we can't match ELF to DWARF and
390       // it should be fixed in analysed binary source code.
391       Check(matched_by_name || candidates == 1)
392           << "Multiple candidate symbols without matching name: address="
393           << best_symbols_it->first.first
394           << ", name=" << best_symbols_it->first.second;
395       node.type_id = best_symbol.type_id;
396       node.full_name = best_symbol.scoped_name;
397     }
398   }
399 
400   Graph& graph_;
401   ElfDwarfHandle& elf_dwarf_handle_;
402   ElfLoader elf_;
403   ReadOptions options_;
404   const std::unique_ptr<Filter>& file_filter_;
405   Runtime& runtime_;
406 };
407 
GetLinuxKernelSymbols(const std::vector<SymbolTableEntry> & all_symbols,std::vector<std::pair<ElfSymbol,size_t>> & symbols) const408 void Reader::GetLinuxKernelSymbols(
409     const std::vector<SymbolTableEntry>& all_symbols,
410     std::vector<std::pair<ElfSymbol, size_t>>& symbols) const {
411   const auto crcs = GetCRCValuesMap(all_symbols, elf_);
412   const auto namespaces = GetNamespacesMap(all_symbols, elf_);
413   const auto ksymtab_symbols = GetKsymtabSymbols(all_symbols);
414   for (const auto& symbol : all_symbols) {
415     if (IsLinuxKernelFunctionOrVariable(ksymtab_symbols, symbol)) {
416       const size_t address = elf_.GetAbsoluteAddress(symbol);
417       symbols.emplace_back(
418           SymbolTableEntryToElfSymbol(crcs, namespaces, symbol), address);
419     }
420   }
421 }
422 
GetUserspaceSymbols(const std::vector<SymbolTableEntry> & all_symbols,std::vector<std::pair<ElfSymbol,size_t>> & symbols) const423 void Reader::GetUserspaceSymbols(
424     const std::vector<SymbolTableEntry>& all_symbols,
425     std::vector<std::pair<ElfSymbol, size_t>>& symbols) const {
426   const auto cfi_address_map = GetCFIAddressMap(elf_.GetCFISymbols(), elf_);
427   for (const auto& symbol : all_symbols) {
428     if (IsPublicFunctionOrVariable(symbol)) {
429       const auto cfi_it = cfi_address_map.find(std::string(symbol.name));
430       const size_t address = cfi_it != cfi_address_map.end()
431                                  ? cfi_it->second
432                                  : elf_.GetAbsoluteAddress(symbol);
433       symbols.emplace_back(
434           SymbolTableEntryToElfSymbol({}, {}, symbol), address);
435     }
436   }
437 }
438 
Read()439 Id Reader::Read() {
440   const auto all_symbols = elf_.GetElfSymbols();
441   const auto get_symbols = elf_.IsLinuxKernelBinary()
442                            ? &Reader::GetLinuxKernelSymbols
443                            : &Reader::GetUserspaceSymbols;
444   std::vector<std::pair<ElfSymbol, size_t>> symbols;
445   symbols.reserve(all_symbols.size());
446   (this->*get_symbols)(all_symbols, symbols);
447   symbols.shrink_to_fit();
448 
449   const Id root = BuildRoot(symbols);
450 
451   // Types produced by ELF/DWARF readers may require removing useless
452   // qualifiers.
453   return RemoveUselessQualifiers(graph_, root);
454 }
455 
456 }  // namespace
457 }  // namespace internal
458 
Read(Runtime & runtime,Graph & graph,ElfDwarfHandle & elf_dwarf_handle,ReadOptions options,const std::unique_ptr<Filter> & file_filter)459 Id Read(Runtime& runtime, Graph& graph, ElfDwarfHandle& elf_dwarf_handle,
460         ReadOptions options, const std::unique_ptr<Filter>& file_filter) {
461   return internal::Reader(runtime, graph, elf_dwarf_handle, options,
462                           file_filter)
463       .Read();
464 }
465 
466 }  // namespace elf
467 }  // namespace stg
468