1 //==- include/llvm/CodeGen/AccelTable.h - Accelerator Tables -----*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file contains support for writing accelerator tables.
10 ///
11 //===----------------------------------------------------------------------===//
12
13 #ifndef LLVM_CODEGEN_ACCELTABLE_H
14 #define LLVM_CODEGEN_ACCELTABLE_H
15
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/ADT/STLFunctionalExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/BinaryFormat/Dwarf.h"
21 #include "llvm/CodeGen/DIE.h"
22 #include "llvm/CodeGen/DwarfStringPoolEntry.h"
23 #include "llvm/Support/Allocator.h"
24 #include "llvm/Support/DJB.h"
25 #include "llvm/Support/Debug.h"
26 #include <cstdint>
27 #include <variant>
28 #include <vector>
29
30 /// \file
31 /// The DWARF and Apple accelerator tables are an indirect hash table optimized
32 /// for null lookup rather than access to known data. The Apple accelerator
33 /// tables are a precursor of the newer DWARF v5 accelerator tables. Both
34 /// formats share common design ideas.
35 ///
36 /// The Apple accelerator table are output into an on-disk format that looks
37 /// like this:
38 ///
39 /// .------------------.
40 /// | HEADER |
41 /// |------------------|
42 /// | BUCKETS |
43 /// |------------------|
44 /// | HASHES |
45 /// |------------------|
46 /// | OFFSETS |
47 /// |------------------|
48 /// | DATA |
49 /// `------------------'
50 ///
51 /// The header contains a magic number, version, type of hash function,
52 /// the number of buckets, total number of hashes, and room for a special struct
53 /// of data and the length of that struct.
54 ///
55 /// The buckets contain an index (e.g. 6) into the hashes array. The hashes
56 /// section contains all of the 32-bit hash values in contiguous memory, and the
57 /// offsets contain the offset into the data area for the particular hash.
58 ///
59 /// For a lookup example, we could hash a function name and take it modulo the
60 /// number of buckets giving us our bucket. From there we take the bucket value
61 /// as an index into the hashes table and look at each successive hash as long
62 /// as the hash value is still the same modulo result (bucket value) as earlier.
63 /// If we have a match we look at that same entry in the offsets table and grab
64 /// the offset in the data for our final match.
65 ///
66 /// The DWARF v5 accelerator table consists of zero or more name indices that
67 /// are output into an on-disk format that looks like this:
68 ///
69 /// .------------------.
70 /// | HEADER |
71 /// |------------------|
72 /// | CU LIST |
73 /// |------------------|
74 /// | LOCAL TU LIST |
75 /// |------------------|
76 /// | FOREIGN TU LIST |
77 /// |------------------|
78 /// | HASH TABLE |
79 /// |------------------|
80 /// | NAME TABLE |
81 /// |------------------|
82 /// | ABBREV TABLE |
83 /// |------------------|
84 /// | ENTRY POOL |
85 /// `------------------'
86 ///
87 /// For the full documentation please refer to the DWARF 5 standard.
88 ///
89 ///
90 /// This file defines the class template AccelTable, which is represents an
91 /// abstract view of an Accelerator table, without any notion of an on-disk
92 /// layout. This class is parameterized by an entry type, which should derive
93 /// from AccelTableData. This is the type of individual entries in the table,
94 /// and it should store the data necessary to emit them. AppleAccelTableData is
95 /// the base class for Apple Accelerator Table entries, which have a uniform
96 /// structure based on a sequence of Atoms. There are different sub-classes
97 /// derived from AppleAccelTable, which differ in the set of Atoms and how they
98 /// obtain their values.
99 ///
100 /// An Apple Accelerator Table can be serialized by calling emitAppleAccelTable
101 /// function.
102
103 namespace llvm {
104
105 class AsmPrinter;
106 class DwarfDebug;
107 class DwarfTypeUnit;
108 class MCSymbol;
109 class raw_ostream;
110
111 /// Interface which the different types of accelerator table data have to
112 /// conform. It serves as a base class for different values of the template
113 /// argument of the AccelTable class template.
114 class AccelTableData {
115 public:
116 virtual ~AccelTableData() = default;
117
118 bool operator<(const AccelTableData &Other) const {
119 return order() < Other.order();
120 }
121
122 // Subclasses should implement:
123 // static uint32_t hash(StringRef Name);
124
125 #ifndef NDEBUG
126 virtual void print(raw_ostream &OS) const = 0;
127 #endif
128 protected:
129 virtual uint64_t order() const = 0;
130 };
131
132 /// A base class holding non-template-dependant functionality of the AccelTable
133 /// class. Clients should not use this class directly but rather instantiate
134 /// AccelTable with a type derived from AccelTableData.
135 class AccelTableBase {
136 public:
137 using HashFn = uint32_t(StringRef);
138
139 /// Represents a group of entries with identical name (and hence, hash value).
140 struct HashData {
141 DwarfStringPoolEntryRef Name;
142 uint32_t HashValue;
143 std::vector<AccelTableData *> Values;
144 MCSymbol *Sym;
145
146 #ifndef NDEBUG
147 void print(raw_ostream &OS) const;
dumpHashData148 void dump() const { print(dbgs()); }
149 #endif
150 };
151 using HashList = std::vector<HashData *>;
152 using BucketList = std::vector<HashList>;
153
154 protected:
155 /// Allocator for HashData and Values.
156 BumpPtrAllocator Allocator;
157
158 using StringEntries = MapVector<StringRef, HashData>;
159 StringEntries Entries;
160
161 HashFn *Hash;
162 uint32_t BucketCount = 0;
163 uint32_t UniqueHashCount = 0;
164
165 HashList Hashes;
166 BucketList Buckets;
167
168 void computeBucketCount();
169
AccelTableBase(HashFn * Hash)170 AccelTableBase(HashFn *Hash) : Hash(Hash) {}
171
172 public:
173 void finalize(AsmPrinter *Asm, StringRef Prefix);
getBuckets()174 ArrayRef<HashList> getBuckets() const { return Buckets; }
getBucketCount()175 uint32_t getBucketCount() const { return BucketCount; }
getUniqueHashCount()176 uint32_t getUniqueHashCount() const { return UniqueHashCount; }
getUniqueNameCount()177 uint32_t getUniqueNameCount() const { return Entries.size(); }
178
179 #ifndef NDEBUG
180 void print(raw_ostream &OS) const;
dump()181 void dump() const { print(dbgs()); }
182 #endif
183
184 AccelTableBase(const AccelTableBase &) = delete;
185 void operator=(const AccelTableBase &) = delete;
186 };
187
188 /// This class holds an abstract representation of an Accelerator Table,
189 /// consisting of a sequence of buckets, each bucket containint a sequence of
190 /// HashData entries. The class is parameterized by the type of entries it
191 /// holds. The type template parameter also defines the hash function to use for
192 /// hashing names.
193 template <typename DataT> class AccelTable : public AccelTableBase {
194 public:
AccelTable()195 AccelTable() : AccelTableBase(DataT::hash) {}
196
197 template <typename... Types>
198 void addName(DwarfStringPoolEntryRef Name, Types &&... Args);
clear()199 void clear() { Entries.clear(); }
200 void addEntries(AccelTable<DataT> &Table);
getEntries()201 const StringEntries getEntries() const { return Entries; }
202 };
203
204 template <typename AccelTableDataT>
205 template <typename... Types>
addName(DwarfStringPoolEntryRef Name,Types &&...Args)206 void AccelTable<AccelTableDataT>::addName(DwarfStringPoolEntryRef Name,
207 Types &&... Args) {
208 assert(Buckets.empty() && "Already finalized!");
209 // If the string is in the list already then add this die to the list
210 // otherwise add a new one.
211 auto &It = Entries[Name.getString()];
212 if (It.Values.empty()) {
213 It.Name = Name;
214 It.HashValue = Hash(Name.getString());
215 }
216 It.Values.push_back(new (Allocator)
217 AccelTableDataT(std::forward<Types>(Args)...));
218 }
219
220 /// A base class for different implementations of Data classes for Apple
221 /// Accelerator Tables. The columns in the table are defined by the static Atoms
222 /// variable defined on the subclasses.
223 class AppleAccelTableData : public AccelTableData {
224 public:
225 /// An Atom defines the form of the data in an Apple accelerator table.
226 /// Conceptually it is a column in the accelerator consisting of a type and a
227 /// specification of the form of its data.
228 struct Atom {
229 /// Atom Type.
230 const uint16_t Type;
231 /// DWARF Form.
232 const uint16_t Form;
233
AtomAtom234 constexpr Atom(uint16_t Type, uint16_t Form) : Type(Type), Form(Form) {}
235
236 #ifndef NDEBUG
237 void print(raw_ostream &OS) const;
dumpAtom238 void dump() const { print(dbgs()); }
239 #endif
240 };
241 // Subclasses should define:
242 // static constexpr Atom Atoms[];
243
244 virtual void emit(AsmPrinter *Asm) const = 0;
245
hash(StringRef Buffer)246 static uint32_t hash(StringRef Buffer) { return djbHash(Buffer); }
247 };
248
249 /// The Data class implementation for DWARF v5 accelerator table. Unlike the
250 /// Apple Data classes, this class is just a DIE wrapper, and does not know to
251 /// serialize itself. The complete serialization logic is in the
252 /// emitDWARF5AccelTable function.
253 class DWARF5AccelTableData : public AccelTableData {
254 public:
255 struct AttributeEncoding {
256 dwarf::Index Index;
257 dwarf::Form Form;
258 };
259
hash(StringRef Name)260 static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); }
261
262 DWARF5AccelTableData(const DIE &Die, const uint32_t UnitID,
263 const bool IsTU = false);
264 DWARF5AccelTableData(const uint64_t DieOffset, const unsigned DieTag,
265 const unsigned UnitID, const bool IsTU = false)
OffsetVal(DieOffset)266 : OffsetVal(DieOffset), DieTag(DieTag), UnitID(UnitID), IsTU(IsTU) {}
267
268 #ifndef NDEBUG
269 void print(raw_ostream &OS) const override;
270 #endif
271
getDieOffset()272 uint64_t getDieOffset() const {
273 assert(isNormalized() && "Accessing DIE Offset before normalizing.");
274 return std::get<uint64_t>(OffsetVal);
275 }
getDieTag()276 unsigned getDieTag() const { return DieTag; }
getUnitID()277 unsigned getUnitID() const { return UnitID; }
isTU()278 bool isTU() const { return IsTU; }
normalizeDIEToOffset()279 void normalizeDIEToOffset() {
280 assert(!isNormalized() && "Accessing offset after normalizing.");
281 OffsetVal = std::get<const DIE *>(OffsetVal)->getOffset();
282 }
isNormalized()283 bool isNormalized() const {
284 return std::holds_alternative<uint64_t>(OffsetVal);
285 }
286
287 protected:
288 std::variant<const DIE *, uint64_t> OffsetVal;
289 uint32_t DieTag : 16;
290 uint32_t UnitID : 15;
291 uint32_t IsTU : 1;
292
order()293 uint64_t order() const override { return getDieOffset(); }
294 };
295
296 struct TypeUnitMetaInfo {
297 // Symbol for start of the TU section or signature if this is SplitDwarf.
298 std::variant<MCSymbol *, uint64_t> LabelOrSignature;
299 // Unique ID of Type Unit.
300 unsigned UniqueID;
301 };
302 using TUVectorTy = SmallVector<TypeUnitMetaInfo, 1>;
303 class DWARF5AccelTable : public AccelTable<DWARF5AccelTableData> {
304 // Symbols to start of all the TU sections that were generated.
305 TUVectorTy TUSymbolsOrHashes;
306
307 public:
308 struct UnitIndexAndEncoding {
309 unsigned Index;
310 DWARF5AccelTableData::AttributeEncoding Encoding;
311 };
312 /// Returns type units that were constructed.
getTypeUnitsSymbols()313 const TUVectorTy &getTypeUnitsSymbols() { return TUSymbolsOrHashes; }
314 /// Add a type unit start symbol.
315 void addTypeUnitSymbol(DwarfTypeUnit &U);
316 /// Add a type unit Signature.
317 void addTypeUnitSignature(DwarfTypeUnit &U);
318 /// Convert DIE entries to explicit offset.
319 /// Needs to be called after DIE offsets are computed.
convertDieToOffset()320 void convertDieToOffset() {
321 for (auto &Entry : Entries) {
322 for (AccelTableData *Value : Entry.second.Values) {
323 DWARF5AccelTableData *Data = static_cast<DWARF5AccelTableData *>(Value);
324 // For TU we normalize as each Unit is emitted.
325 // So when this is invoked after CU construction we will be in mixed
326 // state.
327 if (!Data->isNormalized())
328 Data->normalizeDIEToOffset();
329 }
330 }
331 }
332
addTypeEntries(DWARF5AccelTable & Table)333 void addTypeEntries(DWARF5AccelTable &Table) {
334 for (auto &Entry : Table.getEntries()) {
335 for (AccelTableData *Value : Entry.second.Values) {
336 DWARF5AccelTableData *Data = static_cast<DWARF5AccelTableData *>(Value);
337 addName(Entry.second.Name, Data->getDieOffset(), Data->getDieTag(),
338 Data->getUnitID(), true);
339 }
340 }
341 }
342 };
343
344 void emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents,
345 StringRef Prefix, const MCSymbol *SecBegin,
346 ArrayRef<AppleAccelTableData::Atom> Atoms);
347
348 /// Emit an Apple Accelerator Table consisting of entries in the specified
349 /// AccelTable. The DataT template parameter should be derived from
350 /// AppleAccelTableData.
351 template <typename DataT>
emitAppleAccelTable(AsmPrinter * Asm,AccelTable<DataT> & Contents,StringRef Prefix,const MCSymbol * SecBegin)352 void emitAppleAccelTable(AsmPrinter *Asm, AccelTable<DataT> &Contents,
353 StringRef Prefix, const MCSymbol *SecBegin) {
354 static_assert(std::is_convertible<DataT *, AppleAccelTableData *>::value);
355 emitAppleAccelTableImpl(Asm, Contents, Prefix, SecBegin, DataT::Atoms);
356 }
357
358 void emitDWARF5AccelTable(AsmPrinter *Asm, DWARF5AccelTable &Contents,
359 const DwarfDebug &DD,
360 ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs);
361
362 /// Emit a DWARFv5 Accelerator Table consisting of entries in the specified
363 /// AccelTable. The \p CUs contains either symbols keeping offsets to the
364 /// start of compilation unit, either offsets to the start of compilation
365 /// unit themselves.
366 void emitDWARF5AccelTable(
367 AsmPrinter *Asm, DWARF5AccelTable &Contents,
368 ArrayRef<std::variant<MCSymbol *, uint64_t>> CUs,
369 llvm::function_ref<std::optional<DWARF5AccelTable::UnitIndexAndEncoding>(
370 const DWARF5AccelTableData &)>
371 getIndexForEntry);
372
373 /// Accelerator table data implementation for simple Apple accelerator tables
374 /// with just a DIE reference.
375 class AppleAccelTableOffsetData : public AppleAccelTableData {
376 public:
AppleAccelTableOffsetData(const DIE & D)377 AppleAccelTableOffsetData(const DIE &D) : Die(D) {}
378
379 void emit(AsmPrinter *Asm) const override;
380
381 static constexpr Atom Atoms[] = {
382 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
383
384 #ifndef NDEBUG
385 void print(raw_ostream &OS) const override;
386 #endif
387 protected:
order()388 uint64_t order() const override { return Die.getOffset(); }
389
390 const DIE &Die;
391 };
392
393 /// Accelerator table data implementation for Apple type accelerator tables.
394 class AppleAccelTableTypeData : public AppleAccelTableOffsetData {
395 public:
AppleAccelTableTypeData(const DIE & D)396 AppleAccelTableTypeData(const DIE &D) : AppleAccelTableOffsetData(D) {}
397
398 void emit(AsmPrinter *Asm) const override;
399
400 static constexpr Atom Atoms[] = {
401 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
402 Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
403 Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
404
405 #ifndef NDEBUG
406 void print(raw_ostream &OS) const override;
407 #endif
408 };
409
410 /// Accelerator table data implementation for simple Apple accelerator tables
411 /// with a DIE offset but no actual DIE pointer.
412 class AppleAccelTableStaticOffsetData : public AppleAccelTableData {
413 public:
AppleAccelTableStaticOffsetData(uint32_t Offset)414 AppleAccelTableStaticOffsetData(uint32_t Offset) : Offset(Offset) {}
415
416 void emit(AsmPrinter *Asm) const override;
417
418 static constexpr Atom Atoms[] = {
419 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
420
421 #ifndef NDEBUG
422 void print(raw_ostream &OS) const override;
423 #endif
424 protected:
order()425 uint64_t order() const override { return Offset; }
426
427 uint32_t Offset;
428 };
429
430 /// Accelerator table data implementation for type accelerator tables with
431 /// a DIE offset but no actual DIE pointer.
432 class AppleAccelTableStaticTypeData : public AppleAccelTableStaticOffsetData {
433 public:
AppleAccelTableStaticTypeData(uint32_t Offset,uint16_t Tag,bool ObjCClassIsImplementation,uint32_t QualifiedNameHash)434 AppleAccelTableStaticTypeData(uint32_t Offset, uint16_t Tag,
435 bool ObjCClassIsImplementation,
436 uint32_t QualifiedNameHash)
437 : AppleAccelTableStaticOffsetData(Offset),
438 QualifiedNameHash(QualifiedNameHash), Tag(Tag),
439 ObjCClassIsImplementation(ObjCClassIsImplementation) {}
440
441 void emit(AsmPrinter *Asm) const override;
442
443 static constexpr Atom Atoms[] = {
444 Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
445 Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
446 Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)};
447
448 #ifndef NDEBUG
449 void print(raw_ostream &OS) const override;
450 #endif
451 protected:
order()452 uint64_t order() const override { return Offset; }
453
454 uint32_t QualifiedNameHash;
455 uint16_t Tag;
456 bool ObjCClassIsImplementation;
457 };
458
459 } // end namespace llvm
460
461 #endif // LLVM_CODEGEN_ACCELTABLE_H
462