xref: /aosp_15_r20/external/zucchini/address_translator.h (revision a03ca8b91e029cd15055c20c78c2e087c84792e4)
1*a03ca8b9SKrzysztof Kosiński // Copyright 2017 The Chromium Authors. All rights reserved.
2*a03ca8b9SKrzysztof Kosiński // Use of this source code is governed by a BSD-style license that can be
3*a03ca8b9SKrzysztof Kosiński // found in the LICENSE file.
4*a03ca8b9SKrzysztof Kosiński 
5*a03ca8b9SKrzysztof Kosiński #ifndef COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_
6*a03ca8b9SKrzysztof Kosiński #define COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_
7*a03ca8b9SKrzysztof Kosiński 
8*a03ca8b9SKrzysztof Kosiński #include <stdint.h>
9*a03ca8b9SKrzysztof Kosiński 
10*a03ca8b9SKrzysztof Kosiński #include <tuple>
11*a03ca8b9SKrzysztof Kosiński #include <vector>
12*a03ca8b9SKrzysztof Kosiński 
13*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/algorithm.h"
14*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/image_utils.h"
15*a03ca8b9SKrzysztof Kosiński 
16*a03ca8b9SKrzysztof Kosiński namespace zucchini {
17*a03ca8b9SKrzysztof Kosiński 
18*a03ca8b9SKrzysztof Kosiński // There are several ways to reason about addresses in an image:
19*a03ca8b9SKrzysztof Kosiński // - Offset: Position relative to start of image.
20*a03ca8b9SKrzysztof Kosiński // - VA (Virtual Address): Virtual memory address of a loaded image. This is
21*a03ca8b9SKrzysztof Kosiński //   subject to relocation by the OS.
22*a03ca8b9SKrzysztof Kosiński // - RVA (Relative Virtual Address): VA relative to some base address. This is
23*a03ca8b9SKrzysztof Kosiński //   the preferred way to specify pointers in an image.
24*a03ca8b9SKrzysztof Kosiński //
25*a03ca8b9SKrzysztof Kosiński // Zucchini is primarily concerned with offsets and RVAs. Executable images like
26*a03ca8b9SKrzysztof Kosiński // PE and ELF are organized into sections. Each section specifies offset and RVA
27*a03ca8b9SKrzysztof Kosiński // ranges as:
28*a03ca8b9SKrzysztof Kosiński //   {Offset start, offset size, RVA start, RVA size}.
29*a03ca8b9SKrzysztof Kosiński // This constitutes a basic unit to translate between offsets and RVAs. Note:
30*a03ca8b9SKrzysztof Kosiński // |offset size| < |RVA size| is possible. For example, the .bss section can can
31*a03ca8b9SKrzysztof Kosiński // have zero-filled statically-allocated data that have no corresponding bytes
32*a03ca8b9SKrzysztof Kosiński // on image (to save space). This poses a problem for Zucchini, which stores
33*a03ca8b9SKrzysztof Kosiński // addresses as offsets: now we'd have "dangling RVAs" that don't map to
34*a03ca8b9SKrzysztof Kosiński // offsets! Some ways to handling this are:
35*a03ca8b9SKrzysztof Kosiński // 1. Ignore all dangling RVAs. This simplifies the algorithm, but also means
36*a03ca8b9SKrzysztof Kosiński //    some reference targets would escape detection and processing.
37*a03ca8b9SKrzysztof Kosiński // 2. Create distinct "fake offsets" to accommodate dangling RVAs. Image data
38*a03ca8b9SKrzysztof Kosiński //    must not be read on these fake offsets, which are only valid as target
39*a03ca8b9SKrzysztof Kosiński //    addresses for reference matching.
40*a03ca8b9SKrzysztof Kosiński // As for |RVA size| < |offset size|, the extra portion just gets ignored.
41*a03ca8b9SKrzysztof Kosiński //
42*a03ca8b9SKrzysztof Kosiński // Status: Zucchini implements (2) in a simple way: dangling RVAs are mapped to
43*a03ca8b9SKrzysztof Kosiński // fake offsets by adding a large value. This value can be chosen as an
44*a03ca8b9SKrzysztof Kosiński // exclusive upper bound of all offsets (i.e., image size). This allows them to
45*a03ca8b9SKrzysztof Kosiński // be easily detected and processed as a special-case.
46*a03ca8b9SKrzysztof Kosiński // TODO(huangs): Investigate option (1), now that the refactored code makes
47*a03ca8b9SKrzysztof Kosiński // experimentation easier.
48*a03ca8b9SKrzysztof Kosiński // TODO(huangs): Make AddressTranslator smarter: Allocate unused |offset_t|
49*a03ca8b9SKrzysztof Kosiński // ranges and create "fake" units to accommodate dangling RVAs. Then
50*a03ca8b9SKrzysztof Kosiński // AddressTranslator can be simplified.
51*a03ca8b9SKrzysztof Kosiński 
52*a03ca8b9SKrzysztof Kosiński // Virtual Address relative to some base address (RVA). There's distinction
53*a03ca8b9SKrzysztof Kosiński // between "valid RVA" and "existent RVA":
54*a03ca8b9SKrzysztof Kosiński // - Valid RVA: An RVA that's reasonably small, i.e., below |kRvaBound|.
55*a03ca8b9SKrzysztof Kosiński // - Existent RVA: An RVA that has semantic meaning in an image, and may
56*a03ca8b9SKrzysztof Kosiński //   translate to an offset in an image or (if a dangling RVA) a fake offset.
57*a03ca8b9SKrzysztof Kosiński //   All existent RVAs are valid RVAs.
58*a03ca8b9SKrzysztof Kosiński using rva_t = uint32_t;
59*a03ca8b9SKrzysztof Kosiński // Divide by 2 to match |kOffsetBound|.
60*a03ca8b9SKrzysztof Kosiński constexpr rva_t kRvaBound = static_cast<rva_t>(-1) / 2;
61*a03ca8b9SKrzysztof Kosiński constexpr rva_t kInvalidRva = static_cast<rva_t>(-2);
62*a03ca8b9SKrzysztof Kosiński 
63*a03ca8b9SKrzysztof Kosiński // A utility to translate between offsets and RVAs in an image.
64*a03ca8b9SKrzysztof Kosiński class AddressTranslator {
65*a03ca8b9SKrzysztof Kosiński  public:
66*a03ca8b9SKrzysztof Kosiński   // A basic unit for address translation, roughly maps to a section, but may
67*a03ca8b9SKrzysztof Kosiński   // be processed (e.g., merged) as an optimization.
68*a03ca8b9SKrzysztof Kosiński   struct Unit {
offset_endUnit69*a03ca8b9SKrzysztof Kosiński     offset_t offset_end() const { return offset_begin + offset_size; }
rva_endUnit70*a03ca8b9SKrzysztof Kosiński     rva_t rva_end() const { return rva_begin + rva_size; }
IsEmptyUnit71*a03ca8b9SKrzysztof Kosiński     bool IsEmpty() const {
72*a03ca8b9SKrzysztof Kosiński       // |rva_size == 0| and |offset_size > 0| means Unit hasn't been trimmed
73*a03ca8b9SKrzysztof Kosiński       // yet, and once it is then it's empty.
74*a03ca8b9SKrzysztof Kosiński       // |rva_size > 0| and |offset_size == 0| means Unit has dangling RVA, but
75*a03ca8b9SKrzysztof Kosiński       // is not empty.
76*a03ca8b9SKrzysztof Kosiński       return rva_size == 0;
77*a03ca8b9SKrzysztof Kosiński     }
CoversOffsetUnit78*a03ca8b9SKrzysztof Kosiński     bool CoversOffset(offset_t offset) const {
79*a03ca8b9SKrzysztof Kosiński       return RangeCovers(offset_begin, offset_size, offset);
80*a03ca8b9SKrzysztof Kosiński     }
CoversRvaUnit81*a03ca8b9SKrzysztof Kosiński     bool CoversRva(rva_t rva) const {
82*a03ca8b9SKrzysztof Kosiński       return RangeCovers(rva_begin, rva_size, rva);
83*a03ca8b9SKrzysztof Kosiński     }
CoversDanglingRvaUnit84*a03ca8b9SKrzysztof Kosiński     bool CoversDanglingRva(rva_t rva) const {
85*a03ca8b9SKrzysztof Kosiński       return CoversRva(rva) && rva - rva_begin >= offset_size;
86*a03ca8b9SKrzysztof Kosiński     }
87*a03ca8b9SKrzysztof Kosiński     // Assumes valid |offset| (*cannot* be fake offset).
OffsetToRvaUnsafeUnit88*a03ca8b9SKrzysztof Kosiński     rva_t OffsetToRvaUnsafe(offset_t offset) const {
89*a03ca8b9SKrzysztof Kosiński       return offset - offset_begin + rva_begin;
90*a03ca8b9SKrzysztof Kosiński     }
91*a03ca8b9SKrzysztof Kosiński     // Assumes valid |rva| (*can* be danging RVA).
RvaToOffsetUnsafeUnit92*a03ca8b9SKrzysztof Kosiński     offset_t RvaToOffsetUnsafe(rva_t rva, offset_t fake_offset_begin) const {
93*a03ca8b9SKrzysztof Kosiński       rva_t delta = rva - rva_begin;
94*a03ca8b9SKrzysztof Kosiński       return delta < offset_size ? delta + offset_begin
95*a03ca8b9SKrzysztof Kosiński                                  : fake_offset_begin + rva;
96*a03ca8b9SKrzysztof Kosiński     }
HasDanglingRvaUnit97*a03ca8b9SKrzysztof Kosiński     bool HasDanglingRva() const { return rva_size > offset_size; }
98*a03ca8b9SKrzysztof Kosiński     friend bool operator==(const Unit& a, const Unit& b) {
99*a03ca8b9SKrzysztof Kosiński       return std::tie(a.offset_begin, a.offset_size, a.rva_begin, a.rva_size) ==
100*a03ca8b9SKrzysztof Kosiński              std::tie(b.offset_begin, b.offset_size, b.rva_begin, b.rva_size);
101*a03ca8b9SKrzysztof Kosiński     }
102*a03ca8b9SKrzysztof Kosiński 
103*a03ca8b9SKrzysztof Kosiński     offset_t offset_begin;
104*a03ca8b9SKrzysztof Kosiński     offset_t offset_size;
105*a03ca8b9SKrzysztof Kosiński     rva_t rva_begin;
106*a03ca8b9SKrzysztof Kosiński     rva_t rva_size;
107*a03ca8b9SKrzysztof Kosiński   };
108*a03ca8b9SKrzysztof Kosiński 
109*a03ca8b9SKrzysztof Kosiński   // An adaptor for AddressTranslator::OffsetToRva() that caches the last Unit
110*a03ca8b9SKrzysztof Kosiński   // found, to reduce the number of OffsetToUnit() calls for clustered queries.
111*a03ca8b9SKrzysztof Kosiński   class OffsetToRvaCache {
112*a03ca8b9SKrzysztof Kosiński    public:
113*a03ca8b9SKrzysztof Kosiński     // Embeds |translator| for use. Now object lifetime is tied to |translator|
114*a03ca8b9SKrzysztof Kosiński     // lifetime.
115*a03ca8b9SKrzysztof Kosiński     explicit OffsetToRvaCache(const AddressTranslator& translator);
116*a03ca8b9SKrzysztof Kosiński     OffsetToRvaCache(const OffsetToRvaCache&) = delete;
117*a03ca8b9SKrzysztof Kosiński     const OffsetToRvaCache& operator=(const OffsetToRvaCache&) = delete;
118*a03ca8b9SKrzysztof Kosiński 
119*a03ca8b9SKrzysztof Kosiński     rva_t Convert(offset_t offset) const;
120*a03ca8b9SKrzysztof Kosiński 
121*a03ca8b9SKrzysztof Kosiński    private:
122*a03ca8b9SKrzysztof Kosiński     const AddressTranslator& translator_;
123*a03ca8b9SKrzysztof Kosiński     mutable const AddressTranslator::Unit* cached_unit_ = nullptr;
124*a03ca8b9SKrzysztof Kosiński   };
125*a03ca8b9SKrzysztof Kosiński 
126*a03ca8b9SKrzysztof Kosiński   // An adaptor for AddressTranslator::RvaToOffset() that caches the last Unit
127*a03ca8b9SKrzysztof Kosiński   // found, to reduce the number of RvaToUnit() calls for clustered queries.
128*a03ca8b9SKrzysztof Kosiński   class RvaToOffsetCache {
129*a03ca8b9SKrzysztof Kosiński    public:
130*a03ca8b9SKrzysztof Kosiński     // Embeds |translator| for use. Now object lifetime is tied to |translator|
131*a03ca8b9SKrzysztof Kosiński     // lifetime.
132*a03ca8b9SKrzysztof Kosiński     explicit RvaToOffsetCache(const AddressTranslator& translator);
133*a03ca8b9SKrzysztof Kosiński     RvaToOffsetCache(const RvaToOffsetCache&) = delete;
134*a03ca8b9SKrzysztof Kosiński     const RvaToOffsetCache& operator=(const RvaToOffsetCache&) = delete;
135*a03ca8b9SKrzysztof Kosiński 
136*a03ca8b9SKrzysztof Kosiński     bool IsValid(rva_t rva) const;
137*a03ca8b9SKrzysztof Kosiński 
138*a03ca8b9SKrzysztof Kosiński     offset_t Convert(rva_t rva) const;
139*a03ca8b9SKrzysztof Kosiński 
140*a03ca8b9SKrzysztof Kosiński    private:
141*a03ca8b9SKrzysztof Kosiński     const AddressTranslator& translator_;
142*a03ca8b9SKrzysztof Kosiński     mutable const AddressTranslator::Unit* cached_unit_ = nullptr;
143*a03ca8b9SKrzysztof Kosiński   };
144*a03ca8b9SKrzysztof Kosiński 
145*a03ca8b9SKrzysztof Kosiński   enum Status {
146*a03ca8b9SKrzysztof Kosiński     kSuccess = 0,
147*a03ca8b9SKrzysztof Kosiński     kErrorOverflow,
148*a03ca8b9SKrzysztof Kosiński     kErrorBadOverlap,
149*a03ca8b9SKrzysztof Kosiński     kErrorBadOverlapDanglingRva,
150*a03ca8b9SKrzysztof Kosiński     kErrorFakeOffsetBeginTooLarge,
151*a03ca8b9SKrzysztof Kosiński   };
152*a03ca8b9SKrzysztof Kosiński 
153*a03ca8b9SKrzysztof Kosiński   AddressTranslator();
154*a03ca8b9SKrzysztof Kosiński   AddressTranslator(AddressTranslator&&);
155*a03ca8b9SKrzysztof Kosiński   AddressTranslator(const AddressTranslator&) = delete;
156*a03ca8b9SKrzysztof Kosiński   const AddressTranslator& operator=(const AddressTranslator&) = delete;
157*a03ca8b9SKrzysztof Kosiński   ~AddressTranslator();
158*a03ca8b9SKrzysztof Kosiński 
159*a03ca8b9SKrzysztof Kosiński   // Consumes |units| to populate data in this class. Performs consistency
160*a03ca8b9SKrzysztof Kosiński   // checks and overlapping Units. Returns Status to indicate success.
161*a03ca8b9SKrzysztof Kosiński   Status Initialize(std::vector<Unit>&& units);
162*a03ca8b9SKrzysztof Kosiński 
163*a03ca8b9SKrzysztof Kosiński   // Returns the (possibly dangling) RVA corresponding to |offset|, or
164*a03ca8b9SKrzysztof Kosiński   // kInvalidRva if not found.
165*a03ca8b9SKrzysztof Kosiński   rva_t OffsetToRva(offset_t offset) const;
166*a03ca8b9SKrzysztof Kosiński 
167*a03ca8b9SKrzysztof Kosiński   // Returns the (possibly fake) offset corresponding to |rva|, or
168*a03ca8b9SKrzysztof Kosiński   // kInvalidOffset if not found (i.e., |rva| is non-existent).
169*a03ca8b9SKrzysztof Kosiński   offset_t RvaToOffset(rva_t rva) const;
170*a03ca8b9SKrzysztof Kosiński 
171*a03ca8b9SKrzysztof Kosiński   // For testing.
fake_offset_begin()172*a03ca8b9SKrzysztof Kosiński   offset_t fake_offset_begin() const { return fake_offset_begin_; }
173*a03ca8b9SKrzysztof Kosiński 
units_sorted_by_offset()174*a03ca8b9SKrzysztof Kosiński   const std::vector<Unit>& units_sorted_by_offset() const {
175*a03ca8b9SKrzysztof Kosiński     return units_sorted_by_offset_;
176*a03ca8b9SKrzysztof Kosiński   }
177*a03ca8b9SKrzysztof Kosiński 
units_sorted_by_rva()178*a03ca8b9SKrzysztof Kosiński   const std::vector<Unit>& units_sorted_by_rva() const {
179*a03ca8b9SKrzysztof Kosiński     return units_sorted_by_rva_;
180*a03ca8b9SKrzysztof Kosiński   }
181*a03ca8b9SKrzysztof Kosiński 
182*a03ca8b9SKrzysztof Kosiński  private:
183*a03ca8b9SKrzysztof Kosiński   // Helper to find the Unit that contains given |offset| or |rva|. Returns null
184*a03ca8b9SKrzysztof Kosiński   // if not found.
185*a03ca8b9SKrzysztof Kosiński   const Unit* OffsetToUnit(offset_t offset) const;
186*a03ca8b9SKrzysztof Kosiński   const Unit* RvaToUnit(rva_t rva) const;
187*a03ca8b9SKrzysztof Kosiński 
188*a03ca8b9SKrzysztof Kosiński   // Storage of Units. All offset ranges are non-empty and disjoint. Likewise
189*a03ca8b9SKrzysztof Kosiński   // for all RVA ranges.
190*a03ca8b9SKrzysztof Kosiński   std::vector<Unit> units_sorted_by_offset_;
191*a03ca8b9SKrzysztof Kosiński   std::vector<Unit> units_sorted_by_rva_;
192*a03ca8b9SKrzysztof Kosiński 
193*a03ca8b9SKrzysztof Kosiński   // Conversion factor to translate between dangling RVAs and fake offsets.
194*a03ca8b9SKrzysztof Kosiński   offset_t fake_offset_begin_;
195*a03ca8b9SKrzysztof Kosiński };
196*a03ca8b9SKrzysztof Kosiński 
197*a03ca8b9SKrzysztof Kosiński }  // namespace zucchini
198*a03ca8b9SKrzysztof Kosiński 
199*a03ca8b9SKrzysztof Kosiński #endif  // COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_
200