1*a03ca8b9SKrzysztof Kosiński // Copyright 2017 The Chromium Authors. All rights reserved. 2*a03ca8b9SKrzysztof Kosiński // Use of this source code is governed by a BSD-style license that can be 3*a03ca8b9SKrzysztof Kosiński // found in the LICENSE file. 4*a03ca8b9SKrzysztof Kosiński 5*a03ca8b9SKrzysztof Kosiński #ifndef COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ 6*a03ca8b9SKrzysztof Kosiński #define COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ 7*a03ca8b9SKrzysztof Kosiński 8*a03ca8b9SKrzysztof Kosiński #include <stdint.h> 9*a03ca8b9SKrzysztof Kosiński 10*a03ca8b9SKrzysztof Kosiński #include <tuple> 11*a03ca8b9SKrzysztof Kosiński #include <vector> 12*a03ca8b9SKrzysztof Kosiński 13*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/algorithm.h" 14*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/image_utils.h" 15*a03ca8b9SKrzysztof Kosiński 16*a03ca8b9SKrzysztof Kosiński namespace zucchini { 17*a03ca8b9SKrzysztof Kosiński 18*a03ca8b9SKrzysztof Kosiński // There are several ways to reason about addresses in an image: 19*a03ca8b9SKrzysztof Kosiński // - Offset: Position relative to start of image. 20*a03ca8b9SKrzysztof Kosiński // - VA (Virtual Address): Virtual memory address of a loaded image. This is 21*a03ca8b9SKrzysztof Kosiński // subject to relocation by the OS. 22*a03ca8b9SKrzysztof Kosiński // - RVA (Relative Virtual Address): VA relative to some base address. This is 23*a03ca8b9SKrzysztof Kosiński // the preferred way to specify pointers in an image. 24*a03ca8b9SKrzysztof Kosiński // 25*a03ca8b9SKrzysztof Kosiński // Zucchini is primarily concerned with offsets and RVAs. Executable images like 26*a03ca8b9SKrzysztof Kosiński // PE and ELF are organized into sections. Each section specifies offset and RVA 27*a03ca8b9SKrzysztof Kosiński // ranges as: 28*a03ca8b9SKrzysztof Kosiński // {Offset start, offset size, RVA start, RVA size}. 29*a03ca8b9SKrzysztof Kosiński // This constitutes a basic unit to translate between offsets and RVAs. Note: 30*a03ca8b9SKrzysztof Kosiński // |offset size| < |RVA size| is possible. For example, the .bss section can can 31*a03ca8b9SKrzysztof Kosiński // have zero-filled statically-allocated data that have no corresponding bytes 32*a03ca8b9SKrzysztof Kosiński // on image (to save space). This poses a problem for Zucchini, which stores 33*a03ca8b9SKrzysztof Kosiński // addresses as offsets: now we'd have "dangling RVAs" that don't map to 34*a03ca8b9SKrzysztof Kosiński // offsets! Some ways to handling this are: 35*a03ca8b9SKrzysztof Kosiński // 1. Ignore all dangling RVAs. This simplifies the algorithm, but also means 36*a03ca8b9SKrzysztof Kosiński // some reference targets would escape detection and processing. 37*a03ca8b9SKrzysztof Kosiński // 2. Create distinct "fake offsets" to accommodate dangling RVAs. Image data 38*a03ca8b9SKrzysztof Kosiński // must not be read on these fake offsets, which are only valid as target 39*a03ca8b9SKrzysztof Kosiński // addresses for reference matching. 40*a03ca8b9SKrzysztof Kosiński // As for |RVA size| < |offset size|, the extra portion just gets ignored. 41*a03ca8b9SKrzysztof Kosiński // 42*a03ca8b9SKrzysztof Kosiński // Status: Zucchini implements (2) in a simple way: dangling RVAs are mapped to 43*a03ca8b9SKrzysztof Kosiński // fake offsets by adding a large value. This value can be chosen as an 44*a03ca8b9SKrzysztof Kosiński // exclusive upper bound of all offsets (i.e., image size). This allows them to 45*a03ca8b9SKrzysztof Kosiński // be easily detected and processed as a special-case. 46*a03ca8b9SKrzysztof Kosiński // TODO(huangs): Investigate option (1), now that the refactored code makes 47*a03ca8b9SKrzysztof Kosiński // experimentation easier. 48*a03ca8b9SKrzysztof Kosiński // TODO(huangs): Make AddressTranslator smarter: Allocate unused |offset_t| 49*a03ca8b9SKrzysztof Kosiński // ranges and create "fake" units to accommodate dangling RVAs. Then 50*a03ca8b9SKrzysztof Kosiński // AddressTranslator can be simplified. 51*a03ca8b9SKrzysztof Kosiński 52*a03ca8b9SKrzysztof Kosiński // Virtual Address relative to some base address (RVA). There's distinction 53*a03ca8b9SKrzysztof Kosiński // between "valid RVA" and "existent RVA": 54*a03ca8b9SKrzysztof Kosiński // - Valid RVA: An RVA that's reasonably small, i.e., below |kRvaBound|. 55*a03ca8b9SKrzysztof Kosiński // - Existent RVA: An RVA that has semantic meaning in an image, and may 56*a03ca8b9SKrzysztof Kosiński // translate to an offset in an image or (if a dangling RVA) a fake offset. 57*a03ca8b9SKrzysztof Kosiński // All existent RVAs are valid RVAs. 58*a03ca8b9SKrzysztof Kosiński using rva_t = uint32_t; 59*a03ca8b9SKrzysztof Kosiński // Divide by 2 to match |kOffsetBound|. 60*a03ca8b9SKrzysztof Kosiński constexpr rva_t kRvaBound = static_cast<rva_t>(-1) / 2; 61*a03ca8b9SKrzysztof Kosiński constexpr rva_t kInvalidRva = static_cast<rva_t>(-2); 62*a03ca8b9SKrzysztof Kosiński 63*a03ca8b9SKrzysztof Kosiński // A utility to translate between offsets and RVAs in an image. 64*a03ca8b9SKrzysztof Kosiński class AddressTranslator { 65*a03ca8b9SKrzysztof Kosiński public: 66*a03ca8b9SKrzysztof Kosiński // A basic unit for address translation, roughly maps to a section, but may 67*a03ca8b9SKrzysztof Kosiński // be processed (e.g., merged) as an optimization. 68*a03ca8b9SKrzysztof Kosiński struct Unit { offset_endUnit69*a03ca8b9SKrzysztof Kosiński offset_t offset_end() const { return offset_begin + offset_size; } rva_endUnit70*a03ca8b9SKrzysztof Kosiński rva_t rva_end() const { return rva_begin + rva_size; } IsEmptyUnit71*a03ca8b9SKrzysztof Kosiński bool IsEmpty() const { 72*a03ca8b9SKrzysztof Kosiński // |rva_size == 0| and |offset_size > 0| means Unit hasn't been trimmed 73*a03ca8b9SKrzysztof Kosiński // yet, and once it is then it's empty. 74*a03ca8b9SKrzysztof Kosiński // |rva_size > 0| and |offset_size == 0| means Unit has dangling RVA, but 75*a03ca8b9SKrzysztof Kosiński // is not empty. 76*a03ca8b9SKrzysztof Kosiński return rva_size == 0; 77*a03ca8b9SKrzysztof Kosiński } CoversOffsetUnit78*a03ca8b9SKrzysztof Kosiński bool CoversOffset(offset_t offset) const { 79*a03ca8b9SKrzysztof Kosiński return RangeCovers(offset_begin, offset_size, offset); 80*a03ca8b9SKrzysztof Kosiński } CoversRvaUnit81*a03ca8b9SKrzysztof Kosiński bool CoversRva(rva_t rva) const { 82*a03ca8b9SKrzysztof Kosiński return RangeCovers(rva_begin, rva_size, rva); 83*a03ca8b9SKrzysztof Kosiński } CoversDanglingRvaUnit84*a03ca8b9SKrzysztof Kosiński bool CoversDanglingRva(rva_t rva) const { 85*a03ca8b9SKrzysztof Kosiński return CoversRva(rva) && rva - rva_begin >= offset_size; 86*a03ca8b9SKrzysztof Kosiński } 87*a03ca8b9SKrzysztof Kosiński // Assumes valid |offset| (*cannot* be fake offset). OffsetToRvaUnsafeUnit88*a03ca8b9SKrzysztof Kosiński rva_t OffsetToRvaUnsafe(offset_t offset) const { 89*a03ca8b9SKrzysztof Kosiński return offset - offset_begin + rva_begin; 90*a03ca8b9SKrzysztof Kosiński } 91*a03ca8b9SKrzysztof Kosiński // Assumes valid |rva| (*can* be danging RVA). RvaToOffsetUnsafeUnit92*a03ca8b9SKrzysztof Kosiński offset_t RvaToOffsetUnsafe(rva_t rva, offset_t fake_offset_begin) const { 93*a03ca8b9SKrzysztof Kosiński rva_t delta = rva - rva_begin; 94*a03ca8b9SKrzysztof Kosiński return delta < offset_size ? delta + offset_begin 95*a03ca8b9SKrzysztof Kosiński : fake_offset_begin + rva; 96*a03ca8b9SKrzysztof Kosiński } HasDanglingRvaUnit97*a03ca8b9SKrzysztof Kosiński bool HasDanglingRva() const { return rva_size > offset_size; } 98*a03ca8b9SKrzysztof Kosiński friend bool operator==(const Unit& a, const Unit& b) { 99*a03ca8b9SKrzysztof Kosiński return std::tie(a.offset_begin, a.offset_size, a.rva_begin, a.rva_size) == 100*a03ca8b9SKrzysztof Kosiński std::tie(b.offset_begin, b.offset_size, b.rva_begin, b.rva_size); 101*a03ca8b9SKrzysztof Kosiński } 102*a03ca8b9SKrzysztof Kosiński 103*a03ca8b9SKrzysztof Kosiński offset_t offset_begin; 104*a03ca8b9SKrzysztof Kosiński offset_t offset_size; 105*a03ca8b9SKrzysztof Kosiński rva_t rva_begin; 106*a03ca8b9SKrzysztof Kosiński rva_t rva_size; 107*a03ca8b9SKrzysztof Kosiński }; 108*a03ca8b9SKrzysztof Kosiński 109*a03ca8b9SKrzysztof Kosiński // An adaptor for AddressTranslator::OffsetToRva() that caches the last Unit 110*a03ca8b9SKrzysztof Kosiński // found, to reduce the number of OffsetToUnit() calls for clustered queries. 111*a03ca8b9SKrzysztof Kosiński class OffsetToRvaCache { 112*a03ca8b9SKrzysztof Kosiński public: 113*a03ca8b9SKrzysztof Kosiński // Embeds |translator| for use. Now object lifetime is tied to |translator| 114*a03ca8b9SKrzysztof Kosiński // lifetime. 115*a03ca8b9SKrzysztof Kosiński explicit OffsetToRvaCache(const AddressTranslator& translator); 116*a03ca8b9SKrzysztof Kosiński OffsetToRvaCache(const OffsetToRvaCache&) = delete; 117*a03ca8b9SKrzysztof Kosiński const OffsetToRvaCache& operator=(const OffsetToRvaCache&) = delete; 118*a03ca8b9SKrzysztof Kosiński 119*a03ca8b9SKrzysztof Kosiński rva_t Convert(offset_t offset) const; 120*a03ca8b9SKrzysztof Kosiński 121*a03ca8b9SKrzysztof Kosiński private: 122*a03ca8b9SKrzysztof Kosiński const AddressTranslator& translator_; 123*a03ca8b9SKrzysztof Kosiński mutable const AddressTranslator::Unit* cached_unit_ = nullptr; 124*a03ca8b9SKrzysztof Kosiński }; 125*a03ca8b9SKrzysztof Kosiński 126*a03ca8b9SKrzysztof Kosiński // An adaptor for AddressTranslator::RvaToOffset() that caches the last Unit 127*a03ca8b9SKrzysztof Kosiński // found, to reduce the number of RvaToUnit() calls for clustered queries. 128*a03ca8b9SKrzysztof Kosiński class RvaToOffsetCache { 129*a03ca8b9SKrzysztof Kosiński public: 130*a03ca8b9SKrzysztof Kosiński // Embeds |translator| for use. Now object lifetime is tied to |translator| 131*a03ca8b9SKrzysztof Kosiński // lifetime. 132*a03ca8b9SKrzysztof Kosiński explicit RvaToOffsetCache(const AddressTranslator& translator); 133*a03ca8b9SKrzysztof Kosiński RvaToOffsetCache(const RvaToOffsetCache&) = delete; 134*a03ca8b9SKrzysztof Kosiński const RvaToOffsetCache& operator=(const RvaToOffsetCache&) = delete; 135*a03ca8b9SKrzysztof Kosiński 136*a03ca8b9SKrzysztof Kosiński bool IsValid(rva_t rva) const; 137*a03ca8b9SKrzysztof Kosiński 138*a03ca8b9SKrzysztof Kosiński offset_t Convert(rva_t rva) const; 139*a03ca8b9SKrzysztof Kosiński 140*a03ca8b9SKrzysztof Kosiński private: 141*a03ca8b9SKrzysztof Kosiński const AddressTranslator& translator_; 142*a03ca8b9SKrzysztof Kosiński mutable const AddressTranslator::Unit* cached_unit_ = nullptr; 143*a03ca8b9SKrzysztof Kosiński }; 144*a03ca8b9SKrzysztof Kosiński 145*a03ca8b9SKrzysztof Kosiński enum Status { 146*a03ca8b9SKrzysztof Kosiński kSuccess = 0, 147*a03ca8b9SKrzysztof Kosiński kErrorOverflow, 148*a03ca8b9SKrzysztof Kosiński kErrorBadOverlap, 149*a03ca8b9SKrzysztof Kosiński kErrorBadOverlapDanglingRva, 150*a03ca8b9SKrzysztof Kosiński kErrorFakeOffsetBeginTooLarge, 151*a03ca8b9SKrzysztof Kosiński }; 152*a03ca8b9SKrzysztof Kosiński 153*a03ca8b9SKrzysztof Kosiński AddressTranslator(); 154*a03ca8b9SKrzysztof Kosiński AddressTranslator(AddressTranslator&&); 155*a03ca8b9SKrzysztof Kosiński AddressTranslator(const AddressTranslator&) = delete; 156*a03ca8b9SKrzysztof Kosiński const AddressTranslator& operator=(const AddressTranslator&) = delete; 157*a03ca8b9SKrzysztof Kosiński ~AddressTranslator(); 158*a03ca8b9SKrzysztof Kosiński 159*a03ca8b9SKrzysztof Kosiński // Consumes |units| to populate data in this class. Performs consistency 160*a03ca8b9SKrzysztof Kosiński // checks and overlapping Units. Returns Status to indicate success. 161*a03ca8b9SKrzysztof Kosiński Status Initialize(std::vector<Unit>&& units); 162*a03ca8b9SKrzysztof Kosiński 163*a03ca8b9SKrzysztof Kosiński // Returns the (possibly dangling) RVA corresponding to |offset|, or 164*a03ca8b9SKrzysztof Kosiński // kInvalidRva if not found. 165*a03ca8b9SKrzysztof Kosiński rva_t OffsetToRva(offset_t offset) const; 166*a03ca8b9SKrzysztof Kosiński 167*a03ca8b9SKrzysztof Kosiński // Returns the (possibly fake) offset corresponding to |rva|, or 168*a03ca8b9SKrzysztof Kosiński // kInvalidOffset if not found (i.e., |rva| is non-existent). 169*a03ca8b9SKrzysztof Kosiński offset_t RvaToOffset(rva_t rva) const; 170*a03ca8b9SKrzysztof Kosiński 171*a03ca8b9SKrzysztof Kosiński // For testing. fake_offset_begin()172*a03ca8b9SKrzysztof Kosiński offset_t fake_offset_begin() const { return fake_offset_begin_; } 173*a03ca8b9SKrzysztof Kosiński units_sorted_by_offset()174*a03ca8b9SKrzysztof Kosiński const std::vector<Unit>& units_sorted_by_offset() const { 175*a03ca8b9SKrzysztof Kosiński return units_sorted_by_offset_; 176*a03ca8b9SKrzysztof Kosiński } 177*a03ca8b9SKrzysztof Kosiński units_sorted_by_rva()178*a03ca8b9SKrzysztof Kosiński const std::vector<Unit>& units_sorted_by_rva() const { 179*a03ca8b9SKrzysztof Kosiński return units_sorted_by_rva_; 180*a03ca8b9SKrzysztof Kosiński } 181*a03ca8b9SKrzysztof Kosiński 182*a03ca8b9SKrzysztof Kosiński private: 183*a03ca8b9SKrzysztof Kosiński // Helper to find the Unit that contains given |offset| or |rva|. Returns null 184*a03ca8b9SKrzysztof Kosiński // if not found. 185*a03ca8b9SKrzysztof Kosiński const Unit* OffsetToUnit(offset_t offset) const; 186*a03ca8b9SKrzysztof Kosiński const Unit* RvaToUnit(rva_t rva) const; 187*a03ca8b9SKrzysztof Kosiński 188*a03ca8b9SKrzysztof Kosiński // Storage of Units. All offset ranges are non-empty and disjoint. Likewise 189*a03ca8b9SKrzysztof Kosiński // for all RVA ranges. 190*a03ca8b9SKrzysztof Kosiński std::vector<Unit> units_sorted_by_offset_; 191*a03ca8b9SKrzysztof Kosiński std::vector<Unit> units_sorted_by_rva_; 192*a03ca8b9SKrzysztof Kosiński 193*a03ca8b9SKrzysztof Kosiński // Conversion factor to translate between dangling RVAs and fake offsets. 194*a03ca8b9SKrzysztof Kosiński offset_t fake_offset_begin_; 195*a03ca8b9SKrzysztof Kosiński }; 196*a03ca8b9SKrzysztof Kosiński 197*a03ca8b9SKrzysztof Kosiński } // namespace zucchini 198*a03ca8b9SKrzysztof Kosiński 199*a03ca8b9SKrzysztof Kosiński #endif // COMPONENTS_ZUCCHINI_ADDRESS_TRANSLATOR_H_ 200