xref: /aosp_15_r20/external/zucchini/image_utils.h (revision a03ca8b91e029cd15055c20c78c2e087c84792e4)
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4*a03ca8b9SKrzysztof Kosiński 
5*a03ca8b9SKrzysztof Kosiński #ifndef COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
6*a03ca8b9SKrzysztof Kosiński #define COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
7*a03ca8b9SKrzysztof Kosiński 
8*a03ca8b9SKrzysztof Kosiński #include <stddef.h>
9*a03ca8b9SKrzysztof Kosiński #include <stdint.h>
10*a03ca8b9SKrzysztof Kosiński 
11*a03ca8b9SKrzysztof Kosiński #include <optional>
12*a03ca8b9SKrzysztof Kosiński #include <string>
13*a03ca8b9SKrzysztof Kosiński 
14*a03ca8b9SKrzysztof Kosiński #include "base/format_macros.h"
15*a03ca8b9SKrzysztof Kosiński #include "base/numerics/safe_conversions.h"
16*a03ca8b9SKrzysztof Kosiński #include "base/strings/stringprintf.h"
17*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/buffer_view.h"
18*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/typed_value.h"
19*a03ca8b9SKrzysztof Kosiński 
20*a03ca8b9SKrzysztof Kosiński namespace zucchini {
21*a03ca8b9SKrzysztof Kosiński 
// offset_t is used to describe an offset in an image.
// Files bigger than 4GB are not supported.
using offset_t = uint32_t;
// The maximum offset_t value divided by 2 (i.e. 0x7FFFFFFF), since label
// marking uses the most significant bit.
constexpr offset_t kOffsetBound = static_cast<offset_t>(-1) / 2;
// Sentinel for "no valid offset". Uses 0xFFFFFFF*E*, since 0xFFFFFFF*F* is a
// sentinel value for Dex references.
constexpr offset_t kInvalidOffset = static_cast<offset_t>(-2);
29*a03ca8b9SKrzysztof Kosiński 
// key_t is used to identify an offset in a table.
using key_t = uint32_t;
32*a03ca8b9SKrzysztof Kosiński 
// Distinguishes 32-bit from 64-bit. Stored in a single byte (uint8_t).
enum Bitness : uint8_t {
  // The numerical values are intended to simplify WidthOf() below: each
  // enumerator equals its width in bytes (32 bits = 4, 64 bits = 8).
  kBit32 = 4,
  kBit64 = 8
};
38*a03ca8b9SKrzysztof Kosiński 
WidthOf(Bitness bitness)39*a03ca8b9SKrzysztof Kosiński inline uint32_t WidthOf(Bitness bitness) {
40*a03ca8b9SKrzysztof Kosiński   return static_cast<uint32_t>(bitness);
41*a03ca8b9SKrzysztof Kosiński }
42*a03ca8b9SKrzysztof Kosiński 
43*a03ca8b9SKrzysztof Kosiński // Used to uniquely identify a reference type.
44*a03ca8b9SKrzysztof Kosiński // Strongly typed objects are used to avoid ambiguitees with PoolTag.
45*a03ca8b9SKrzysztof Kosiński struct TypeTag : public TypedValue<TypeTag, uint8_t> {
46*a03ca8b9SKrzysztof Kosiński   // inheriting constructor:
47*a03ca8b9SKrzysztof Kosiński   using TypedValue<TypeTag, uint8_t>::TypedValue;
48*a03ca8b9SKrzysztof Kosiński };
49*a03ca8b9SKrzysztof Kosiński 
50*a03ca8b9SKrzysztof Kosiński // Used to uniquely identify a pool.
51*a03ca8b9SKrzysztof Kosiński struct PoolTag : public TypedValue<PoolTag, uint8_t> {
52*a03ca8b9SKrzysztof Kosiński   // inheriting constructor:
53*a03ca8b9SKrzysztof Kosiński   using TypedValue<PoolTag, uint8_t>::TypedValue;
54*a03ca8b9SKrzysztof Kosiński };
55*a03ca8b9SKrzysztof Kosiński 
56*a03ca8b9SKrzysztof Kosiński constexpr TypeTag kNoTypeTag(0xFF);  // Typically used to identify raw data.
57*a03ca8b9SKrzysztof Kosiński constexpr PoolTag kNoPoolTag(0xFF);
58*a03ca8b9SKrzysztof Kosiński 
59*a03ca8b9SKrzysztof Kosiński // Specification of references in an image file.
60*a03ca8b9SKrzysztof Kosiński struct ReferenceTypeTraits {
ReferenceTypeTraitsReferenceTypeTraits61*a03ca8b9SKrzysztof Kosiński   constexpr ReferenceTypeTraits(offset_t width_in,
62*a03ca8b9SKrzysztof Kosiński                                 TypeTag type_tag_in,
63*a03ca8b9SKrzysztof Kosiński                                 PoolTag pool_tag_in)
64*a03ca8b9SKrzysztof Kosiński       : width(width_in), type_tag(type_tag_in), pool_tag(pool_tag_in) {}
65*a03ca8b9SKrzysztof Kosiński 
66*a03ca8b9SKrzysztof Kosiński   // |width| specifies number of bytes covered by the reference's binary
67*a03ca8b9SKrzysztof Kosiński   // encoding.
68*a03ca8b9SKrzysztof Kosiński   const offset_t width;
69*a03ca8b9SKrzysztof Kosiński   // |type_tag| identifies the reference type being described.
70*a03ca8b9SKrzysztof Kosiński   const TypeTag type_tag;
71*a03ca8b9SKrzysztof Kosiński   // |pool_tag| identifies the pool this type belongs to.
72*a03ca8b9SKrzysztof Kosiński   const PoolTag pool_tag;
73*a03ca8b9SKrzysztof Kosiński };
74*a03ca8b9SKrzysztof Kosiński 
75*a03ca8b9SKrzysztof Kosiński // There is no need to store |type| because references of the same type are
76*a03ca8b9SKrzysztof Kosiński // always aggregated into the same container, and so during iteration we'd have
77*a03ca8b9SKrzysztof Kosiński // |type| already.
78*a03ca8b9SKrzysztof Kosiński struct Reference {
79*a03ca8b9SKrzysztof Kosiński   offset_t location;
80*a03ca8b9SKrzysztof Kosiński   offset_t target;
81*a03ca8b9SKrzysztof Kosiński };
82*a03ca8b9SKrzysztof Kosiński 
83*a03ca8b9SKrzysztof Kosiński inline bool operator==(const Reference& a, const Reference& b) {
84*a03ca8b9SKrzysztof Kosiński   return a.location == b.location && a.target == b.target;
85*a03ca8b9SKrzysztof Kosiński }
86*a03ca8b9SKrzysztof Kosiński 
87*a03ca8b9SKrzysztof Kosiński // Interface for extracting References through member function GetNext().
88*a03ca8b9SKrzysztof Kosiński // This is used by Disassemblers to extract references from an image file.
89*a03ca8b9SKrzysztof Kosiński // Typically, a Reader lazily extracts values and does not hold any storage.
90*a03ca8b9SKrzysztof Kosiński class ReferenceReader {
91*a03ca8b9SKrzysztof Kosiński  public:
92*a03ca8b9SKrzysztof Kosiński   virtual ~ReferenceReader() = default;
93*a03ca8b9SKrzysztof Kosiński 
94*a03ca8b9SKrzysztof Kosiński   // Returns the next available Reference, or nullopt_t if exhausted.
95*a03ca8b9SKrzysztof Kosiński   // Extracted References must be ordered by their location in the image.
96*a03ca8b9SKrzysztof Kosiński   virtual std::optional<Reference> GetNext() = 0;
97*a03ca8b9SKrzysztof Kosiński };
98*a03ca8b9SKrzysztof Kosiński 
99*a03ca8b9SKrzysztof Kosiński // Interface for writing References through member function
100*a03ca8b9SKrzysztof Kosiński // PutNext(reference). This is used by Disassemblers to write new References
101*a03ca8b9SKrzysztof Kosiński // in the image file.
102*a03ca8b9SKrzysztof Kosiński class ReferenceWriter {
103*a03ca8b9SKrzysztof Kosiński  public:
104*a03ca8b9SKrzysztof Kosiński   virtual ~ReferenceWriter() = default;
105*a03ca8b9SKrzysztof Kosiński 
106*a03ca8b9SKrzysztof Kosiński   // Writes |reference| in the underlying image file. This operation always
107*a03ca8b9SKrzysztof Kosiński   // succeeds.
108*a03ca8b9SKrzysztof Kosiński   virtual void PutNext(Reference reference) = 0;
109*a03ca8b9SKrzysztof Kosiński };
110*a03ca8b9SKrzysztof Kosiński 
111*a03ca8b9SKrzysztof Kosiński // An Equivalence is a block of length |length| that approximately match in
112*a03ca8b9SKrzysztof Kosiński // |old_image| at an offset of |src_offset| and in |new_image| at an offset of
113*a03ca8b9SKrzysztof Kosiński // |dst_offset|.
114*a03ca8b9SKrzysztof Kosiński struct Equivalence {
115*a03ca8b9SKrzysztof Kosiński   offset_t src_offset;
116*a03ca8b9SKrzysztof Kosiński   offset_t dst_offset;
117*a03ca8b9SKrzysztof Kosiński   offset_t length;
118*a03ca8b9SKrzysztof Kosiński 
src_endEquivalence119*a03ca8b9SKrzysztof Kosiński   offset_t src_end() const { return src_offset + length; }
dst_endEquivalence120*a03ca8b9SKrzysztof Kosiński   offset_t dst_end() const { return dst_offset + length; }
121*a03ca8b9SKrzysztof Kosiński };
122*a03ca8b9SKrzysztof Kosiński 
123*a03ca8b9SKrzysztof Kosiński inline bool operator==(const Equivalence& a, const Equivalence& b) {
124*a03ca8b9SKrzysztof Kosiński   return a.src_offset == b.src_offset && a.dst_offset == b.dst_offset &&
125*a03ca8b9SKrzysztof Kosiński          a.length == b.length;
126*a03ca8b9SKrzysztof Kosiński }
127*a03ca8b9SKrzysztof Kosiński 
128*a03ca8b9SKrzysztof Kosiński // Same as Equivalence, but with a similarity score. This is only used when
129*a03ca8b9SKrzysztof Kosiński // generating the patch.
130*a03ca8b9SKrzysztof Kosiński struct EquivalenceCandidate {
131*a03ca8b9SKrzysztof Kosiński   Equivalence eq;
132*a03ca8b9SKrzysztof Kosiński   double similarity;
133*a03ca8b9SKrzysztof Kosiński };
134*a03ca8b9SKrzysztof Kosiński 
// Packs a 4-character tag (a string literal of exactly 4 characters plus the
// terminating null) into a uint32_t, with |exe_type[0]| in the least
// significant byte and |exe_type[3]| in the most significant byte.
template <size_t N>
inline constexpr uint32_t ExeTypeToUint32(const char (&exe_type)[N]) {
  static_assert(N == 5, "Expected ExeType of length 4 + 1 null byte.");
  // Convert each char to uint8_t first, then widen to uint32_t before
  // shifting. Shifting the promoted (possibly signed) int directly would
  // sign-extend characters >= 0x80 when char is signed, and could overflow
  // int when shifting by 24.
  return (static_cast<uint32_t>(static_cast<uint8_t>(exe_type[3])) << 24) |
         (static_cast<uint32_t>(static_cast<uint8_t>(exe_type[2])) << 16) |
         (static_cast<uint32_t>(static_cast<uint8_t>(exe_type[1])) << 8) |
         static_cast<uint32_t>(static_cast<uint8_t>(exe_type[0]));
}
141*a03ca8b9SKrzysztof Kosiński 
142*a03ca8b9SKrzysztof Kosiński // Enumerations for supported executables. Values in this enum must be distinct.
143*a03ca8b9SKrzysztof Kosiński // Once present, values should never be altered or removed to ensure backwards
144*a03ca8b9SKrzysztof Kosiński // compatibility and patch type collision avoidance.
145*a03ca8b9SKrzysztof Kosiński enum ExecutableType : uint32_t {
146*a03ca8b9SKrzysztof Kosiński   kExeTypeUnknown = UINT32_MAX,
147*a03ca8b9SKrzysztof Kosiński   kExeTypeNoOp = ExeTypeToUint32("NoOp"),
148*a03ca8b9SKrzysztof Kosiński   kExeTypeWin32X86 = ExeTypeToUint32("Px86"),
149*a03ca8b9SKrzysztof Kosiński   kExeTypeWin32X64 = ExeTypeToUint32("Px64"),
150*a03ca8b9SKrzysztof Kosiński   kExeTypeElfX86 = ExeTypeToUint32("Ex86"),
151*a03ca8b9SKrzysztof Kosiński   kExeTypeElfX64 = ExeTypeToUint32("Ex64"),
152*a03ca8b9SKrzysztof Kosiński   kExeTypeElfAArch32 = ExeTypeToUint32("EA32"),
153*a03ca8b9SKrzysztof Kosiński   kExeTypeElfAArch64 = ExeTypeToUint32("EA64"),
154*a03ca8b9SKrzysztof Kosiński   kExeTypeDex = ExeTypeToUint32("DEX "),
155*a03ca8b9SKrzysztof Kosiński   kExeTypeZtf = ExeTypeToUint32("ZTF "),
156*a03ca8b9SKrzysztof Kosiński };
157*a03ca8b9SKrzysztof Kosiński 
CastToExecutableType(uint32_t possible_exe_type)158*a03ca8b9SKrzysztof Kosiński constexpr ExecutableType CastToExecutableType(uint32_t possible_exe_type) {
159*a03ca8b9SKrzysztof Kosiński   switch (static_cast<ExecutableType>(possible_exe_type)) {
160*a03ca8b9SKrzysztof Kosiński     case kExeTypeNoOp:        // Falls through.
161*a03ca8b9SKrzysztof Kosiński     case kExeTypeWin32X86:    // Falls through.
162*a03ca8b9SKrzysztof Kosiński     case kExeTypeWin32X64:    // Falls through.
163*a03ca8b9SKrzysztof Kosiński     case kExeTypeElfX86:      // Falls through.
164*a03ca8b9SKrzysztof Kosiński     case kExeTypeElfX64:      // Falls through.
165*a03ca8b9SKrzysztof Kosiński     case kExeTypeElfAArch32:  // Falls through.
166*a03ca8b9SKrzysztof Kosiński     case kExeTypeElfAArch64:  // Falls through.
167*a03ca8b9SKrzysztof Kosiński     case kExeTypeDex:         // Falls through.
168*a03ca8b9SKrzysztof Kosiński     case kExeTypeZtf:         // Falls through.
169*a03ca8b9SKrzysztof Kosiński     case kExeTypeUnknown:
170*a03ca8b9SKrzysztof Kosiński       return static_cast<ExecutableType>(possible_exe_type);
171*a03ca8b9SKrzysztof Kosiński     default:
172*a03ca8b9SKrzysztof Kosiński       return kExeTypeUnknown;
173*a03ca8b9SKrzysztof Kosiński   }
174*a03ca8b9SKrzysztof Kosiński }
175*a03ca8b9SKrzysztof Kosiński 
CastExecutableTypeToString(ExecutableType exe_type)176*a03ca8b9SKrzysztof Kosiński inline std::string CastExecutableTypeToString(ExecutableType exe_type) {
177*a03ca8b9SKrzysztof Kosiński   uint32_t v = static_cast<uint32_t>(exe_type);
178*a03ca8b9SKrzysztof Kosiński   char result[] = {static_cast<char>(v), static_cast<char>(v >> 8),
179*a03ca8b9SKrzysztof Kosiński                    static_cast<char>(v >> 16), static_cast<char>(v >> 24), 0};
180*a03ca8b9SKrzysztof Kosiński   return result;
181*a03ca8b9SKrzysztof Kosiński }
182*a03ca8b9SKrzysztof Kosiński 
183*a03ca8b9SKrzysztof Kosiński // A region in an image with associated executable type |exe_type|. If
184*a03ca8b9SKrzysztof Kosiński // |exe_type == kExeTypeNoOp|, then the Element represents a region of raw data.
185*a03ca8b9SKrzysztof Kosiński struct Element : public BufferRegion {
186*a03ca8b9SKrzysztof Kosiński   Element() = default;
ElementElement187*a03ca8b9SKrzysztof Kosiński   constexpr Element(const BufferRegion& region_in, ExecutableType exe_type_in)
188*a03ca8b9SKrzysztof Kosiński       : BufferRegion(region_in), exe_type(exe_type_in) {}
ElementElement189*a03ca8b9SKrzysztof Kosiński   constexpr explicit Element(const BufferRegion& region_in)
190*a03ca8b9SKrzysztof Kosiński       : BufferRegion(region_in), exe_type(kExeTypeNoOp) {}
191*a03ca8b9SKrzysztof Kosiński 
192*a03ca8b9SKrzysztof Kosiński   // Similar to lo() and hi(), but returns values in offset_t.
BeginOffsetElement193*a03ca8b9SKrzysztof Kosiński   offset_t BeginOffset() const { return base::checked_cast<offset_t>(lo()); }
EndOffsetElement194*a03ca8b9SKrzysztof Kosiński   offset_t EndOffset() const { return base::checked_cast<offset_t>(hi()); }
195*a03ca8b9SKrzysztof Kosiński 
regionElement196*a03ca8b9SKrzysztof Kosiński   BufferRegion region() const { return {offset, size}; }
197*a03ca8b9SKrzysztof Kosiński 
198*a03ca8b9SKrzysztof Kosiński   friend bool operator==(const Element& a, const Element& b) {
199*a03ca8b9SKrzysztof Kosiński     return a.exe_type == b.exe_type && a.offset == b.offset && a.size == b.size;
200*a03ca8b9SKrzysztof Kosiński   }
201*a03ca8b9SKrzysztof Kosiński 
202*a03ca8b9SKrzysztof Kosiński   ExecutableType exe_type;
203*a03ca8b9SKrzysztof Kosiński };
204*a03ca8b9SKrzysztof Kosiński 
205*a03ca8b9SKrzysztof Kosiński // A matched pair of Elements.
206*a03ca8b9SKrzysztof Kosiński struct ElementMatch {
IsValidElementMatch207*a03ca8b9SKrzysztof Kosiński   bool IsValid() const { return old_element.exe_type == new_element.exe_type; }
exe_typeElementMatch208*a03ca8b9SKrzysztof Kosiński   ExecutableType exe_type() const { return old_element.exe_type; }
209*a03ca8b9SKrzysztof Kosiński 
210*a03ca8b9SKrzysztof Kosiński   // Represents match as "#+#=#+#", where "#" denotes the integers:
211*a03ca8b9SKrzysztof Kosiński   //   [offset in "old", size in "old", offset in "new", size in "new"].
212*a03ca8b9SKrzysztof Kosiński   // Note that element type is omitted.
ToStringElementMatch213*a03ca8b9SKrzysztof Kosiński   std::string ToString() const {
214*a03ca8b9SKrzysztof Kosiński     return base::StringPrintf("%" PRIuS "+%" PRIuS "=%" PRIuS "+%" PRIuS "",
215*a03ca8b9SKrzysztof Kosiński                               old_element.offset, old_element.size,
216*a03ca8b9SKrzysztof Kosiński                               new_element.offset, new_element.size);
217*a03ca8b9SKrzysztof Kosiński   }
218*a03ca8b9SKrzysztof Kosiński 
219*a03ca8b9SKrzysztof Kosiński   Element old_element;
220*a03ca8b9SKrzysztof Kosiński   Element new_element;
221*a03ca8b9SKrzysztof Kosiński };
222*a03ca8b9SKrzysztof Kosiński 
223*a03ca8b9SKrzysztof Kosiński }  // namespace zucchini
224*a03ca8b9SKrzysztof Kosiński 
225*a03ca8b9SKrzysztof Kosiński #endif  // COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
226