1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
6 #define COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
7 
8 #include <stddef.h>
9 #include <stdint.h>
10 
11 #include <optional>
12 #include <string>
13 
14 #include "base/format_macros.h"
15 #include "base/numerics/safe_conversions.h"
16 #include "base/strings/stringprintf.h"
17 #include "components/zucchini/buffer_view.h"
18 #include "components/zucchini/typed_value.h"
19 
20 namespace zucchini {
21 
// offset_t describes an offset inside an image. It is 32 bits wide, hence
// files bigger than 4GB are not supported.
using offset_t = uint32_t;
// Largest usable offset: the all-ones value halved, because label marking
// uses the most significant bit.
constexpr offset_t kOffsetBound = static_cast<offset_t>(~offset_t{0} >> 1);
// "No offset" marker. This is 0xFFFFFFF*E* (all-ones minus one), since
// 0xFFFFFFF*F* is a sentinel value for Dex references.
constexpr offset_t kInvalidOffset = static_cast<offset_t>(~offset_t{0} - 1);
29 
// key_t is used to identify an offset in a table. It is an unsigned 32-bit
// integer, the same underlying type as offset_t.
using key_t = uint32_t;
32 
// Selects between 32-bit and 64-bit variants. Each enumerator's numerical
// value is deliberately the corresponding width in bytes, which lets WidthOf()
// below be a plain cast.
enum Bitness : uint8_t {
  kBit32 = 4,
  kBit64 = 8
};

// Returns the width in bytes denoted by |bitness| (4 for kBit32, 8 for
// kBit64). Declared constexpr so the result can be used in constant
// expressions such as array bounds and static_asserts.
constexpr uint32_t WidthOf(Bitness bitness) {
  return static_cast<uint32_t>(bitness);
}
42 
43 // Used to uniquely identify a reference type.
44 // Strongly typed objects are used to avoid ambiguitees with PoolTag.
45 struct TypeTag : public TypedValue<TypeTag, uint8_t> {
46   // inheriting constructor:
47   using TypedValue<TypeTag, uint8_t>::TypedValue;
48 };
49 
50 // Used to uniquely identify a pool.
51 struct PoolTag : public TypedValue<PoolTag, uint8_t> {
52   // inheriting constructor:
53   using TypedValue<PoolTag, uint8_t>::TypedValue;
54 };
55 
// Reserved TypeTag value (0xFF). Typically used to identify raw data.
constexpr TypeTag kNoTypeTag(0xFF);
// Reserved PoolTag value (0xFF); the pool counterpart of kNoTypeTag.
constexpr PoolTag kNoPoolTag(0xFF);
58 
59 // Specification of references in an image file.
60 struct ReferenceTypeTraits {
ReferenceTypeTraitsReferenceTypeTraits61   constexpr ReferenceTypeTraits(offset_t width_in,
62                                 TypeTag type_tag_in,
63                                 PoolTag pool_tag_in)
64       : width(width_in), type_tag(type_tag_in), pool_tag(pool_tag_in) {}
65 
66   // |width| specifies number of bytes covered by the reference's binary
67   // encoding.
68   const offset_t width;
69   // |type_tag| identifies the reference type being described.
70   const TypeTag type_tag;
71   // |pool_tag| identifies the pool this type belongs to.
72   const PoolTag pool_tag;
73 };
74 
// A single reference extracted from (or to be written into) an image.
// There is no need to store |type| because references of the same type are
// always aggregated into the same container, and so during iteration we'd have
// |type| already.
struct Reference {
  // Where the reference sits in the image; ReferenceReader emits References
  // ordered by this field.
  offset_t location;
  // What the reference points at (presumably also an image offset -- confirm
  // against the Disassembler implementations).
  offset_t target;
};
82 
83 inline bool operator==(const Reference& a, const Reference& b) {
84   return a.location == b.location && a.target == b.target;
85 }
86 
// Interface for extracting References through member function GetNext().
// This is used by Disassemblers to extract references from an image file.
// Typically, a Reader lazily extracts values and does not hold any storage.
class ReferenceReader {
 public:
  // Virtual so that concrete readers can be destroyed through a
  // ReferenceReader pointer.
  virtual ~ReferenceReader() = default;

  // Returns the next available Reference, or std::nullopt if exhausted.
  // Extracted References must be ordered by their location in the image.
  virtual std::optional<Reference> GetNext() = 0;
};
98 
// Interface for writing References through member function
// PutNext(reference). This is used by Disassemblers to write new References
// in the image file.
class ReferenceWriter {
 public:
  // Virtual so that concrete writers can be destroyed through a
  // ReferenceWriter pointer.
  virtual ~ReferenceWriter() = default;

  // Writes |reference| in the underlying image file. This operation always
  // succeeds, hence there is no status to return.
  virtual void PutNext(Reference reference) = 0;
};
110 
111 // An Equivalence is a block of length |length| that approximately match in
112 // |old_image| at an offset of |src_offset| and in |new_image| at an offset of
113 // |dst_offset|.
114 struct Equivalence {
115   offset_t src_offset;
116   offset_t dst_offset;
117   offset_t length;
118 
src_endEquivalence119   offset_t src_end() const { return src_offset + length; }
dst_endEquivalence120   offset_t dst_end() const { return dst_offset + length; }
121 };
122 
123 inline bool operator==(const Equivalence& a, const Equivalence& b) {
124   return a.src_offset == b.src_offset && a.dst_offset == b.dst_offset &&
125          a.length == b.length;
126 }
127 
// Same as Equivalence, but with a similarity score. This is only used when
// generating the patch.
struct EquivalenceCandidate {
  // The matched block itself.
  Equivalence eq;
  // Score attached to |eq|; how it is computed is not visible from this
  // header.
  double similarity;
};
134 
// Packs a 4-character tag (a string literal of exactly 4 characters plus the
// terminating null) into a uint32_t, with exe_type[0] in the least significant
// byte and exe_type[3] in the most significant byte.
template <size_t N>
inline constexpr uint32_t ExeTypeToUint32(const char (&exe_type)[N]) {
  static_assert(N == 5, "Expected ExeType of length 4 + 1 null byte.");
  // Each character is first narrowed to uint8_t and then widened to uint32_t.
  // Without this, a plain |char| is promoted to a signed int, so on platforms
  // where char is signed a byte >= 0x80 would sign-extend and set all higher
  // bits of the result (and shifting the negative value would not be a valid
  // constant expression). ASCII tags produce the same value either way.
  return (uint32_t{static_cast<uint8_t>(exe_type[3])} << 24) |
         (uint32_t{static_cast<uint8_t>(exe_type[2])} << 16) |
         (uint32_t{static_cast<uint8_t>(exe_type[1])} << 8) |
         uint32_t{static_cast<uint8_t>(exe_type[0])};
}
141 
// Enumerations for supported executables. Values in this enum must be distinct.
// Once present, values should never be altered or removed to ensure backwards
// compatibility and patch type collision avoidance.
// Apart from kExeTypeUnknown, every value is a 4-character tag packed into a
// uint32_t by ExeTypeToUint32().
enum ExecutableType : uint32_t {
  // Not a packed tag: the all-ones value marks an unrecognized type.
  kExeTypeUnknown = UINT32_MAX,
  kExeTypeNoOp = ExeTypeToUint32("NoOp"),
  kExeTypeWin32X86 = ExeTypeToUint32("Px86"),
  kExeTypeWin32X64 = ExeTypeToUint32("Px64"),
  kExeTypeElfX86 = ExeTypeToUint32("Ex86"),
  kExeTypeElfX64 = ExeTypeToUint32("Ex64"),
  kExeTypeElfAArch32 = ExeTypeToUint32("EA32"),
  kExeTypeElfAArch64 = ExeTypeToUint32("EA64"),
  kExeTypeDex = ExeTypeToUint32("DEX "),
  kExeTypeZtf = ExeTypeToUint32("ZTF "),
};
157 
CastToExecutableType(uint32_t possible_exe_type)158 constexpr ExecutableType CastToExecutableType(uint32_t possible_exe_type) {
159   switch (static_cast<ExecutableType>(possible_exe_type)) {
160     case kExeTypeNoOp:        // Falls through.
161     case kExeTypeWin32X86:    // Falls through.
162     case kExeTypeWin32X64:    // Falls through.
163     case kExeTypeElfX86:      // Falls through.
164     case kExeTypeElfX64:      // Falls through.
165     case kExeTypeElfAArch32:  // Falls through.
166     case kExeTypeElfAArch64:  // Falls through.
167     case kExeTypeDex:         // Falls through.
168     case kExeTypeZtf:         // Falls through.
169     case kExeTypeUnknown:
170       return static_cast<ExecutableType>(possible_exe_type);
171     default:
172       return kExeTypeUnknown;
173   }
174 }
175 
CastExecutableTypeToString(ExecutableType exe_type)176 inline std::string CastExecutableTypeToString(ExecutableType exe_type) {
177   uint32_t v = static_cast<uint32_t>(exe_type);
178   char result[] = {static_cast<char>(v), static_cast<char>(v >> 8),
179                    static_cast<char>(v >> 16), static_cast<char>(v >> 24), 0};
180   return result;
181 }
182 
183 // A region in an image with associated executable type |exe_type|. If
184 // |exe_type == kExeTypeNoOp|, then the Element represents a region of raw data.
185 struct Element : public BufferRegion {
186   Element() = default;
ElementElement187   constexpr Element(const BufferRegion& region_in, ExecutableType exe_type_in)
188       : BufferRegion(region_in), exe_type(exe_type_in) {}
ElementElement189   constexpr explicit Element(const BufferRegion& region_in)
190       : BufferRegion(region_in), exe_type(kExeTypeNoOp) {}
191 
192   // Similar to lo() and hi(), but returns values in offset_t.
BeginOffsetElement193   offset_t BeginOffset() const { return base::checked_cast<offset_t>(lo()); }
EndOffsetElement194   offset_t EndOffset() const { return base::checked_cast<offset_t>(hi()); }
195 
regionElement196   BufferRegion region() const { return {offset, size}; }
197 
198   friend bool operator==(const Element& a, const Element& b) {
199     return a.exe_type == b.exe_type && a.offset == b.offset && a.size == b.size;
200   }
201 
202   ExecutableType exe_type;
203 };
204 
205 // A matched pair of Elements.
206 struct ElementMatch {
IsValidElementMatch207   bool IsValid() const { return old_element.exe_type == new_element.exe_type; }
exe_typeElementMatch208   ExecutableType exe_type() const { return old_element.exe_type; }
209 
210   // Represents match as "#+#=#+#", where "#" denotes the integers:
211   //   [offset in "old", size in "old", offset in "new", size in "new"].
212   // Note that element type is omitted.
ToStringElementMatch213   std::string ToString() const {
214     return base::StringPrintf("%" PRIuS "+%" PRIuS "=%" PRIuS "+%" PRIuS "",
215                               old_element.offset, old_element.size,
216                               new_element.offset, new_element.size);
217   }
218 
219   Element old_element;
220   Element new_element;
221 };
222 
223 }  // namespace zucchini
224 
225 #endif  // COMPONENTS_ZUCCHINI_IMAGE_UTILS_H_
226