1*a03ca8b9SKrzysztof Kosiński // Copyright 2017 The Chromium Authors. All rights reserved.
2*a03ca8b9SKrzysztof Kosiński // Use of this source code is governed by a BSD-style license that can be
3*a03ca8b9SKrzysztof Kosiński // found in the LICENSE file.
4*a03ca8b9SKrzysztof Kosiński
5*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/zucchini_gen.h"
6*a03ca8b9SKrzysztof Kosiński
7*a03ca8b9SKrzysztof Kosiński #include <stddef.h>
8*a03ca8b9SKrzysztof Kosiński #include <stdint.h>
9*a03ca8b9SKrzysztof Kosiński
10*a03ca8b9SKrzysztof Kosiński #include <algorithm>
11*a03ca8b9SKrzysztof Kosiński #include <map>
12*a03ca8b9SKrzysztof Kosiński #include <memory>
13*a03ca8b9SKrzysztof Kosiński #include <string>
14*a03ca8b9SKrzysztof Kosiński #include <utility>
15*a03ca8b9SKrzysztof Kosiński
16*a03ca8b9SKrzysztof Kosiński #include "base/logging.h"
17*a03ca8b9SKrzysztof Kosiński #include "base/numerics/safe_conversions.h"
18*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/disassembler.h"
19*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/element_detection.h"
20*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/encoded_view.h"
21*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/ensemble_matcher.h"
22*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/equivalence_map.h"
23*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/heuristic_ensemble_matcher.h"
24*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/image_index.h"
25*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/imposed_ensemble_matcher.h"
26*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/patch_writer.h"
27*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/reference_bytes_mixer.h"
28*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/suffix_array.h"
29*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/targets_affinity.h"
30*a03ca8b9SKrzysztof Kosiński
31*a03ca8b9SKrzysztof Kosiński namespace zucchini {
32*a03ca8b9SKrzysztof Kosiński
33*a03ca8b9SKrzysztof Kosiński namespace {
34*a03ca8b9SKrzysztof Kosiński
35*a03ca8b9SKrzysztof Kosiński // Parameters for patch generation.
36*a03ca8b9SKrzysztof Kosiński constexpr double kMinEquivalenceSimilarity = 12.0;
37*a03ca8b9SKrzysztof Kosiński constexpr double kMinLabelAffinity = 64.0;
38*a03ca8b9SKrzysztof Kosiński
39*a03ca8b9SKrzysztof Kosiński } // namespace
40*a03ca8b9SKrzysztof Kosiński
FindExtraTargets(const TargetPool & projected_old_targets,const TargetPool & new_targets)41*a03ca8b9SKrzysztof Kosiński std::vector<offset_t> FindExtraTargets(const TargetPool& projected_old_targets,
42*a03ca8b9SKrzysztof Kosiński const TargetPool& new_targets) {
43*a03ca8b9SKrzysztof Kosiński std::vector<offset_t> extra_targets;
44*a03ca8b9SKrzysztof Kosiński std::set_difference(
45*a03ca8b9SKrzysztof Kosiński new_targets.begin(), new_targets.end(), projected_old_targets.begin(),
46*a03ca8b9SKrzysztof Kosiński projected_old_targets.end(), std::back_inserter(extra_targets));
47*a03ca8b9SKrzysztof Kosiński return extra_targets;
48*a03ca8b9SKrzysztof Kosiński }
49*a03ca8b9SKrzysztof Kosiński
50*a03ca8b9SKrzysztof Kosiński // Label matching (between "old" and "new") can guide EquivalenceMap
51*a03ca8b9SKrzysztof Kosiński // construction; but EquivalenceMap induces Label matching. This apparent "chick
52*a03ca8b9SKrzysztof Kosiński // and egg" problem is solved by alternating 2 steps |num_iterations| times:
53*a03ca8b9SKrzysztof Kosiński // - Associate targets based on previous EquivalenceMap. Note on the first
54*a03ca8b9SKrzysztof Kosiński // iteration, EquivalenceMap is empty, resulting in a no-op.
55*a03ca8b9SKrzysztof Kosiński // - Construct refined EquivalenceMap based on new targets associations.
CreateEquivalenceMap(const ImageIndex & old_image_index,const ImageIndex & new_image_index,int num_iterations)56*a03ca8b9SKrzysztof Kosiński EquivalenceMap CreateEquivalenceMap(const ImageIndex& old_image_index,
57*a03ca8b9SKrzysztof Kosiński const ImageIndex& new_image_index,
58*a03ca8b9SKrzysztof Kosiński int num_iterations) {
59*a03ca8b9SKrzysztof Kosiński size_t pool_count = old_image_index.PoolCount();
60*a03ca8b9SKrzysztof Kosiński // |target_affinities| is outside the loop to reduce allocation.
61*a03ca8b9SKrzysztof Kosiński std::vector<TargetsAffinity> target_affinities(pool_count);
62*a03ca8b9SKrzysztof Kosiński
63*a03ca8b9SKrzysztof Kosiński EquivalenceMap equivalence_map;
64*a03ca8b9SKrzysztof Kosiński for (int i = 0; i < num_iterations; ++i) {
65*a03ca8b9SKrzysztof Kosiński EncodedView old_view(old_image_index);
66*a03ca8b9SKrzysztof Kosiński EncodedView new_view(new_image_index);
67*a03ca8b9SKrzysztof Kosiński
68*a03ca8b9SKrzysztof Kosiński // Associate targets from "old" to "new" image based on |equivalence_map|
69*a03ca8b9SKrzysztof Kosiński // for each reference pool.
70*a03ca8b9SKrzysztof Kosiński for (const auto& old_pool_tag_and_targets :
71*a03ca8b9SKrzysztof Kosiński old_image_index.target_pools()) {
72*a03ca8b9SKrzysztof Kosiński PoolTag pool_tag = old_pool_tag_and_targets.first;
73*a03ca8b9SKrzysztof Kosiński target_affinities[pool_tag.value()].InferFromSimilarities(
74*a03ca8b9SKrzysztof Kosiński equivalence_map, old_pool_tag_and_targets.second.targets(),
75*a03ca8b9SKrzysztof Kosiński new_image_index.pool(pool_tag).targets());
76*a03ca8b9SKrzysztof Kosiński
77*a03ca8b9SKrzysztof Kosiński // Creates labels for strongly associated targets.
78*a03ca8b9SKrzysztof Kosiński std::vector<uint32_t> old_labels;
79*a03ca8b9SKrzysztof Kosiński std::vector<uint32_t> new_labels;
80*a03ca8b9SKrzysztof Kosiński size_t label_bound = target_affinities[pool_tag.value()].AssignLabels(
81*a03ca8b9SKrzysztof Kosiński kMinLabelAffinity, &old_labels, &new_labels);
82*a03ca8b9SKrzysztof Kosiński old_view.SetLabels(pool_tag, std::move(old_labels), label_bound);
83*a03ca8b9SKrzysztof Kosiński new_view.SetLabels(pool_tag, std::move(new_labels), label_bound);
84*a03ca8b9SKrzysztof Kosiński }
85*a03ca8b9SKrzysztof Kosiński // Build equivalence map, where references in "old" and "new" that share
86*a03ca8b9SKrzysztof Kosiński // common semantics (i.e., their respective targets were associated earlier
87*a03ca8b9SKrzysztof Kosiński // on) are considered equivalent.
88*a03ca8b9SKrzysztof Kosiński equivalence_map.Build(
89*a03ca8b9SKrzysztof Kosiński MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality()),
90*a03ca8b9SKrzysztof Kosiński old_view, new_view, target_affinities, kMinEquivalenceSimilarity);
91*a03ca8b9SKrzysztof Kosiński }
92*a03ca8b9SKrzysztof Kosiński
93*a03ca8b9SKrzysztof Kosiński return equivalence_map;
94*a03ca8b9SKrzysztof Kosiński }
95*a03ca8b9SKrzysztof Kosiński
GenerateEquivalencesAndExtraData(ConstBufferView new_image,const EquivalenceMap & equivalence_map,PatchElementWriter * patch_writer)96*a03ca8b9SKrzysztof Kosiński bool GenerateEquivalencesAndExtraData(ConstBufferView new_image,
97*a03ca8b9SKrzysztof Kosiński const EquivalenceMap& equivalence_map,
98*a03ca8b9SKrzysztof Kosiński PatchElementWriter* patch_writer) {
99*a03ca8b9SKrzysztof Kosiński // Make 2 passes through |equivalence_map| to reduce write churn.
100*a03ca8b9SKrzysztof Kosiński // Pass 1: Write all equivalences.
101*a03ca8b9SKrzysztof Kosiński EquivalenceSink equivalences_sink;
102*a03ca8b9SKrzysztof Kosiński for (const EquivalenceCandidate& candidate : equivalence_map)
103*a03ca8b9SKrzysztof Kosiński equivalences_sink.PutNext(candidate.eq);
104*a03ca8b9SKrzysztof Kosiński patch_writer->SetEquivalenceSink(std::move(equivalences_sink));
105*a03ca8b9SKrzysztof Kosiński
106*a03ca8b9SKrzysztof Kosiński // Pass 2: Write data in gaps in |new_image| before / between after
107*a03ca8b9SKrzysztof Kosiński // |equivalence_map| as "extra data".
108*a03ca8b9SKrzysztof Kosiński ExtraDataSink extra_data_sink;
109*a03ca8b9SKrzysztof Kosiński offset_t dst_offset = 0;
110*a03ca8b9SKrzysztof Kosiński for (const EquivalenceCandidate& candidate : equivalence_map) {
111*a03ca8b9SKrzysztof Kosiński extra_data_sink.PutNext(
112*a03ca8b9SKrzysztof Kosiński new_image[{dst_offset, candidate.eq.dst_offset - dst_offset}]);
113*a03ca8b9SKrzysztof Kosiński dst_offset = candidate.eq.dst_end();
114*a03ca8b9SKrzysztof Kosiński DCHECK_LE(dst_offset, new_image.size());
115*a03ca8b9SKrzysztof Kosiński }
116*a03ca8b9SKrzysztof Kosiński extra_data_sink.PutNext(
117*a03ca8b9SKrzysztof Kosiński new_image[{dst_offset, new_image.size() - dst_offset}]);
118*a03ca8b9SKrzysztof Kosiński patch_writer->SetExtraDataSink(std::move(extra_data_sink));
119*a03ca8b9SKrzysztof Kosiński return true;
120*a03ca8b9SKrzysztof Kosiński }
121*a03ca8b9SKrzysztof Kosiński
GenerateRawDelta(ConstBufferView old_image,ConstBufferView new_image,const EquivalenceMap & equivalence_map,const ImageIndex & new_image_index,ReferenceBytesMixer * reference_bytes_mixer,PatchElementWriter * patch_writer)122*a03ca8b9SKrzysztof Kosiński bool GenerateRawDelta(ConstBufferView old_image,
123*a03ca8b9SKrzysztof Kosiński ConstBufferView new_image,
124*a03ca8b9SKrzysztof Kosiński const EquivalenceMap& equivalence_map,
125*a03ca8b9SKrzysztof Kosiński const ImageIndex& new_image_index,
126*a03ca8b9SKrzysztof Kosiński ReferenceBytesMixer* reference_bytes_mixer,
127*a03ca8b9SKrzysztof Kosiński PatchElementWriter* patch_writer) {
128*a03ca8b9SKrzysztof Kosiński RawDeltaSink raw_delta_sink;
129*a03ca8b9SKrzysztof Kosiński
130*a03ca8b9SKrzysztof Kosiński // Visit |equivalence_map| blocks in |new_image| order. Find and emit all
131*a03ca8b9SKrzysztof Kosiński // bytewise differences.
132*a03ca8b9SKrzysztof Kosiński offset_t base_copy_offset = 0;
133*a03ca8b9SKrzysztof Kosiński for (const EquivalenceCandidate& candidate : equivalence_map) {
134*a03ca8b9SKrzysztof Kosiński Equivalence equivalence = candidate.eq;
135*a03ca8b9SKrzysztof Kosiński // For each bytewise delta from |old_image| to |new_image|, compute "copy
136*a03ca8b9SKrzysztof Kosiński // offset" and pass it along with delta to the sink.
137*a03ca8b9SKrzysztof Kosiński for (offset_t i = 0; i < equivalence.length;) {
138*a03ca8b9SKrzysztof Kosiński if (new_image_index.IsReference(equivalence.dst_offset + i)) {
139*a03ca8b9SKrzysztof Kosiński DCHECK(new_image_index.IsToken(equivalence.dst_offset + i));
140*a03ca8b9SKrzysztof Kosiński TypeTag type_tag =
141*a03ca8b9SKrzysztof Kosiński new_image_index.LookupType(equivalence.dst_offset + i);
142*a03ca8b9SKrzysztof Kosiński
143*a03ca8b9SKrzysztof Kosiński // Reference delta has its own flow. On some architectures (e.g., x86)
144*a03ca8b9SKrzysztof Kosiński // this does not involve raw delta, so we skip. On other architectures
145*a03ca8b9SKrzysztof Kosiński // (e.g., ARM) references are mixed with other bits that may change, so
146*a03ca8b9SKrzysztof Kosiński // we need to "mix" data and store some changed bits into raw delta.
147*a03ca8b9SKrzysztof Kosiński int num_bytes = reference_bytes_mixer->NumBytes(type_tag.value());
148*a03ca8b9SKrzysztof Kosiński if (num_bytes) {
149*a03ca8b9SKrzysztof Kosiński ConstBufferView mixed_ref_bytes = reference_bytes_mixer->Mix(
150*a03ca8b9SKrzysztof Kosiński type_tag.value(), old_image, equivalence.src_offset + i,
151*a03ca8b9SKrzysztof Kosiński new_image, equivalence.dst_offset + i);
152*a03ca8b9SKrzysztof Kosiński for (int j = 0; j < num_bytes; ++j) {
153*a03ca8b9SKrzysztof Kosiński int8_t diff =
154*a03ca8b9SKrzysztof Kosiński mixed_ref_bytes[j] - old_image[equivalence.src_offset + i + j];
155*a03ca8b9SKrzysztof Kosiński if (diff)
156*a03ca8b9SKrzysztof Kosiński raw_delta_sink.PutNext({base_copy_offset + i + j, diff});
157*a03ca8b9SKrzysztof Kosiński }
158*a03ca8b9SKrzysztof Kosiński }
159*a03ca8b9SKrzysztof Kosiński i += new_image_index.refs(type_tag).width();
160*a03ca8b9SKrzysztof Kosiński DCHECK_LE(i, equivalence.length);
161*a03ca8b9SKrzysztof Kosiński } else {
162*a03ca8b9SKrzysztof Kosiński int8_t diff = new_image[equivalence.dst_offset + i] -
163*a03ca8b9SKrzysztof Kosiński old_image[equivalence.src_offset + i];
164*a03ca8b9SKrzysztof Kosiński if (diff)
165*a03ca8b9SKrzysztof Kosiński raw_delta_sink.PutNext({base_copy_offset + i, diff});
166*a03ca8b9SKrzysztof Kosiński ++i;
167*a03ca8b9SKrzysztof Kosiński }
168*a03ca8b9SKrzysztof Kosiński }
169*a03ca8b9SKrzysztof Kosiński base_copy_offset += equivalence.length;
170*a03ca8b9SKrzysztof Kosiński }
171*a03ca8b9SKrzysztof Kosiński patch_writer->SetRawDeltaSink(std::move(raw_delta_sink));
172*a03ca8b9SKrzysztof Kosiński return true;
173*a03ca8b9SKrzysztof Kosiński }
174*a03ca8b9SKrzysztof Kosiński
GenerateReferencesDelta(const ReferenceSet & src_refs,const ReferenceSet & dst_refs,const TargetPool & projected_target_pool,const OffsetMapper & offset_mapper,const EquivalenceMap & equivalence_map,ReferenceDeltaSink * reference_delta_sink)175*a03ca8b9SKrzysztof Kosiński bool GenerateReferencesDelta(const ReferenceSet& src_refs,
176*a03ca8b9SKrzysztof Kosiński const ReferenceSet& dst_refs,
177*a03ca8b9SKrzysztof Kosiński const TargetPool& projected_target_pool,
178*a03ca8b9SKrzysztof Kosiński const OffsetMapper& offset_mapper,
179*a03ca8b9SKrzysztof Kosiński const EquivalenceMap& equivalence_map,
180*a03ca8b9SKrzysztof Kosiński ReferenceDeltaSink* reference_delta_sink) {
181*a03ca8b9SKrzysztof Kosiński size_t ref_width = src_refs.width();
182*a03ca8b9SKrzysztof Kosiński auto dst_ref = dst_refs.begin();
183*a03ca8b9SKrzysztof Kosiński
184*a03ca8b9SKrzysztof Kosiński // For each equivalence, for each covered |dst_ref| and the matching
185*a03ca8b9SKrzysztof Kosiński // |src_ref|, emit the delta between the respective target labels. Note: By
186*a03ca8b9SKrzysztof Kosiński // construction, each reference location (with |ref_width|) lies either
187*a03ca8b9SKrzysztof Kosiński // completely inside an equivalence or completely outside. We perform
188*a03ca8b9SKrzysztof Kosiński // "straddle checks" throughout to verify this assertion.
189*a03ca8b9SKrzysztof Kosiński for (const auto& candidate : equivalence_map) {
190*a03ca8b9SKrzysztof Kosiński const Equivalence equiv = candidate.eq;
191*a03ca8b9SKrzysztof Kosiński // Increment |dst_ref| until it catches up to |equiv|.
192*a03ca8b9SKrzysztof Kosiński while (dst_ref != dst_refs.end() && dst_ref->location < equiv.dst_offset)
193*a03ca8b9SKrzysztof Kosiński ++dst_ref;
194*a03ca8b9SKrzysztof Kosiński if (dst_ref == dst_refs.end())
195*a03ca8b9SKrzysztof Kosiński break;
196*a03ca8b9SKrzysztof Kosiński if (dst_ref->location >= equiv.dst_end())
197*a03ca8b9SKrzysztof Kosiński continue;
198*a03ca8b9SKrzysztof Kosiński // Straddle check.
199*a03ca8b9SKrzysztof Kosiński DCHECK_LE(dst_ref->location + ref_width, equiv.dst_end());
200*a03ca8b9SKrzysztof Kosiński
201*a03ca8b9SKrzysztof Kosiński offset_t src_loc =
202*a03ca8b9SKrzysztof Kosiński equiv.src_offset + (dst_ref->location - equiv.dst_offset);
203*a03ca8b9SKrzysztof Kosiński auto src_ref = std::lower_bound(
204*a03ca8b9SKrzysztof Kosiński src_refs.begin(), src_refs.end(), src_loc,
205*a03ca8b9SKrzysztof Kosiński [](const Reference& a, offset_t b) { return a.location < b; });
206*a03ca8b9SKrzysztof Kosiński for (; dst_ref != dst_refs.end() &&
207*a03ca8b9SKrzysztof Kosiński dst_ref->location + ref_width <= equiv.dst_end();
208*a03ca8b9SKrzysztof Kosiński ++dst_ref, ++src_ref) {
209*a03ca8b9SKrzysztof Kosiński // Local offset of |src_ref| should match that of |dst_ref|.
210*a03ca8b9SKrzysztof Kosiński DCHECK_EQ(src_ref->location - equiv.src_offset,
211*a03ca8b9SKrzysztof Kosiński dst_ref->location - equiv.dst_offset);
212*a03ca8b9SKrzysztof Kosiński offset_t old_offset = src_ref->target;
213*a03ca8b9SKrzysztof Kosiński offset_t new_estimated_offset =
214*a03ca8b9SKrzysztof Kosiński offset_mapper.ExtendedForwardProject(old_offset);
215*a03ca8b9SKrzysztof Kosiński offset_t new_estimated_key =
216*a03ca8b9SKrzysztof Kosiński projected_target_pool.KeyForNearestOffset(new_estimated_offset);
217*a03ca8b9SKrzysztof Kosiński offset_t new_offset = dst_ref->target;
218*a03ca8b9SKrzysztof Kosiński offset_t new_key = projected_target_pool.KeyForOffset(new_offset);
219*a03ca8b9SKrzysztof Kosiński
220*a03ca8b9SKrzysztof Kosiński reference_delta_sink->PutNext(
221*a03ca8b9SKrzysztof Kosiński static_cast<int32_t>(new_key - new_estimated_key));
222*a03ca8b9SKrzysztof Kosiński }
223*a03ca8b9SKrzysztof Kosiński if (dst_ref == dst_refs.end())
224*a03ca8b9SKrzysztof Kosiński break; // Done.
225*a03ca8b9SKrzysztof Kosiński // Straddle check.
226*a03ca8b9SKrzysztof Kosiński DCHECK_GE(dst_ref->location, equiv.dst_end());
227*a03ca8b9SKrzysztof Kosiński }
228*a03ca8b9SKrzysztof Kosiński return true;
229*a03ca8b9SKrzysztof Kosiński }
230*a03ca8b9SKrzysztof Kosiński
GenerateExtraTargets(const std::vector<offset_t> & extra_targets,PoolTag pool_tag,PatchElementWriter * patch_writer)231*a03ca8b9SKrzysztof Kosiński bool GenerateExtraTargets(const std::vector<offset_t>& extra_targets,
232*a03ca8b9SKrzysztof Kosiński PoolTag pool_tag,
233*a03ca8b9SKrzysztof Kosiński PatchElementWriter* patch_writer) {
234*a03ca8b9SKrzysztof Kosiński TargetSink target_sink;
235*a03ca8b9SKrzysztof Kosiński for (offset_t target : extra_targets)
236*a03ca8b9SKrzysztof Kosiński target_sink.PutNext(target);
237*a03ca8b9SKrzysztof Kosiński patch_writer->SetTargetSink(pool_tag, std::move(target_sink));
238*a03ca8b9SKrzysztof Kosiński return true;
239*a03ca8b9SKrzysztof Kosiński }
240*a03ca8b9SKrzysztof Kosiński
GenerateRawElement(const std::vector<offset_t> & old_sa,ConstBufferView old_image,ConstBufferView new_image,PatchElementWriter * patch_writer)241*a03ca8b9SKrzysztof Kosiński bool GenerateRawElement(const std::vector<offset_t>& old_sa,
242*a03ca8b9SKrzysztof Kosiński ConstBufferView old_image,
243*a03ca8b9SKrzysztof Kosiński ConstBufferView new_image,
244*a03ca8b9SKrzysztof Kosiński PatchElementWriter* patch_writer) {
245*a03ca8b9SKrzysztof Kosiński ImageIndex old_image_index(old_image);
246*a03ca8b9SKrzysztof Kosiński ImageIndex new_image_index(new_image);
247*a03ca8b9SKrzysztof Kosiński
248*a03ca8b9SKrzysztof Kosiński EquivalenceMap equivalences;
249*a03ca8b9SKrzysztof Kosiński equivalences.Build(old_sa, EncodedView(old_image_index),
250*a03ca8b9SKrzysztof Kosiński EncodedView(new_image_index), {},
251*a03ca8b9SKrzysztof Kosiński kMinEquivalenceSimilarity);
252*a03ca8b9SKrzysztof Kosiński
253*a03ca8b9SKrzysztof Kosiński patch_writer->SetReferenceDeltaSink({});
254*a03ca8b9SKrzysztof Kosiński
255*a03ca8b9SKrzysztof Kosiński ReferenceBytesMixer no_op_bytes_mixer;
256*a03ca8b9SKrzysztof Kosiński return GenerateEquivalencesAndExtraData(new_image, equivalences,
257*a03ca8b9SKrzysztof Kosiński patch_writer) &&
258*a03ca8b9SKrzysztof Kosiński GenerateRawDelta(old_image, new_image, equivalences, new_image_index,
259*a03ca8b9SKrzysztof Kosiński &no_op_bytes_mixer, patch_writer);
260*a03ca8b9SKrzysztof Kosiński }
261*a03ca8b9SKrzysztof Kosiński
GenerateExecutableElement(ExecutableType exe_type,ConstBufferView old_image,ConstBufferView new_image,PatchElementWriter * patch_writer)262*a03ca8b9SKrzysztof Kosiński bool GenerateExecutableElement(ExecutableType exe_type,
263*a03ca8b9SKrzysztof Kosiński ConstBufferView old_image,
264*a03ca8b9SKrzysztof Kosiński ConstBufferView new_image,
265*a03ca8b9SKrzysztof Kosiński PatchElementWriter* patch_writer) {
266*a03ca8b9SKrzysztof Kosiński // Initialize Disassemblers.
267*a03ca8b9SKrzysztof Kosiński std::unique_ptr<Disassembler> old_disasm =
268*a03ca8b9SKrzysztof Kosiński MakeDisassemblerOfType(old_image, exe_type);
269*a03ca8b9SKrzysztof Kosiński std::unique_ptr<Disassembler> new_disasm =
270*a03ca8b9SKrzysztof Kosiński MakeDisassemblerOfType(new_image, exe_type);
271*a03ca8b9SKrzysztof Kosiński if (!old_disasm || !new_disasm) {
272*a03ca8b9SKrzysztof Kosiński LOG(ERROR) << "Failed to create Disassembler.";
273*a03ca8b9SKrzysztof Kosiński return false;
274*a03ca8b9SKrzysztof Kosiński }
275*a03ca8b9SKrzysztof Kosiński DCHECK_EQ(old_disasm->GetExeType(), new_disasm->GetExeType());
276*a03ca8b9SKrzysztof Kosiński
277*a03ca8b9SKrzysztof Kosiński // Initialize ImageIndexes.
278*a03ca8b9SKrzysztof Kosiński ImageIndex old_image_index(old_image);
279*a03ca8b9SKrzysztof Kosiński ImageIndex new_image_index(new_image);
280*a03ca8b9SKrzysztof Kosiński if (!old_image_index.Initialize(old_disasm.get()) ||
281*a03ca8b9SKrzysztof Kosiński !new_image_index.Initialize(new_disasm.get())) {
282*a03ca8b9SKrzysztof Kosiński LOG(ERROR) << "Failed to create ImageIndex: Overlapping references found?";
283*a03ca8b9SKrzysztof Kosiński return false;
284*a03ca8b9SKrzysztof Kosiński }
285*a03ca8b9SKrzysztof Kosiński DCHECK_EQ(old_image_index.PoolCount(), new_image_index.PoolCount());
286*a03ca8b9SKrzysztof Kosiński
287*a03ca8b9SKrzysztof Kosiński EquivalenceMap equivalences =
288*a03ca8b9SKrzysztof Kosiński CreateEquivalenceMap(old_image_index, new_image_index,
289*a03ca8b9SKrzysztof Kosiński new_disasm->num_equivalence_iterations());
290*a03ca8b9SKrzysztof Kosiński OffsetMapper offset_mapper(equivalences,
291*a03ca8b9SKrzysztof Kosiński base::checked_cast<offset_t>(old_image.size()),
292*a03ca8b9SKrzysztof Kosiński base::checked_cast<offset_t>(new_image.size()));
293*a03ca8b9SKrzysztof Kosiński
294*a03ca8b9SKrzysztof Kosiński ReferenceDeltaSink reference_delta_sink;
295*a03ca8b9SKrzysztof Kosiński for (const auto& old_targets : old_image_index.target_pools()) {
296*a03ca8b9SKrzysztof Kosiński PoolTag pool_tag = old_targets.first;
297*a03ca8b9SKrzysztof Kosiński TargetPool projected_old_targets = old_targets.second;
298*a03ca8b9SKrzysztof Kosiński projected_old_targets.FilterAndProject(offset_mapper);
299*a03ca8b9SKrzysztof Kosiński std::vector<offset_t> extra_target =
300*a03ca8b9SKrzysztof Kosiński FindExtraTargets(projected_old_targets, new_image_index.pool(pool_tag));
301*a03ca8b9SKrzysztof Kosiński projected_old_targets.InsertTargets(extra_target);
302*a03ca8b9SKrzysztof Kosiński
303*a03ca8b9SKrzysztof Kosiński if (!GenerateExtraTargets(extra_target, pool_tag, patch_writer))
304*a03ca8b9SKrzysztof Kosiński return false;
305*a03ca8b9SKrzysztof Kosiński for (TypeTag type_tag : old_targets.second.types()) {
306*a03ca8b9SKrzysztof Kosiński if (!GenerateReferencesDelta(old_image_index.refs(type_tag),
307*a03ca8b9SKrzysztof Kosiński new_image_index.refs(type_tag),
308*a03ca8b9SKrzysztof Kosiński projected_old_targets, offset_mapper,
309*a03ca8b9SKrzysztof Kosiński equivalences, &reference_delta_sink)) {
310*a03ca8b9SKrzysztof Kosiński return false;
311*a03ca8b9SKrzysztof Kosiński }
312*a03ca8b9SKrzysztof Kosiński }
313*a03ca8b9SKrzysztof Kosiński }
314*a03ca8b9SKrzysztof Kosiński patch_writer->SetReferenceDeltaSink(std::move(reference_delta_sink));
315*a03ca8b9SKrzysztof Kosiński std::unique_ptr<ReferenceBytesMixer> reference_bytes_mixer =
316*a03ca8b9SKrzysztof Kosiński ReferenceBytesMixer::Create(*old_disasm, *new_disasm);
317*a03ca8b9SKrzysztof Kosiński return GenerateEquivalencesAndExtraData(new_image, equivalences,
318*a03ca8b9SKrzysztof Kosiński patch_writer) &&
319*a03ca8b9SKrzysztof Kosiński GenerateRawDelta(old_image, new_image, equivalences, new_image_index,
320*a03ca8b9SKrzysztof Kosiński reference_bytes_mixer.get(), patch_writer);
321*a03ca8b9SKrzysztof Kosiński }
322*a03ca8b9SKrzysztof Kosiński
GenerateBufferCommon(ConstBufferView old_image,ConstBufferView new_image,std::unique_ptr<EnsembleMatcher> matcher,EnsemblePatchWriter * patch_writer)323*a03ca8b9SKrzysztof Kosiński status::Code GenerateBufferCommon(ConstBufferView old_image,
324*a03ca8b9SKrzysztof Kosiński ConstBufferView new_image,
325*a03ca8b9SKrzysztof Kosiński std::unique_ptr<EnsembleMatcher> matcher,
326*a03ca8b9SKrzysztof Kosiński EnsemblePatchWriter* patch_writer) {
327*a03ca8b9SKrzysztof Kosiński if (!matcher->RunMatch(old_image, new_image)) {
328*a03ca8b9SKrzysztof Kosiński LOG(INFO) << "RunMatch() failed, generating raw patch.";
329*a03ca8b9SKrzysztof Kosiński return GenerateBufferRaw(old_image, new_image, patch_writer);
330*a03ca8b9SKrzysztof Kosiński }
331*a03ca8b9SKrzysztof Kosiński
332*a03ca8b9SKrzysztof Kosiński const std::vector<ElementMatch>& matches = matcher->matches();
333*a03ca8b9SKrzysztof Kosiński LOG(INFO) << "Matching: Found " << matches.size()
334*a03ca8b9SKrzysztof Kosiński << " nontrivial matches and " << matcher->num_identical()
335*a03ca8b9SKrzysztof Kosiński << " identical matches.";
336*a03ca8b9SKrzysztof Kosiński size_t num_elements = matches.size();
337*a03ca8b9SKrzysztof Kosiński if (num_elements == 0) {
338*a03ca8b9SKrzysztof Kosiński LOG(INFO) << "No nontrival matches, generating raw patch.";
339*a03ca8b9SKrzysztof Kosiński return GenerateBufferRaw(old_image, new_image, patch_writer);
340*a03ca8b9SKrzysztof Kosiński }
341*a03ca8b9SKrzysztof Kosiński
342*a03ca8b9SKrzysztof Kosiński // "Gaps" are |new_image| bytes not covered by new_elements in |matches|.
343*a03ca8b9SKrzysztof Kosiński // These are treated as raw data, and patched against the entire |old_image|.
344*a03ca8b9SKrzysztof Kosiński
345*a03ca8b9SKrzysztof Kosiński // |patch_element_map| (keyed by "new" offsets) stores PatchElementWriter
346*a03ca8b9SKrzysztof Kosiński // results so elements and "gap" results can be computed separately (to reduce
347*a03ca8b9SKrzysztof Kosiński // peak memory usage), and later, properly serialized to |patch_writer|
348*a03ca8b9SKrzysztof Kosiński // ordered by "new" offset.
349*a03ca8b9SKrzysztof Kosiński std::map<offset_t, PatchElementWriter> patch_element_map;
350*a03ca8b9SKrzysztof Kosiński
351*a03ca8b9SKrzysztof Kosiński // Variables to track element patching successes.
352*a03ca8b9SKrzysztof Kosiński std::vector<BufferRegion> covered_new_regions;
353*a03ca8b9SKrzysztof Kosiński size_t covered_new_bytes = 0;
354*a03ca8b9SKrzysztof Kosiński
355*a03ca8b9SKrzysztof Kosiński // Process elements first, since non-fatal failures may turn some into gaps.
356*a03ca8b9SKrzysztof Kosiński for (const ElementMatch& match : matches) {
357*a03ca8b9SKrzysztof Kosiński BufferRegion new_region = match.new_element.region();
358*a03ca8b9SKrzysztof Kosiński LOG(INFO) << "--- Match [" << new_region.lo() << "," << new_region.hi()
359*a03ca8b9SKrzysztof Kosiński << ")";
360*a03ca8b9SKrzysztof Kosiński
361*a03ca8b9SKrzysztof Kosiński auto it_and_success = patch_element_map.emplace(
362*a03ca8b9SKrzysztof Kosiński base::checked_cast<offset_t>(new_region.lo()), match);
363*a03ca8b9SKrzysztof Kosiński DCHECK(it_and_success.second);
364*a03ca8b9SKrzysztof Kosiński PatchElementWriter& patch_element = it_and_success.first->second;
365*a03ca8b9SKrzysztof Kosiński
366*a03ca8b9SKrzysztof Kosiński ConstBufferView old_sub_image = old_image[match.old_element.region()];
367*a03ca8b9SKrzysztof Kosiński ConstBufferView new_sub_image = new_image[new_region];
368*a03ca8b9SKrzysztof Kosiński if (GenerateExecutableElement(match.exe_type(), old_sub_image,
369*a03ca8b9SKrzysztof Kosiński new_sub_image, &patch_element)) {
370*a03ca8b9SKrzysztof Kosiński covered_new_regions.push_back(new_region);
371*a03ca8b9SKrzysztof Kosiński covered_new_bytes += new_region.size;
372*a03ca8b9SKrzysztof Kosiński } else {
373*a03ca8b9SKrzysztof Kosiński LOG(INFO) << "Fall back to raw patching.";
374*a03ca8b9SKrzysztof Kosiński patch_element_map.erase(it_and_success.first);
375*a03ca8b9SKrzysztof Kosiński }
376*a03ca8b9SKrzysztof Kosiński }
377*a03ca8b9SKrzysztof Kosiński
378*a03ca8b9SKrzysztof Kosiński if (covered_new_bytes < new_image.size()) {
379*a03ca8b9SKrzysztof Kosiński // Process all "gaps", which are patched against the entire "old" image. To
380*a03ca8b9SKrzysztof Kosiński // compute equivalence maps, "gaps" share a common suffix array
381*a03ca8b9SKrzysztof Kosiński // |old_sa_raw|, whose lifetime is kept separated from elements' suffix
382*a03ca8b9SKrzysztof Kosiński // arrays to reduce peak memory.
383*a03ca8b9SKrzysztof Kosiński Element entire_old_element(old_image.local_region(), kExeTypeNoOp);
384*a03ca8b9SKrzysztof Kosiński ImageIndex old_image_index(old_image);
385*a03ca8b9SKrzysztof Kosiński EncodedView old_view_raw(old_image_index);
386*a03ca8b9SKrzysztof Kosiński std::vector<offset_t> old_sa_raw =
387*a03ca8b9SKrzysztof Kosiński MakeSuffixArray<InducedSuffixSort>(old_view_raw, size_t(256));
388*a03ca8b9SKrzysztof Kosiński
389*a03ca8b9SKrzysztof Kosiński offset_t gap_lo = 0;
390*a03ca8b9SKrzysztof Kosiński // Add sentinel that points to end of "new" file, to simplify gap iteration.
391*a03ca8b9SKrzysztof Kosiński covered_new_regions.emplace_back(BufferRegion{new_image.size(), 0});
392*a03ca8b9SKrzysztof Kosiński
393*a03ca8b9SKrzysztof Kosiński for (const BufferRegion& covered : covered_new_regions) {
394*a03ca8b9SKrzysztof Kosiński offset_t gap_hi = base::checked_cast<offset_t>(covered.lo());
395*a03ca8b9SKrzysztof Kosiński DCHECK_GE(gap_hi, gap_lo);
396*a03ca8b9SKrzysztof Kosiński offset_t gap_size = gap_hi - gap_lo;
397*a03ca8b9SKrzysztof Kosiński if (gap_size > 0) {
398*a03ca8b9SKrzysztof Kosiński LOG(INFO) << "--- Gap [" << gap_lo << "," << gap_hi << ")";
399*a03ca8b9SKrzysztof Kosiński
400*a03ca8b9SKrzysztof Kosiński ElementMatch gap_match{{entire_old_element, kExeTypeNoOp},
401*a03ca8b9SKrzysztof Kosiński {{gap_lo, gap_size}, kExeTypeNoOp}};
402*a03ca8b9SKrzysztof Kosiński auto it_and_success = patch_element_map.emplace(gap_lo, gap_match);
403*a03ca8b9SKrzysztof Kosiński DCHECK(it_and_success.second);
404*a03ca8b9SKrzysztof Kosiński PatchElementWriter& patch_element = it_and_success.first->second;
405*a03ca8b9SKrzysztof Kosiński
406*a03ca8b9SKrzysztof Kosiński ConstBufferView new_sub_image = new_image[{gap_lo, gap_size}];
407*a03ca8b9SKrzysztof Kosiński if (!GenerateRawElement(old_sa_raw, old_image, new_sub_image,
408*a03ca8b9SKrzysztof Kosiński &patch_element)) {
409*a03ca8b9SKrzysztof Kosiński return status::kStatusFatal;
410*a03ca8b9SKrzysztof Kosiński }
411*a03ca8b9SKrzysztof Kosiński }
412*a03ca8b9SKrzysztof Kosiński gap_lo = base::checked_cast<offset_t>(covered.hi());
413*a03ca8b9SKrzysztof Kosiński }
414*a03ca8b9SKrzysztof Kosiński }
415*a03ca8b9SKrzysztof Kosiński
416*a03ca8b9SKrzysztof Kosiński // Write all PatchElementWriter sorted by "new" offset.
417*a03ca8b9SKrzysztof Kosiński for (auto& new_lo_and_patch_element : patch_element_map)
418*a03ca8b9SKrzysztof Kosiński patch_writer->AddElement(std::move(new_lo_and_patch_element.second));
419*a03ca8b9SKrzysztof Kosiński
420*a03ca8b9SKrzysztof Kosiński return status::kStatusSuccess;
421*a03ca8b9SKrzysztof Kosiński }
422*a03ca8b9SKrzysztof Kosiński
423*a03ca8b9SKrzysztof Kosiński /******** Exported Functions ********/
424*a03ca8b9SKrzysztof Kosiński
GenerateBuffer(ConstBufferView old_image,ConstBufferView new_image,EnsemblePatchWriter * patch_writer)425*a03ca8b9SKrzysztof Kosiński status::Code GenerateBuffer(ConstBufferView old_image,
426*a03ca8b9SKrzysztof Kosiński ConstBufferView new_image,
427*a03ca8b9SKrzysztof Kosiński EnsemblePatchWriter* patch_writer) {
428*a03ca8b9SKrzysztof Kosiński return GenerateBufferCommon(
429*a03ca8b9SKrzysztof Kosiński old_image, new_image, std::make_unique<HeuristicEnsembleMatcher>(nullptr),
430*a03ca8b9SKrzysztof Kosiński patch_writer);
431*a03ca8b9SKrzysztof Kosiński }
432*a03ca8b9SKrzysztof Kosiński
GenerateBufferImposed(ConstBufferView old_image,ConstBufferView new_image,std::string imposed_matches,EnsemblePatchWriter * patch_writer)433*a03ca8b9SKrzysztof Kosiński status::Code GenerateBufferImposed(ConstBufferView old_image,
434*a03ca8b9SKrzysztof Kosiński ConstBufferView new_image,
435*a03ca8b9SKrzysztof Kosiński std::string imposed_matches,
436*a03ca8b9SKrzysztof Kosiński EnsemblePatchWriter* patch_writer) {
437*a03ca8b9SKrzysztof Kosiński if (imposed_matches.empty())
438*a03ca8b9SKrzysztof Kosiński return GenerateBuffer(old_image, new_image, patch_writer);
439*a03ca8b9SKrzysztof Kosiński
440*a03ca8b9SKrzysztof Kosiński return GenerateBufferCommon(
441*a03ca8b9SKrzysztof Kosiński old_image, new_image,
442*a03ca8b9SKrzysztof Kosiński std::make_unique<ImposedEnsembleMatcher>(imposed_matches), patch_writer);
443*a03ca8b9SKrzysztof Kosiński }
444*a03ca8b9SKrzysztof Kosiński
GenerateBufferRaw(ConstBufferView old_image,ConstBufferView new_image,EnsemblePatchWriter * patch_writer)445*a03ca8b9SKrzysztof Kosiński status::Code GenerateBufferRaw(ConstBufferView old_image,
446*a03ca8b9SKrzysztof Kosiński ConstBufferView new_image,
447*a03ca8b9SKrzysztof Kosiński EnsemblePatchWriter* patch_writer) {
448*a03ca8b9SKrzysztof Kosiński ImageIndex old_image_index(old_image);
449*a03ca8b9SKrzysztof Kosiński EncodedView old_view(old_image_index);
450*a03ca8b9SKrzysztof Kosiński std::vector<offset_t> old_sa =
451*a03ca8b9SKrzysztof Kosiński MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality());
452*a03ca8b9SKrzysztof Kosiński
453*a03ca8b9SKrzysztof Kosiński PatchElementWriter patch_element(
454*a03ca8b9SKrzysztof Kosiński {Element(old_image.local_region()), Element(new_image.local_region())});
455*a03ca8b9SKrzysztof Kosiński if (!GenerateRawElement(old_sa, old_image, new_image, &patch_element))
456*a03ca8b9SKrzysztof Kosiński return status::kStatusFatal;
457*a03ca8b9SKrzysztof Kosiński patch_writer->AddElement(std::move(patch_element));
458*a03ca8b9SKrzysztof Kosiński return status::kStatusSuccess;
459*a03ca8b9SKrzysztof Kosiński }
460*a03ca8b9SKrzysztof Kosiński
461*a03ca8b9SKrzysztof Kosiński } // namespace zucchini
462