xref: /aosp_15_r20/external/zucchini/equivalence_map.h (revision a03ca8b91e029cd15055c20c78c2e087c84792e4)
1*a03ca8b9SKrzysztof Kosiński // Copyright 2017 The Chromium Authors. All rights reserved.
2*a03ca8b9SKrzysztof Kosiński // Use of this source code is governed by a BSD-style license that can be
3*a03ca8b9SKrzysztof Kosiński // found in the LICENSE file.
4*a03ca8b9SKrzysztof Kosiński 
5*a03ca8b9SKrzysztof Kosiński #ifndef COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
6*a03ca8b9SKrzysztof Kosiński #define COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
7*a03ca8b9SKrzysztof Kosiński 
8*a03ca8b9SKrzysztof Kosiński #include <stddef.h>
9*a03ca8b9SKrzysztof Kosiński 
10*a03ca8b9SKrzysztof Kosiński #include <deque>
11*a03ca8b9SKrzysztof Kosiński #include <limits>
12*a03ca8b9SKrzysztof Kosiński #include <vector>
13*a03ca8b9SKrzysztof Kosiński 
14*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/image_index.h"
15*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/image_utils.h"
16*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/targets_affinity.h"
17*a03ca8b9SKrzysztof Kosiński 
18*a03ca8b9SKrzysztof Kosiński namespace zucchini {
19*a03ca8b9SKrzysztof Kosiński 
20*a03ca8b9SKrzysztof Kosiński constexpr double kMismatchFatal = -std::numeric_limits<double>::infinity();
21*a03ca8b9SKrzysztof Kosiński 
22*a03ca8b9SKrzysztof Kosiński class EncodedView;
23*a03ca8b9SKrzysztof Kosiński class EquivalenceSource;
24*a03ca8b9SKrzysztof Kosiński 
25*a03ca8b9SKrzysztof Kosiński // Returns similarity score between a token (raw byte or first byte of a
26*a03ca8b9SKrzysztof Kosiński // reference) in |old_image_index| at |src| and a token in |new_image_index|
27*a03ca8b9SKrzysztof Kosiński // at |dst|. |targets_affinities| describes affinities for each target pool and
28*a03ca8b9SKrzysztof Kosiński // is used to evaluate similarity between references, hence it's size must be
29*a03ca8b9SKrzysztof Kosiński // equal to the number of pools in both |old_image_index| and |new_image_index|.
30*a03ca8b9SKrzysztof Kosiński // Both |src| and |dst| must refer to tokens in |old_image_index| and
31*a03ca8b9SKrzysztof Kosiński // |new_image_index|.
32*a03ca8b9SKrzysztof Kosiński double GetTokenSimilarity(
33*a03ca8b9SKrzysztof Kosiński     const ImageIndex& old_image_index,
34*a03ca8b9SKrzysztof Kosiński     const ImageIndex& new_image_index,
35*a03ca8b9SKrzysztof Kosiński     const std::vector<TargetsAffinity>& targets_affinities,
36*a03ca8b9SKrzysztof Kosiński     offset_t src,
37*a03ca8b9SKrzysztof Kosiński     offset_t dst);
38*a03ca8b9SKrzysztof Kosiński 
39*a03ca8b9SKrzysztof Kosiński // Returns a similarity score between content in |old_image_index| and
40*a03ca8b9SKrzysztof Kosiński // |new_image_index| at regions described by |equivalence|, using
41*a03ca8b9SKrzysztof Kosiński // |targets_affinities| to evaluate similarity between references.
42*a03ca8b9SKrzysztof Kosiński double GetEquivalenceSimilarity(
43*a03ca8b9SKrzysztof Kosiński     const ImageIndex& old_image_index,
44*a03ca8b9SKrzysztof Kosiński     const ImageIndex& new_image_index,
45*a03ca8b9SKrzysztof Kosiński     const std::vector<TargetsAffinity>& targets_affinities,
46*a03ca8b9SKrzysztof Kosiński     const Equivalence& equivalence);
47*a03ca8b9SKrzysztof Kosiński 
48*a03ca8b9SKrzysztof Kosiński // Extends |equivalence| forward and returns the result. This is related to
49*a03ca8b9SKrzysztof Kosiński // VisitEquivalenceSeed().
50*a03ca8b9SKrzysztof Kosiński EquivalenceCandidate ExtendEquivalenceForward(
51*a03ca8b9SKrzysztof Kosiński     const ImageIndex& old_image_index,
52*a03ca8b9SKrzysztof Kosiński     const ImageIndex& new_image_index,
53*a03ca8b9SKrzysztof Kosiński     const std::vector<TargetsAffinity>& targets_affinities,
54*a03ca8b9SKrzysztof Kosiński     const EquivalenceCandidate& equivalence,
55*a03ca8b9SKrzysztof Kosiński     double min_similarity);
56*a03ca8b9SKrzysztof Kosiński 
57*a03ca8b9SKrzysztof Kosiński // Extends |equivalence| backward and returns the result. This is related to
58*a03ca8b9SKrzysztof Kosiński // VisitEquivalenceSeed().
59*a03ca8b9SKrzysztof Kosiński EquivalenceCandidate ExtendEquivalenceBackward(
60*a03ca8b9SKrzysztof Kosiński     const ImageIndex& old_image_index,
61*a03ca8b9SKrzysztof Kosiński     const ImageIndex& new_image_index,
62*a03ca8b9SKrzysztof Kosiński     const std::vector<TargetsAffinity>& targets_affinities,
63*a03ca8b9SKrzysztof Kosiński     const EquivalenceCandidate& equivalence,
64*a03ca8b9SKrzysztof Kosiński     double min_similarity);
65*a03ca8b9SKrzysztof Kosiński 
66*a03ca8b9SKrzysztof Kosiński // Creates an equivalence, starting with |src| and |dst| as offset hint, and
67*a03ca8b9SKrzysztof Kosiński // extends it both forward and backward, trying to maximise similarity between
68*a03ca8b9SKrzysztof Kosiński // |old_image_index| and |new_image_index|, and returns the result.
69*a03ca8b9SKrzysztof Kosiński // |targets_affinities| is used to evaluate similarity between references.
70*a03ca8b9SKrzysztof Kosiński // |min_similarity| describes the minimum acceptable similarity score and is
71*a03ca8b9SKrzysztof Kosiński // used as threshold to discard bad equivalences.
72*a03ca8b9SKrzysztof Kosiński EquivalenceCandidate VisitEquivalenceSeed(
73*a03ca8b9SKrzysztof Kosiński     const ImageIndex& old_image_index,
74*a03ca8b9SKrzysztof Kosiński     const ImageIndex& new_image_index,
75*a03ca8b9SKrzysztof Kosiński     const std::vector<TargetsAffinity>& targets_affinities,
76*a03ca8b9SKrzysztof Kosiński     offset_t src,
77*a03ca8b9SKrzysztof Kosiński     offset_t dst,
78*a03ca8b9SKrzysztof Kosiński     double min_similarity);
79*a03ca8b9SKrzysztof Kosiński 
80*a03ca8b9SKrzysztof Kosiński // Container of pruned equivalences used to map offsets from |old_image| to
81*a03ca8b9SKrzysztof Kosiński // offsets in |new_image|. Equivalences are pruned by cropping smaller
82*a03ca8b9SKrzysztof Kosiński // equivalences to avoid overlaps, to make the equivalence map (for covered
83*a03ca8b9SKrzysztof Kosiński // bytes in |old_image| and |new_image|) one-to-one.
84*a03ca8b9SKrzysztof Kosiński class OffsetMapper {
85*a03ca8b9SKrzysztof Kosiński  public:
86*a03ca8b9SKrzysztof Kosiński   using const_iterator = std::deque<Equivalence>::const_iterator;
87*a03ca8b9SKrzysztof Kosiński 
88*a03ca8b9SKrzysztof Kosiński   // Constructors for various data sources. "Old" and "new" image sizes are
89*a03ca8b9SKrzysztof Kosiński   // needed for bounds checks and to handle dangling targets.
90*a03ca8b9SKrzysztof Kosiński   // - From a list of |equivalences|, already sorted (by |src_offset|) and
91*a03ca8b9SKrzysztof Kosiński   //   pruned, useful for tests.
92*a03ca8b9SKrzysztof Kosiński   OffsetMapper(std::deque<Equivalence>&& equivalences,
93*a03ca8b9SKrzysztof Kosiński                offset_t old_image_size,
94*a03ca8b9SKrzysztof Kosiński                offset_t new_image_size);
95*a03ca8b9SKrzysztof Kosiński   // - From a generator, useful for Zucchini-apply.
96*a03ca8b9SKrzysztof Kosiński   OffsetMapper(EquivalenceSource&& equivalence_source,
97*a03ca8b9SKrzysztof Kosiński                offset_t old_image_size,
98*a03ca8b9SKrzysztof Kosiński                offset_t new_image_size);
99*a03ca8b9SKrzysztof Kosiński   // - From an EquivalenceMap that needs to be processed, useful for
100*a03ca8b9SKrzysztof Kosiński   //   Zucchini-gen.
101*a03ca8b9SKrzysztof Kosiński   OffsetMapper(const EquivalenceMap& equivalence_map,
102*a03ca8b9SKrzysztof Kosiński                offset_t old_image_size,
103*a03ca8b9SKrzysztof Kosiński                offset_t new_image_size);
104*a03ca8b9SKrzysztof Kosiński   ~OffsetMapper();
105*a03ca8b9SKrzysztof Kosiński 
size()106*a03ca8b9SKrzysztof Kosiński   size_t size() const { return equivalences_.size(); }
begin()107*a03ca8b9SKrzysztof Kosiński   const_iterator begin() const { return equivalences_.begin(); }
end()108*a03ca8b9SKrzysztof Kosiński   const_iterator end() const { return equivalences_.end(); }
109*a03ca8b9SKrzysztof Kosiński 
110*a03ca8b9SKrzysztof Kosiński   // Returns naive extended forward-projection of "old" |offset| that follows
111*a03ca8b9SKrzysztof Kosiński   // |eq|'s delta. |eq| needs not cover |offset|.
112*a03ca8b9SKrzysztof Kosiński   // - Averts underflow / overflow by clamping to |[0, new_image_size_)|.
113*a03ca8b9SKrzysztof Kosiński   // - However, |offset| is *not* restricted to |[0, old_image_size_)|; the
114*a03ca8b9SKrzysztof Kosiński   //   caller must to make the check (hence "naive").
115*a03ca8b9SKrzysztof Kosiński   offset_t NaiveExtendedForwardProject(const Equivalence& unit,
116*a03ca8b9SKrzysztof Kosiński                                        offset_t offset) const;
117*a03ca8b9SKrzysztof Kosiński 
118*a03ca8b9SKrzysztof Kosiński   // Returns an offset in |new_image| corresponding to |offset| in |old_image|.
119*a03ca8b9SKrzysztof Kosiński   // Assumes |equivalences_| to be non-empty. Cases:
120*a03ca8b9SKrzysztof Kosiński   // - If |offset| is covered (i.e., in an "old" block), then use the delta of
121*a03ca8b9SKrzysztof Kosiński   //   the (unique) equivalence unit that covers |offset|.
122*a03ca8b9SKrzysztof Kosiński   // - If |offset| is non-covered, but in |[0, old_image_size_)|, then find the
123*a03ca8b9SKrzysztof Kosiński   //   nearest "old" block, use its delta, and avert underflow / overflow by
124*a03ca8b9SKrzysztof Kosiński   //   clamping the result to |[0, new_image_size_)|.
125*a03ca8b9SKrzysztof Kosiński   // - If |offset| is >= |new_image_size_| (a "fake offset"), then use
126*a03ca8b9SKrzysztof Kosiński   //   |new_image_size_ - old_image_size_| as the delta.
127*a03ca8b9SKrzysztof Kosiński   offset_t ExtendedForwardProject(offset_t offset) const;
128*a03ca8b9SKrzysztof Kosiński 
129*a03ca8b9SKrzysztof Kosiński   // Given sorted |offsets|, applies a projection in-place of all offsets that
130*a03ca8b9SKrzysztof Kosiński   // are part of a pruned equivalence from |old_image| to |new_image|. Other
131*a03ca8b9SKrzysztof Kosiński   // offsets are removed from |offsets|.
132*a03ca8b9SKrzysztof Kosiński   void ForwardProjectAll(std::deque<offset_t>* offsets) const;
133*a03ca8b9SKrzysztof Kosiński 
134*a03ca8b9SKrzysztof Kosiński   // Accessor for testing.
equivalences()135*a03ca8b9SKrzysztof Kosiński   const std::deque<Equivalence> equivalences() const { return equivalences_; }
136*a03ca8b9SKrzysztof Kosiński 
137*a03ca8b9SKrzysztof Kosiński   // Sorts |equivalences| by |src_offset| and removes all source overlaps; so a
138*a03ca8b9SKrzysztof Kosiński   // source location that was covered by some Equivalence would become covered
139*a03ca8b9SKrzysztof Kosiński   // by exactly one Equivalence. Moreover, for the offset, the equivalence
140*a03ca8b9SKrzysztof Kosiński   // corresponds to the largest (pre-pruning) covering Equivalence, and in case
141*a03ca8b9SKrzysztof Kosiński   // of a tie, the Equivalence with minimal |src_offset|. |equivalences| may
142*a03ca8b9SKrzysztof Kosiński   // change in size since empty Equivalences are removed.
143*a03ca8b9SKrzysztof Kosiński   static void PruneEquivalencesAndSortBySource(
144*a03ca8b9SKrzysztof Kosiński       std::deque<Equivalence>* equivalences);
145*a03ca8b9SKrzysztof Kosiński 
146*a03ca8b9SKrzysztof Kosiński  private:
147*a03ca8b9SKrzysztof Kosiński   // |equivalences_| is pruned, i.e., no "old" blocks overlap (and no "new"
148*a03ca8b9SKrzysztof Kosiński   // block overlaps). Also, it is sorted by "old" offsets.
149*a03ca8b9SKrzysztof Kosiński   std::deque<Equivalence> equivalences_;
150*a03ca8b9SKrzysztof Kosiński   const offset_t old_image_size_;
151*a03ca8b9SKrzysztof Kosiński   const offset_t new_image_size_;
152*a03ca8b9SKrzysztof Kosiński };
153*a03ca8b9SKrzysztof Kosiński 
154*a03ca8b9SKrzysztof Kosiński // Container of equivalences between |old_image_index| and |new_image_index|,
155*a03ca8b9SKrzysztof Kosiński // sorted by |Equivalence::dst_offset|, only used during patch generation.
156*a03ca8b9SKrzysztof Kosiński class EquivalenceMap {
157*a03ca8b9SKrzysztof Kosiński  public:
158*a03ca8b9SKrzysztof Kosiński   using const_iterator = std::vector<EquivalenceCandidate>::const_iterator;
159*a03ca8b9SKrzysztof Kosiński 
160*a03ca8b9SKrzysztof Kosiński   EquivalenceMap();
161*a03ca8b9SKrzysztof Kosiński   // Initializes the object with |equivalences|.
162*a03ca8b9SKrzysztof Kosiński   explicit EquivalenceMap(std::vector<EquivalenceCandidate>&& candidates);
163*a03ca8b9SKrzysztof Kosiński   EquivalenceMap(EquivalenceMap&&);
164*a03ca8b9SKrzysztof Kosiński   EquivalenceMap(const EquivalenceMap&) = delete;
165*a03ca8b9SKrzysztof Kosiński   ~EquivalenceMap();
166*a03ca8b9SKrzysztof Kosiński 
167*a03ca8b9SKrzysztof Kosiński   // Finds relevant equivalences between |old_view| and |new_view|, using
168*a03ca8b9SKrzysztof Kosiński   // suffix array |old_sa| computed from |old_view| and using
169*a03ca8b9SKrzysztof Kosiński   // |targets_affinities| to evaluate similarity between references. This
170*a03ca8b9SKrzysztof Kosiński   // function is not symmetric. Equivalences might overlap in |old_view|, but
171*a03ca8b9SKrzysztof Kosiński   // not in |new_view|. It tries to maximize accumulated similarity within each
172*a03ca8b9SKrzysztof Kosiński   // equivalence, while maximizing |new_view| coverage. The minimum similarity
173*a03ca8b9SKrzysztof Kosiński   // of an equivalence is given by |min_similarity|.
174*a03ca8b9SKrzysztof Kosiński   void Build(const std::vector<offset_t>& old_sa,
175*a03ca8b9SKrzysztof Kosiński              const EncodedView& old_view,
176*a03ca8b9SKrzysztof Kosiński              const EncodedView& new_view,
177*a03ca8b9SKrzysztof Kosiński              const std::vector<TargetsAffinity>& targets_affinities,
178*a03ca8b9SKrzysztof Kosiński              double min_similarity);
179*a03ca8b9SKrzysztof Kosiński 
size()180*a03ca8b9SKrzysztof Kosiński   size_t size() const { return candidates_.size(); }
begin()181*a03ca8b9SKrzysztof Kosiński   const_iterator begin() const { return candidates_.begin(); }
end()182*a03ca8b9SKrzysztof Kosiński   const_iterator end() const { return candidates_.end(); }
183*a03ca8b9SKrzysztof Kosiński 
184*a03ca8b9SKrzysztof Kosiński  private:
185*a03ca8b9SKrzysztof Kosiński   // Discovers equivalence candidates between |old_view| and |new_view| and
186*a03ca8b9SKrzysztof Kosiński   // stores them in the object. Note that resulting candidates are not sorted
187*a03ca8b9SKrzysztof Kosiński   // and might be overlapping in new image.
188*a03ca8b9SKrzysztof Kosiński   void CreateCandidates(const std::vector<offset_t>& old_sa,
189*a03ca8b9SKrzysztof Kosiński                         const EncodedView& old_view,
190*a03ca8b9SKrzysztof Kosiński                         const EncodedView& new_view,
191*a03ca8b9SKrzysztof Kosiński                         const std::vector<TargetsAffinity>& targets_affinities,
192*a03ca8b9SKrzysztof Kosiński                         double min_similarity);
193*a03ca8b9SKrzysztof Kosiński   // Sorts candidates by their offset in new image.
194*a03ca8b9SKrzysztof Kosiński   void SortByDestination();
195*a03ca8b9SKrzysztof Kosiński   // Visits |candidates_| (sorted by |dst_offset|) and remove all destination
196*a03ca8b9SKrzysztof Kosiński   // overlaps. Candidates with low similarity scores are more likely to be
197*a03ca8b9SKrzysztof Kosiński   // shrunken. Unfit candidates may be removed.
198*a03ca8b9SKrzysztof Kosiński   void Prune(const EncodedView& old_view,
199*a03ca8b9SKrzysztof Kosiński              const EncodedView& new_view,
200*a03ca8b9SKrzysztof Kosiński              const std::vector<TargetsAffinity>& targets_affinities,
201*a03ca8b9SKrzysztof Kosiński              double min_similarity);
202*a03ca8b9SKrzysztof Kosiński 
203*a03ca8b9SKrzysztof Kosiński   std::vector<EquivalenceCandidate> candidates_;
204*a03ca8b9SKrzysztof Kosiński };
205*a03ca8b9SKrzysztof Kosiński 
206*a03ca8b9SKrzysztof Kosiński }  // namespace zucchini
207*a03ca8b9SKrzysztof Kosiński 
208*a03ca8b9SKrzysztof Kosiński #endif  // COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
209