1*a03ca8b9SKrzysztof Kosiński // Copyright 2017 The Chromium Authors. All rights reserved. 2*a03ca8b9SKrzysztof Kosiński // Use of this source code is governed by a BSD-style license that can be 3*a03ca8b9SKrzysztof Kosiński // found in the LICENSE file. 4*a03ca8b9SKrzysztof Kosiński 5*a03ca8b9SKrzysztof Kosiński #ifndef COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ 6*a03ca8b9SKrzysztof Kosiński #define COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ 7*a03ca8b9SKrzysztof Kosiński 8*a03ca8b9SKrzysztof Kosiński #include <stddef.h> 9*a03ca8b9SKrzysztof Kosiński 10*a03ca8b9SKrzysztof Kosiński #include <deque> 11*a03ca8b9SKrzysztof Kosiński #include <limits> 12*a03ca8b9SKrzysztof Kosiński #include <vector> 13*a03ca8b9SKrzysztof Kosiński 14*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/image_index.h" 15*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/image_utils.h" 16*a03ca8b9SKrzysztof Kosiński #include "components/zucchini/targets_affinity.h" 17*a03ca8b9SKrzysztof Kosiński 18*a03ca8b9SKrzysztof Kosiński namespace zucchini { 19*a03ca8b9SKrzysztof Kosiński 20*a03ca8b9SKrzysztof Kosiński constexpr double kMismatchFatal = -std::numeric_limits<double>::infinity(); 21*a03ca8b9SKrzysztof Kosiński 22*a03ca8b9SKrzysztof Kosiński class EncodedView; 23*a03ca8b9SKrzysztof Kosiński class EquivalenceSource; 24*a03ca8b9SKrzysztof Kosiński 25*a03ca8b9SKrzysztof Kosiński // Returns similarity score between a token (raw byte or first byte of a 26*a03ca8b9SKrzysztof Kosiński // reference) in |old_image_index| at |src| and a token in |new_image_index| 27*a03ca8b9SKrzysztof Kosiński // at |dst|. |targets_affinities| describes affinities for each target pool and 28*a03ca8b9SKrzysztof Kosiński // is used to evaluate similarity between references, hence it's size must be 29*a03ca8b9SKrzysztof Kosiński // equal to the number of pools in both |old_image_index| and |new_image_index|. 30*a03ca8b9SKrzysztof Kosiński // Both |src| and |dst| must refer to tokens in |old_image_index| and 31*a03ca8b9SKrzysztof Kosiński // |new_image_index|. 32*a03ca8b9SKrzysztof Kosiński double GetTokenSimilarity( 33*a03ca8b9SKrzysztof Kosiński const ImageIndex& old_image_index, 34*a03ca8b9SKrzysztof Kosiński const ImageIndex& new_image_index, 35*a03ca8b9SKrzysztof Kosiński const std::vector<TargetsAffinity>& targets_affinities, 36*a03ca8b9SKrzysztof Kosiński offset_t src, 37*a03ca8b9SKrzysztof Kosiński offset_t dst); 38*a03ca8b9SKrzysztof Kosiński 39*a03ca8b9SKrzysztof Kosiński // Returns a similarity score between content in |old_image_index| and 40*a03ca8b9SKrzysztof Kosiński // |new_image_index| at regions described by |equivalence|, using 41*a03ca8b9SKrzysztof Kosiński // |targets_affinities| to evaluate similarity between references. 42*a03ca8b9SKrzysztof Kosiński double GetEquivalenceSimilarity( 43*a03ca8b9SKrzysztof Kosiński const ImageIndex& old_image_index, 44*a03ca8b9SKrzysztof Kosiński const ImageIndex& new_image_index, 45*a03ca8b9SKrzysztof Kosiński const std::vector<TargetsAffinity>& targets_affinities, 46*a03ca8b9SKrzysztof Kosiński const Equivalence& equivalence); 47*a03ca8b9SKrzysztof Kosiński 48*a03ca8b9SKrzysztof Kosiński // Extends |equivalence| forward and returns the result. This is related to 49*a03ca8b9SKrzysztof Kosiński // VisitEquivalenceSeed(). 50*a03ca8b9SKrzysztof Kosiński EquivalenceCandidate ExtendEquivalenceForward( 51*a03ca8b9SKrzysztof Kosiński const ImageIndex& old_image_index, 52*a03ca8b9SKrzysztof Kosiński const ImageIndex& new_image_index, 53*a03ca8b9SKrzysztof Kosiński const std::vector<TargetsAffinity>& targets_affinities, 54*a03ca8b9SKrzysztof Kosiński const EquivalenceCandidate& equivalence, 55*a03ca8b9SKrzysztof Kosiński double min_similarity); 56*a03ca8b9SKrzysztof Kosiński 57*a03ca8b9SKrzysztof Kosiński // Extends |equivalence| backward and returns the result. This is related to 58*a03ca8b9SKrzysztof Kosiński // VisitEquivalenceSeed(). 59*a03ca8b9SKrzysztof Kosiński EquivalenceCandidate ExtendEquivalenceBackward( 60*a03ca8b9SKrzysztof Kosiński const ImageIndex& old_image_index, 61*a03ca8b9SKrzysztof Kosiński const ImageIndex& new_image_index, 62*a03ca8b9SKrzysztof Kosiński const std::vector<TargetsAffinity>& targets_affinities, 63*a03ca8b9SKrzysztof Kosiński const EquivalenceCandidate& equivalence, 64*a03ca8b9SKrzysztof Kosiński double min_similarity); 65*a03ca8b9SKrzysztof Kosiński 66*a03ca8b9SKrzysztof Kosiński // Creates an equivalence, starting with |src| and |dst| as offset hint, and 67*a03ca8b9SKrzysztof Kosiński // extends it both forward and backward, trying to maximise similarity between 68*a03ca8b9SKrzysztof Kosiński // |old_image_index| and |new_image_index|, and returns the result. 69*a03ca8b9SKrzysztof Kosiński // |targets_affinities| is used to evaluate similarity between references. 70*a03ca8b9SKrzysztof Kosiński // |min_similarity| describes the minimum acceptable similarity score and is 71*a03ca8b9SKrzysztof Kosiński // used as threshold to discard bad equivalences. 72*a03ca8b9SKrzysztof Kosiński EquivalenceCandidate VisitEquivalenceSeed( 73*a03ca8b9SKrzysztof Kosiński const ImageIndex& old_image_index, 74*a03ca8b9SKrzysztof Kosiński const ImageIndex& new_image_index, 75*a03ca8b9SKrzysztof Kosiński const std::vector<TargetsAffinity>& targets_affinities, 76*a03ca8b9SKrzysztof Kosiński offset_t src, 77*a03ca8b9SKrzysztof Kosiński offset_t dst, 78*a03ca8b9SKrzysztof Kosiński double min_similarity); 79*a03ca8b9SKrzysztof Kosiński 80*a03ca8b9SKrzysztof Kosiński // Container of pruned equivalences used to map offsets from |old_image| to 81*a03ca8b9SKrzysztof Kosiński // offsets in |new_image|. Equivalences are pruned by cropping smaller 82*a03ca8b9SKrzysztof Kosiński // equivalences to avoid overlaps, to make the equivalence map (for covered 83*a03ca8b9SKrzysztof Kosiński // bytes in |old_image| and |new_image|) one-to-one. 84*a03ca8b9SKrzysztof Kosiński class OffsetMapper { 85*a03ca8b9SKrzysztof Kosiński public: 86*a03ca8b9SKrzysztof Kosiński using const_iterator = std::deque<Equivalence>::const_iterator; 87*a03ca8b9SKrzysztof Kosiński 88*a03ca8b9SKrzysztof Kosiński // Constructors for various data sources. "Old" and "new" image sizes are 89*a03ca8b9SKrzysztof Kosiński // needed for bounds checks and to handle dangling targets. 90*a03ca8b9SKrzysztof Kosiński // - From a list of |equivalences|, already sorted (by |src_offset|) and 91*a03ca8b9SKrzysztof Kosiński // pruned, useful for tests. 92*a03ca8b9SKrzysztof Kosiński OffsetMapper(std::deque<Equivalence>&& equivalences, 93*a03ca8b9SKrzysztof Kosiński offset_t old_image_size, 94*a03ca8b9SKrzysztof Kosiński offset_t new_image_size); 95*a03ca8b9SKrzysztof Kosiński // - From a generator, useful for Zucchini-apply. 96*a03ca8b9SKrzysztof Kosiński OffsetMapper(EquivalenceSource&& equivalence_source, 97*a03ca8b9SKrzysztof Kosiński offset_t old_image_size, 98*a03ca8b9SKrzysztof Kosiński offset_t new_image_size); 99*a03ca8b9SKrzysztof Kosiński // - From an EquivalenceMap that needs to be processed, useful for 100*a03ca8b9SKrzysztof Kosiński // Zucchini-gen. 101*a03ca8b9SKrzysztof Kosiński OffsetMapper(const EquivalenceMap& equivalence_map, 102*a03ca8b9SKrzysztof Kosiński offset_t old_image_size, 103*a03ca8b9SKrzysztof Kosiński offset_t new_image_size); 104*a03ca8b9SKrzysztof Kosiński ~OffsetMapper(); 105*a03ca8b9SKrzysztof Kosiński size()106*a03ca8b9SKrzysztof Kosiński size_t size() const { return equivalences_.size(); } begin()107*a03ca8b9SKrzysztof Kosiński const_iterator begin() const { return equivalences_.begin(); } end()108*a03ca8b9SKrzysztof Kosiński const_iterator end() const { return equivalences_.end(); } 109*a03ca8b9SKrzysztof Kosiński 110*a03ca8b9SKrzysztof Kosiński // Returns naive extended forward-projection of "old" |offset| that follows 111*a03ca8b9SKrzysztof Kosiński // |eq|'s delta. |eq| needs not cover |offset|. 112*a03ca8b9SKrzysztof Kosiński // - Averts underflow / overflow by clamping to |[0, new_image_size_)|. 113*a03ca8b9SKrzysztof Kosiński // - However, |offset| is *not* restricted to |[0, old_image_size_)|; the 114*a03ca8b9SKrzysztof Kosiński // caller must to make the check (hence "naive"). 115*a03ca8b9SKrzysztof Kosiński offset_t NaiveExtendedForwardProject(const Equivalence& unit, 116*a03ca8b9SKrzysztof Kosiński offset_t offset) const; 117*a03ca8b9SKrzysztof Kosiński 118*a03ca8b9SKrzysztof Kosiński // Returns an offset in |new_image| corresponding to |offset| in |old_image|. 119*a03ca8b9SKrzysztof Kosiński // Assumes |equivalences_| to be non-empty. Cases: 120*a03ca8b9SKrzysztof Kosiński // - If |offset| is covered (i.e., in an "old" block), then use the delta of 121*a03ca8b9SKrzysztof Kosiński // the (unique) equivalence unit that covers |offset|. 122*a03ca8b9SKrzysztof Kosiński // - If |offset| is non-covered, but in |[0, old_image_size_)|, then find the 123*a03ca8b9SKrzysztof Kosiński // nearest "old" block, use its delta, and avert underflow / overflow by 124*a03ca8b9SKrzysztof Kosiński // clamping the result to |[0, new_image_size_)|. 125*a03ca8b9SKrzysztof Kosiński // - If |offset| is >= |new_image_size_| (a "fake offset"), then use 126*a03ca8b9SKrzysztof Kosiński // |new_image_size_ - old_image_size_| as the delta. 127*a03ca8b9SKrzysztof Kosiński offset_t ExtendedForwardProject(offset_t offset) const; 128*a03ca8b9SKrzysztof Kosiński 129*a03ca8b9SKrzysztof Kosiński // Given sorted |offsets|, applies a projection in-place of all offsets that 130*a03ca8b9SKrzysztof Kosiński // are part of a pruned equivalence from |old_image| to |new_image|. Other 131*a03ca8b9SKrzysztof Kosiński // offsets are removed from |offsets|. 132*a03ca8b9SKrzysztof Kosiński void ForwardProjectAll(std::deque<offset_t>* offsets) const; 133*a03ca8b9SKrzysztof Kosiński 134*a03ca8b9SKrzysztof Kosiński // Accessor for testing. equivalences()135*a03ca8b9SKrzysztof Kosiński const std::deque<Equivalence> equivalences() const { return equivalences_; } 136*a03ca8b9SKrzysztof Kosiński 137*a03ca8b9SKrzysztof Kosiński // Sorts |equivalences| by |src_offset| and removes all source overlaps; so a 138*a03ca8b9SKrzysztof Kosiński // source location that was covered by some Equivalence would become covered 139*a03ca8b9SKrzysztof Kosiński // by exactly one Equivalence. Moreover, for the offset, the equivalence 140*a03ca8b9SKrzysztof Kosiński // corresponds to the largest (pre-pruning) covering Equivalence, and in case 141*a03ca8b9SKrzysztof Kosiński // of a tie, the Equivalence with minimal |src_offset|. |equivalences| may 142*a03ca8b9SKrzysztof Kosiński // change in size since empty Equivalences are removed. 143*a03ca8b9SKrzysztof Kosiński static void PruneEquivalencesAndSortBySource( 144*a03ca8b9SKrzysztof Kosiński std::deque<Equivalence>* equivalences); 145*a03ca8b9SKrzysztof Kosiński 146*a03ca8b9SKrzysztof Kosiński private: 147*a03ca8b9SKrzysztof Kosiński // |equivalences_| is pruned, i.e., no "old" blocks overlap (and no "new" 148*a03ca8b9SKrzysztof Kosiński // block overlaps). Also, it is sorted by "old" offsets. 149*a03ca8b9SKrzysztof Kosiński std::deque<Equivalence> equivalences_; 150*a03ca8b9SKrzysztof Kosiński const offset_t old_image_size_; 151*a03ca8b9SKrzysztof Kosiński const offset_t new_image_size_; 152*a03ca8b9SKrzysztof Kosiński }; 153*a03ca8b9SKrzysztof Kosiński 154*a03ca8b9SKrzysztof Kosiński // Container of equivalences between |old_image_index| and |new_image_index|, 155*a03ca8b9SKrzysztof Kosiński // sorted by |Equivalence::dst_offset|, only used during patch generation. 156*a03ca8b9SKrzysztof Kosiński class EquivalenceMap { 157*a03ca8b9SKrzysztof Kosiński public: 158*a03ca8b9SKrzysztof Kosiński using const_iterator = std::vector<EquivalenceCandidate>::const_iterator; 159*a03ca8b9SKrzysztof Kosiński 160*a03ca8b9SKrzysztof Kosiński EquivalenceMap(); 161*a03ca8b9SKrzysztof Kosiński // Initializes the object with |equivalences|. 162*a03ca8b9SKrzysztof Kosiński explicit EquivalenceMap(std::vector<EquivalenceCandidate>&& candidates); 163*a03ca8b9SKrzysztof Kosiński EquivalenceMap(EquivalenceMap&&); 164*a03ca8b9SKrzysztof Kosiński EquivalenceMap(const EquivalenceMap&) = delete; 165*a03ca8b9SKrzysztof Kosiński ~EquivalenceMap(); 166*a03ca8b9SKrzysztof Kosiński 167*a03ca8b9SKrzysztof Kosiński // Finds relevant equivalences between |old_view| and |new_view|, using 168*a03ca8b9SKrzysztof Kosiński // suffix array |old_sa| computed from |old_view| and using 169*a03ca8b9SKrzysztof Kosiński // |targets_affinities| to evaluate similarity between references. This 170*a03ca8b9SKrzysztof Kosiński // function is not symmetric. Equivalences might overlap in |old_view|, but 171*a03ca8b9SKrzysztof Kosiński // not in |new_view|. It tries to maximize accumulated similarity within each 172*a03ca8b9SKrzysztof Kosiński // equivalence, while maximizing |new_view| coverage. The minimum similarity 173*a03ca8b9SKrzysztof Kosiński // of an equivalence is given by |min_similarity|. 174*a03ca8b9SKrzysztof Kosiński void Build(const std::vector<offset_t>& old_sa, 175*a03ca8b9SKrzysztof Kosiński const EncodedView& old_view, 176*a03ca8b9SKrzysztof Kosiński const EncodedView& new_view, 177*a03ca8b9SKrzysztof Kosiński const std::vector<TargetsAffinity>& targets_affinities, 178*a03ca8b9SKrzysztof Kosiński double min_similarity); 179*a03ca8b9SKrzysztof Kosiński size()180*a03ca8b9SKrzysztof Kosiński size_t size() const { return candidates_.size(); } begin()181*a03ca8b9SKrzysztof Kosiński const_iterator begin() const { return candidates_.begin(); } end()182*a03ca8b9SKrzysztof Kosiński const_iterator end() const { return candidates_.end(); } 183*a03ca8b9SKrzysztof Kosiński 184*a03ca8b9SKrzysztof Kosiński private: 185*a03ca8b9SKrzysztof Kosiński // Discovers equivalence candidates between |old_view| and |new_view| and 186*a03ca8b9SKrzysztof Kosiński // stores them in the object. Note that resulting candidates are not sorted 187*a03ca8b9SKrzysztof Kosiński // and might be overlapping in new image. 188*a03ca8b9SKrzysztof Kosiński void CreateCandidates(const std::vector<offset_t>& old_sa, 189*a03ca8b9SKrzysztof Kosiński const EncodedView& old_view, 190*a03ca8b9SKrzysztof Kosiński const EncodedView& new_view, 191*a03ca8b9SKrzysztof Kosiński const std::vector<TargetsAffinity>& targets_affinities, 192*a03ca8b9SKrzysztof Kosiński double min_similarity); 193*a03ca8b9SKrzysztof Kosiński // Sorts candidates by their offset in new image. 194*a03ca8b9SKrzysztof Kosiński void SortByDestination(); 195*a03ca8b9SKrzysztof Kosiński // Visits |candidates_| (sorted by |dst_offset|) and remove all destination 196*a03ca8b9SKrzysztof Kosiński // overlaps. Candidates with low similarity scores are more likely to be 197*a03ca8b9SKrzysztof Kosiński // shrunken. Unfit candidates may be removed. 198*a03ca8b9SKrzysztof Kosiński void Prune(const EncodedView& old_view, 199*a03ca8b9SKrzysztof Kosiński const EncodedView& new_view, 200*a03ca8b9SKrzysztof Kosiński const std::vector<TargetsAffinity>& targets_affinities, 201*a03ca8b9SKrzysztof Kosiński double min_similarity); 202*a03ca8b9SKrzysztof Kosiński 203*a03ca8b9SKrzysztof Kosiński std::vector<EquivalenceCandidate> candidates_; 204*a03ca8b9SKrzysztof Kosiński }; 205*a03ca8b9SKrzysztof Kosiński 206*a03ca8b9SKrzysztof Kosiński } // namespace zucchini 207*a03ca8b9SKrzysztof Kosiński 208*a03ca8b9SKrzysztof Kosiński #endif // COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_ 209