xref: /aosp_15_r20/external/zucchini/equivalence_map_unittest.cc (revision a03ca8b91e029cd15055c20c78c2e087c84792e4)
1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/zucchini/equivalence_map.h"
6 
7 #include <cstring>
8 #include <deque>
9 #include <map>
10 #include <string>
11 #include <utility>
12 #include <vector>
13 
14 #include "components/zucchini/encoded_view.h"
15 #include "components/zucchini/image_index.h"
16 #include "components/zucchini/suffix_array.h"
17 #include "components/zucchini/targets_affinity.h"
18 #include "components/zucchini/test_disassembler.h"
19 #include "testing/gtest/include/gtest/gtest.h"
20 
21 namespace zucchini {
22 
23 namespace {
24 
25 using OffsetDeque = std::deque<offset_t>;
26 
27 // Make all references 2 bytes long.
28 constexpr offset_t kReferenceSize = 2;
29 
30 // Creates and initialize an ImageIndex from |a| and with 2 types of references.
31 // The result is populated with |refs0| and |refs1|. |a| is expected to be a
32 // string literal valid for the lifetime of the object.
MakeImageIndexForTesting(const char * a,std::vector<Reference> && refs0,std::vector<Reference> && refs1)33 ImageIndex MakeImageIndexForTesting(const char* a,
34                                     std::vector<Reference>&& refs0,
35                                     std::vector<Reference>&& refs1) {
36   TestDisassembler disasm(
37       {kReferenceSize, TypeTag(0), PoolTag(0)}, std::move(refs0),
38       {kReferenceSize, TypeTag(1), PoolTag(0)}, std::move(refs1),
39       {kReferenceSize, TypeTag(2), PoolTag(1)}, {});
40 
41   ImageIndex image_index(
42       ConstBufferView(reinterpret_cast<const uint8_t*>(a), std::strlen(a)));
43 
44   EXPECT_TRUE(image_index.Initialize(&disasm));
45   return image_index;
46 }
47 
MakeTargetsAffinitiesForTesting(const ImageIndex & old_image_index,const ImageIndex & new_image_index,const EquivalenceMap & equivalence_map)48 std::vector<TargetsAffinity> MakeTargetsAffinitiesForTesting(
49     const ImageIndex& old_image_index,
50     const ImageIndex& new_image_index,
51     const EquivalenceMap& equivalence_map) {
52   std::vector<TargetsAffinity> target_affinities(old_image_index.PoolCount());
53   for (const auto& old_pool_tag_and_targets : old_image_index.target_pools()) {
54     PoolTag pool_tag = old_pool_tag_and_targets.first;
55     target_affinities[pool_tag.value()].InferFromSimilarities(
56         equivalence_map, old_pool_tag_and_targets.second.targets(),
57         new_image_index.pool(pool_tag).targets());
58   }
59   return target_affinities;
60 }
61 
62 }  // namespace
63 
TEST(EquivalenceMapTest,GetTokenSimilarity)64 TEST(EquivalenceMapTest, GetTokenSimilarity) {
65   ImageIndex old_index = MakeImageIndexForTesting(
66       "ab1122334455", {{2, 0}, {4, 1}, {6, 2}, {8, 2}}, {{10, 3}});
67   // Note: {4, 1} -> {6, 3} and {6, 2} -> {4, 1}, then result is sorted.
68   ImageIndex new_index = MakeImageIndexForTesting(
69       "a11b33224455", {{1, 0}, {4, 1}, {6, 3}, {8, 1}}, {{10, 2}});
70   std::vector<TargetsAffinity> affinities = MakeTargetsAffinitiesForTesting(
71       old_index, new_index,
72       EquivalenceMap({{{0, 0, 1}, 1.0}, {{1, 3, 1}, 1.0}}));
73 
74   // Raw match.
75   EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 0, 0));
76   // Raw mismatch.
77   EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 0, 1));
78   EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 1, 0));
79 
80   // Type mismatch.
81   EXPECT_EQ(kMismatchFatal,
82             GetTokenSimilarity(old_index, new_index, affinities, 0, 1));
83   EXPECT_EQ(kMismatchFatal,
84             GetTokenSimilarity(old_index, new_index, affinities, 2, 0));
85   EXPECT_EQ(kMismatchFatal,
86             GetTokenSimilarity(old_index, new_index, affinities, 2, 10));
87   EXPECT_EQ(kMismatchFatal,
88             GetTokenSimilarity(old_index, new_index, affinities, 10, 1));
89 
90   // Reference strong match.
91   EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 1));
92   EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 4, 6));
93 
94   // Reference weak match.
95   EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 6, 4));
96   EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 6, 8));
97   EXPECT_LT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 8, 4));
98 
99   // Weak match is not greater than strong match.
100   EXPECT_LE(GetTokenSimilarity(old_index, new_index, affinities, 6, 4),
101             GetTokenSimilarity(old_index, new_index, affinities, 2, 1));
102 
103   // Reference mismatch.
104   EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 4));
105   EXPECT_GT(0.0, GetTokenSimilarity(old_index, new_index, affinities, 2, 6));
106 }
107 
TEST(EquivalenceMapTest,GetEquivalenceSimilarity)108 TEST(EquivalenceMapTest, GetEquivalenceSimilarity) {
109   ImageIndex image_index =
110       MakeImageIndexForTesting("abcdef1122", {{6, 0}}, {{8, 1}});
111   std::vector<TargetsAffinity> affinities =
112       MakeTargetsAffinitiesForTesting(image_index, image_index, {});
113 
114   // Sanity check. These are no-op with length-0 equivalences.
115   EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
116                                           {0, 0, 0}));
117   EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
118                                           {0, 3, 0}));
119   EXPECT_EQ(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
120                                           {3, 0, 0}));
121 
122   // Now examine larger equivalences.
123   EXPECT_LT(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
124                                           {0, 0, 3}));
125   EXPECT_GE(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
126                                           {0, 3, 3}));
127   EXPECT_GE(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
128                                           {3, 0, 3}));
129 
130   EXPECT_LT(0.0, GetEquivalenceSimilarity(image_index, image_index, affinities,
131                                           {6, 6, 4}));
132 }
133 
TEST(EquivalenceMapTest,ExtendEquivalenceForward)134 TEST(EquivalenceMapTest, ExtendEquivalenceForward) {
135   auto test_extend_forward =
136       [](const ImageIndex old_index, const ImageIndex new_index,
137          const EquivalenceCandidate& equivalence, double base_similarity) {
138         return ExtendEquivalenceForward(
139                    old_index, new_index,
140                    MakeTargetsAffinitiesForTesting(old_index, new_index, {}),
141                    equivalence, base_similarity)
142             .eq;
143       };
144 
145   EXPECT_EQ(Equivalence({0, 0, 0}),
146             test_extend_forward(MakeImageIndexForTesting("", {}, {}),
147                                 MakeImageIndexForTesting("", {}, {}),
148                                 {{0, 0, 0}, 0.0}, 8.0));
149 
150   EXPECT_EQ(Equivalence({0, 0, 0}),
151             test_extend_forward(MakeImageIndexForTesting("banana", {}, {}),
152                                 MakeImageIndexForTesting("zzzz", {}, {}),
153                                 {{0, 0, 0}, 0.0}, 8.0));
154 
155   EXPECT_EQ(Equivalence({0, 0, 6}),
156             test_extend_forward(MakeImageIndexForTesting("banana", {}, {}),
157                                 MakeImageIndexForTesting("banana", {}, {}),
158                                 {{0, 0, 0}, 0.0}, 8.0));
159 
160   EXPECT_EQ(Equivalence({2, 2, 4}),
161             test_extend_forward(MakeImageIndexForTesting("banana", {}, {}),
162                                 MakeImageIndexForTesting("banana", {}, {}),
163                                 {{2, 2, 0}, 0.0}, 8.0));
164 
165   EXPECT_EQ(Equivalence({0, 0, 6}),
166             test_extend_forward(MakeImageIndexForTesting("bananaxx", {}, {}),
167                                 MakeImageIndexForTesting("bananayy", {}, {}),
168                                 {{0, 0, 0}, 0.0}, 8.0));
169 
170   EXPECT_EQ(
171       Equivalence({0, 0, 8}),
172       test_extend_forward(MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
173                           MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
174                           {{0, 0, 0}, 0.0}, 8.0));
175 
176   EXPECT_EQ(
177       Equivalence({0, 0, 6}),
178       test_extend_forward(MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
179                           MakeImageIndexForTesting("banana22", {}, {{6, 0}}),
180                           {{0, 0, 0}, 0.0}, 8.0));
181 
182   EXPECT_EQ(
183       Equivalence({0, 0, 17}),
184       test_extend_forward(MakeImageIndexForTesting("bananaxxpineapple", {}, {}),
185                           MakeImageIndexForTesting("bananayypineapple", {}, {}),
186                           {{0, 0, 0}, 0.0}, 8.0));
187 
188   EXPECT_EQ(
189       Equivalence({3, 0, 19}),
190       test_extend_forward(
191           MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}),
192           MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}),
193           {{3, 0, 0}, 0.0}, 8.0));
194 }
195 
TEST(EquivalenceMapTest,ExtendEquivalenceBackward)196 TEST(EquivalenceMapTest, ExtendEquivalenceBackward) {
197   auto test_extend_backward =
198       [](const ImageIndex old_index, const ImageIndex new_index,
199          const EquivalenceCandidate& equivalence, double base_similarity) {
200         return ExtendEquivalenceBackward(
201                    old_index, new_index,
202                    MakeTargetsAffinitiesForTesting(old_index, new_index, {}),
203                    equivalence, base_similarity)
204             .eq;
205       };
206 
207   EXPECT_EQ(Equivalence({0, 0, 0}),
208             test_extend_backward(MakeImageIndexForTesting("", {}, {}),
209                                  MakeImageIndexForTesting("", {}, {}),
210                                  {{0, 0, 0}, 0.0}, 8.0));
211 
212   EXPECT_EQ(Equivalence({6, 4, 0}),
213             test_extend_backward(MakeImageIndexForTesting("banana", {}, {}),
214                                  MakeImageIndexForTesting("zzzz", {}, {}),
215                                  {{6, 4, 0}, 0.0}, 8.0));
216 
217   EXPECT_EQ(Equivalence({0, 0, 6}),
218             test_extend_backward(MakeImageIndexForTesting("banana", {}, {}),
219                                  MakeImageIndexForTesting("banana", {}, {}),
220                                  {{6, 6, 0}, 0.0}, 8.0));
221 
222   EXPECT_EQ(Equivalence({2, 2, 6}),
223             test_extend_backward(MakeImageIndexForTesting("xxbanana", {}, {}),
224                                  MakeImageIndexForTesting("yybanana", {}, {}),
225                                  {{8, 8, 0}, 0.0}, 8.0));
226 
227   EXPECT_EQ(
228       Equivalence({0, 0, 8}),
229       test_extend_backward(MakeImageIndexForTesting("11banana", {{0, 0}}, {}),
230                            MakeImageIndexForTesting("11banana", {{0, 0}}, {}),
231                            {{8, 8, 0}, 0.0}, 8.0));
232 
233   EXPECT_EQ(
234       Equivalence({2, 2, 6}),
235       test_extend_backward(MakeImageIndexForTesting("11banana", {{0, 0}}, {}),
236                            MakeImageIndexForTesting("22banana", {}, {{0, 0}}),
237                            {{8, 8, 0}, 0.0}, 8.0));
238 
239   EXPECT_EQ(Equivalence({0, 0, 17}),
240             test_extend_backward(
241                 MakeImageIndexForTesting("bananaxxpineapple", {}, {}),
242                 MakeImageIndexForTesting("bananayypineapple", {}, {}),
243                 {{8, 8, 9}, 9.0}, 8.0));
244 
245   EXPECT_EQ(
246       Equivalence({3, 0, 19}),
247       test_extend_backward(
248           MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}),
249           MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}),
250           {{22, 19, 0}, 0.0}, 8.0));
251 }
252 
TEST(EquivalenceMapTest,PruneEquivalencesAndSortBySource)253 TEST(EquivalenceMapTest, PruneEquivalencesAndSortBySource) {
254   auto PruneEquivalencesAndSortBySourceTest =
255       [](std::deque<Equivalence>&& equivalences) {
256         OffsetMapper::PruneEquivalencesAndSortBySource(&equivalences);
257         return std::move(equivalences);
258       };
259 
260   EXPECT_EQ(std::deque<Equivalence>(),
261             PruneEquivalencesAndSortBySourceTest({}));
262   EXPECT_EQ(std::deque<Equivalence>({{0, 10, 1}}),
263             PruneEquivalencesAndSortBySourceTest({{0, 10, 1}}));
264   EXPECT_EQ(std::deque<Equivalence>(),
265             PruneEquivalencesAndSortBySourceTest({{0, 10, 0}}));
266   EXPECT_EQ(std::deque<Equivalence>({{0, 10, 1}, {1, 11, 1}}),
267             PruneEquivalencesAndSortBySourceTest({{0, 10, 1}, {1, 11, 1}}));
268   EXPECT_EQ(std::deque<Equivalence>({{0, 10, 2}, {2, 13, 1}}),
269             PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 2}}));
270   EXPECT_EQ(std::deque<Equivalence>({{0, 10, 2}}),
271             PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 1}}));
272   EXPECT_EQ(std::deque<Equivalence>({{0, 10, 2}, {2, 14, 1}}),
273             PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 13, 2}}));
274   EXPECT_EQ(std::deque<Equivalence>({{0, 10, 1}, {1, 12, 3}}),
275             PruneEquivalencesAndSortBySourceTest({{0, 10, 2}, {1, 12, 3}}));
276   EXPECT_EQ(std::deque<Equivalence>({{0, 10, 3}, {3, 16, 2}}),
277             PruneEquivalencesAndSortBySourceTest(
278                 {{0, 10, 3}, {1, 13, 3}, {3, 16, 2}}));  // Pruning is greedy
279 
280   // Consider following pattern that may cause O(n^2) behavior if not handled
281   // properly.
282   //  ***************
283   //            **********
284   //             ********
285   //              ******
286   //               ****
287   //                **
288   //                 ***************
289   // This test case makes sure the function does not stall on a large instance
290   // of this pattern.
291   EXPECT_EQ(std::deque<Equivalence>({{0, 10, +300000}, {300000, 30, +300000}}),
292             PruneEquivalencesAndSortBySourceTest([] {
293               std::deque<Equivalence> equivalenses;
294               equivalenses.push_back({0, 10, +300000});
295               for (offset_t i = 0; i < 100000; ++i)
296                 equivalenses.push_back({200000 + i, 20, +200000 - 2 * i});
297               equivalenses.push_back({300000, 30, +300000});
298               return equivalenses;
299             }()));
300 }
301 
TEST(EquivalenceMapTest,NaiveExtendedForwardProject)302 TEST(EquivalenceMapTest, NaiveExtendedForwardProject) {
303   constexpr size_t kOldImageSize = 1000U;
304   constexpr size_t kNewImageSize = 1000U;
305   OffsetMapper offset_mapper(std::deque<Equivalence>(), kOldImageSize,
306                              kNewImageSize);
307 
308   // Convenience function to declutter.
309   auto project = [&offset_mapper](const Equivalence& eq, offset_t offset) {
310     return offset_mapper.NaiveExtendedForwardProject(eq, offset);
311   };
312 
313   // Equivalence with delta = 0.
314   Equivalence eq_stay = {10, 10, +5};  // [10,15) -> [10,15).
315   for (offset_t offset = 0U; offset < 1000U; ++offset) {
316     EXPECT_EQ(offset, project(eq_stay, offset));
317   }
318   // Saturate since result would overflow "new".
319   EXPECT_EQ(999U, project(eq_stay, 1000U));
320   EXPECT_EQ(999U, project(eq_stay, 2000U));
321   EXPECT_EQ(999U, project(eq_stay, kOffsetBound - 1));
322 
323   // Equivalence with delta = -10.
324   Equivalence eq_dec = {20, 10, +12};  // [20,32) --> [10,22).
325   // Offsets in "old" block.
326   EXPECT_EQ(10U, project(eq_dec, 20U));
327   EXPECT_EQ(11U, project(eq_dec, 21U));
328   EXPECT_EQ(21U, project(eq_dec, 31U));
329   // Offsets before "old" block, no underflow
330   EXPECT_EQ(9U, project(eq_dec, 19U));
331   EXPECT_EQ(1U, project(eq_dec, 11U));
332   EXPECT_EQ(0U, project(eq_dec, 10U));
333   // Offsets before "old" block, underflow (possible since delta < 0).
334   EXPECT_EQ(0U, project(eq_dec, 9U));
335   EXPECT_EQ(0U, project(eq_dec, 5U));
336   EXPECT_EQ(0U, project(eq_dec, 0U));
337   // Offsets after "old" block, no overflow.
338   EXPECT_EQ(20U, project(eq_dec, 30U));
339   EXPECT_EQ(64U, project(eq_dec, 74U));
340   EXPECT_EQ(90U, project(eq_dec, 100U));
341   EXPECT_EQ(490U, project(eq_dec, 500U));
342   EXPECT_EQ(999U, project(eq_dec, 1009U));
343   // Offsets after "old" block, overflow.
344   EXPECT_EQ(999U, project(eq_dec, 1010U));
345   EXPECT_EQ(999U, project(eq_dec, 2000U));
346   EXPECT_EQ(999U, project(eq_dec, kOffsetBound - 1));
347 
348   // Equivalence with delta = +10.
349   Equivalence eq_inc = {7, 17, +80};  // [7,87) --> [17,97).
350   // Offsets in "old" block.
351   EXPECT_EQ(17U, project(eq_inc, 7U));
352   EXPECT_EQ(60U, project(eq_inc, 50U));
353   EXPECT_EQ(96U, project(eq_inc, 86U));
354   // Offsets before "old" block, underflow impossible since delta >= 0.
355   EXPECT_EQ(16U, project(eq_inc, 6U));
356   EXPECT_EQ(10U, project(eq_inc, 0U));
357   // Offsets after "old" block, no overflow.
358   EXPECT_EQ(97U, project(eq_inc, 87U));
359   EXPECT_EQ(510U, project(eq_inc, 500U));
360   EXPECT_EQ(999U, project(eq_inc, 989U));
361   // Offsets after "old" block, overflow.
362   EXPECT_EQ(999U, project(eq_inc, 990U));
363   EXPECT_EQ(999U, project(eq_inc, 2000U));
364   EXPECT_EQ(999U, project(eq_inc, kOffsetBound - 1));
365 }
366 
TEST(EquivalenceMapTest,ExtendedForwardProject)367 TEST(EquivalenceMapTest, ExtendedForwardProject) {
368   // EquivalenceMaps provided must be sorted by "old" offset, and pruned.
369   // [0,2) --> [10,12), [2,3) --> [13,14), [4,6) --> [16,18).
370   OffsetMapper offset_mapper1({{0, 10, +2}, {2, 13, +1}, {4, 16, +2}}, 20U,
371                               25U);
372   EXPECT_EQ(10U, offset_mapper1.ExtendedForwardProject(0U));
373   EXPECT_EQ(11U, offset_mapper1.ExtendedForwardProject(1U));
374   EXPECT_EQ(13U, offset_mapper1.ExtendedForwardProject(2U));
375   EXPECT_EQ(14U, offset_mapper1.ExtendedForwardProject(3U));  // Previous equiv.
376   EXPECT_EQ(16U, offset_mapper1.ExtendedForwardProject(4U));
377   EXPECT_EQ(17U, offset_mapper1.ExtendedForwardProject(5U));
378   EXPECT_EQ(18U, offset_mapper1.ExtendedForwardProject(6U));  // Previous equiv.
379   // Fake offsets.
380   EXPECT_EQ(25U, offset_mapper1.ExtendedForwardProject(20U));
381   EXPECT_EQ(26U, offset_mapper1.ExtendedForwardProject(21U));
382   EXPECT_EQ(1005U, offset_mapper1.ExtendedForwardProject(1000U));
383   EXPECT_EQ(kOffsetBound - 1,
384             offset_mapper1.ExtendedForwardProject(kOffsetBound - 1));
385 
386   // [0,2) --> [10,12), [13,14) --> [2,3), [16,18) --> [4,6).
387   OffsetMapper offset_mapper2({{0, 10, +2}, {13, 2, +1}, {16, 4, +2}}, 25U,
388                               20U);
389   EXPECT_EQ(10U, offset_mapper2.ExtendedForwardProject(0U));
390   EXPECT_EQ(11U, offset_mapper2.ExtendedForwardProject(1U));
391   EXPECT_EQ(2U, offset_mapper2.ExtendedForwardProject(13U));
392   EXPECT_EQ(3U, offset_mapper2.ExtendedForwardProject(14U));  // Previous equiv.
393   EXPECT_EQ(4U, offset_mapper2.ExtendedForwardProject(16U));
394   EXPECT_EQ(5U, offset_mapper2.ExtendedForwardProject(17U));
395   EXPECT_EQ(6U, offset_mapper2.ExtendedForwardProject(18U));  // Previous equiv.
396   // Fake offsets.
397   EXPECT_EQ(20U, offset_mapper2.ExtendedForwardProject(25U));
398   EXPECT_EQ(21U, offset_mapper2.ExtendedForwardProject(26U));
399   EXPECT_EQ(995U, offset_mapper2.ExtendedForwardProject(1000U));
400   EXPECT_EQ(kOffsetBound - 1 - 5,
401             offset_mapper2.ExtendedForwardProject(kOffsetBound - 1));
402 }
403 
TEST(EquivalenceMapTest,ExtendedForwardProjectEncoding)404 TEST(EquivalenceMapTest, ExtendedForwardProjectEncoding) {
405   // Tests OffsetMapper::ExtendedForwardProject(), which maps every "old" offset
406   // to a "new" offset, with possible overlap (even though blocks don't
407   // overlap). Not testing real offsets only (no fake offsets).
408   // |old_spec| is a string like "<<aaAAaabbBBbcCCc>>":
409   // - Upper case letters are covered "old" offsets.
410   // - Lower case letters are non-covered offsets that are properly mapped using
411   //   nearest "old" block.
412   // - '<' denotes underflow (clamped to 0).
413   // - '>' denotes overflow (clampled to "new" size - 1).
414   // |new_spec| is a string like "aaAA(ab)(ab)BBb..cCCc":
415   // - Upper and lower case letters are mapped "new" targets, occurring in the
416   //   order that they appear in |old_spec|.
417   // - '.' are "new" offsets that appear as output.
418   // - '(' and ')' surround a single "new" location that are repeated as output.
419   int case_no = 0;
420   auto run_test = [&case_no](std::deque<Equivalence>&& equivalences,
421                              const std::string& old_spec,
422                              const std::string& new_spec) {
423     const size_t old_size = old_spec.length();
424     // Build expected "new" offsets, queue up for each letter.
425     std::map<char, std::deque<offset_t>> expected;
426     offset_t cur_new_offset = 0;
427     char state = ')';  // ')' = increase offset, '(' = stay.
428     for (char ch : new_spec) {
429       if (ch == '(' || ch == ')')
430         state = ch;
431       else
432         expected[ch].push_back(cur_new_offset);
433       cur_new_offset += (state == ')') ? 1 : 0;
434     }
435     const size_t new_size = cur_new_offset;
436     // Forward-project for each "old" index, pull from queue from matching
437     // letter, and compare.
438     OffsetMapper offset_mapper(std::move(equivalences), old_size, new_size);
439     for (offset_t old_offset = 0; old_offset < old_size; ++old_offset) {
440       offset_t new_offset = offset_mapper.ExtendedForwardProject(old_offset);
441       char ch = old_spec[old_offset];
442       if (ch == '<') {  // Special case: Underflow.
443         EXPECT_EQ(0U, new_offset) << "in case " << case_no;
444       } else if (ch == '>') {  // Special case: Overflow.
445         EXPECT_EQ(static_cast<offset_t>(new_size - 1), new_offset)
446             << "in case " << case_no;
447       } else {
448         std::deque<offset_t>& q = expected[ch];
449         ASSERT_FALSE(q.empty());
450         EXPECT_EQ(q.front(), new_offset) << "in case " << case_no;
451         q.pop_front();
452         if (q.empty())
453           expected.erase(ch);
454       }
455     }
456     // Clear useless '.', and ensure everything is consumed.
457     expected.erase('.');
458     EXPECT_TRUE(expected.empty()) << "in case " << case_no;
459     ++case_no;
460   };
461 
462   // Trivial: [5,9) --> [5,9).
463   run_test({{5, 5, +4}}, "aaaaaAAAAaaaaa", "aaaaaAAAAaaaaa");
464   // Swap: [0,4) --> [6,10), [4,10) --> [0,6).
465   run_test({{0, 6, +4}, {4, 0, +6}}, "AAAABBBBBB", "BBBBBBAAAA");
466   // Overlap: [0,4) --> [2,6), [4,10) --> [3,9).
467   run_test({{0, 2, +4}, {4, 3, +6}}, "AAAABBBBBB", "..A(AB)(AB)(AB)BBB.");
468   // Converge: [1,3) --> [2,4), [7,8) --> [6,7).
469   run_test({{1, 2, +2}, {7, 6, +1}}, "aAAaabbBbb", ".aAA(ab)(ab)Bbb.");
470   // Converge with tie-breaker: [1,3) --> [2,4), [8,9) --> [7,8).
471   run_test({{1, 2, +2}, {8, 7, +1}}, "aAAaaabbBb", ".aAAa(ab)(ab)Bb.");
472   // Shift left: [6,8) --> [2,4): Underflow occurs.
473   run_test({{6, 2, +2}}, "<<<<aaAAaa", "aaAAaa....");
474   // Shift right: [2,5) --> [6,9): Overflow occurs.
475   run_test({{2, 6, +3}}, "aaAAAa>>>>", "....aaAAAa");
476   // Diverge: [3,5) --> [1,3], [7,9) --> [9,11).
477   run_test({{3, 1, +2}, {7, 9, +2}}, "<<aAAabBBb>>", "aAAa....bBBb");
478   // Pile-up: [0,2) --> [7,9), [9,11) --> [9,11), [18,20) --> [11,13).
479   run_test({{0, 7, +2}, {9, 9, +2}, {18, 11, +2}}, "AAaaaabbbBBbbbbcccCC",
480            "......b(Ab)(Abc)(Bac)(Bac)(Cab)(Cab)bb.....");
481   // Inverse pile-up: [7,9) --> [0,2), [9,11) --> [9,11), [13,15) --> [18,20).
482   run_test({{7, 0, +2}, {9, 9, +2}, {11, 18, +2}}, "<<<<<<<AABBCC>>>>>>>",
483            "AA.......BB.......CC");
484   // Sparse rotate: [3,4) -> [10,11), [10,11) --> [17,18), [17,18) --> [3,4).
485   run_test({{3, 10, +1}, {10, 17, +1}, {17, 3, +1}}, "aaaAaaabbbBbbbcccCccc",
486            "cccCcccaaaAaaabbbBbbb");
487   // Messy swap: [2,4) --> [10,12), [12,16) --> [3,7).
488   run_test({{2, 10, +2}, {12, 3, +4}}, "aaAAaa>><bbbBBBBbb",
489            "bbbBBBBb(ab)aAAaa");
490   // Messy expand: [6,8) --> [3,5), [10,11) -> [11,12), [14,17) --> [16,19).
491   run_test({{6, 3, +2}, {10, 11, +1}, {14, 16, +3}}, "<<<aaaAAabBbbcCCCc>>>>>",
492            "aaaAAa....bBbb.cCCCc");
493   // Interleave: [1,2) --> [0,1), [5,6) --> [10,11), [6,8) --> [3,5),
494   //             [11,13) --> [12,14), [14,16) --> [6,8), [17,18) --> [17,18).
495   run_test({{1, 0, +1},
496             {5, 10, +1},
497             {6, 3, +2},
498             {11, 12, +2},
499             {14, 6, +2},
500             {17, 17, +1}},
501            "<AaabBCCccdDDdEEeFf>", "AaaCCc(Ec)EebBdDDd..Ff");
502 }
503 
TEST(EquivalenceMapTest,ForwardProjectAll)504 TEST(EquivalenceMapTest, ForwardProjectAll) {
505   auto ForwardProjectAllTest = [](const OffsetMapper& offset_mapper,
506                                   std::initializer_list<offset_t> offsets) {
507     std::deque<offset_t> offsets_vec(offsets);
508     offset_mapper.ForwardProjectAll(&offsets_vec);
509     return offsets_vec;
510   };
511 
512   // [0,2) --> [10,12), [2,3) --> [13,14), [4,6) --> [16,18).
513   OffsetMapper offset_mapper1({{0, 10, +2}, {2, 13, +1}, {4, 16, +2}}, 100U,
514                               100U);
515   EXPECT_EQ(OffsetDeque({10}), ForwardProjectAllTest(offset_mapper1, {0}));
516   EXPECT_EQ(OffsetDeque({13}), ForwardProjectAllTest(offset_mapper1, {2}));
517   EXPECT_EQ(OffsetDeque({}), ForwardProjectAllTest(offset_mapper1, {3}));
518   EXPECT_EQ(OffsetDeque({10, 13}),
519             ForwardProjectAllTest(offset_mapper1, {0, 2}));
520   EXPECT_EQ(OffsetDeque({11, 13, 17}),
521             ForwardProjectAllTest(offset_mapper1, {1, 2, 5}));
522   EXPECT_EQ(OffsetDeque({11, 17}),
523             ForwardProjectAllTest(offset_mapper1, {1, 3, 5}));
524   EXPECT_EQ(OffsetDeque({10, 11, 13, 16, 17}),
525             ForwardProjectAllTest(offset_mapper1, {0, 1, 2, 3, 4, 5, 6}));
526 
527   // [0,2) --> [10,12), [13,14) --> [2,3), [16,18) --> [4,6).
528   OffsetMapper offset_mapper2({{0, 10, +2}, {13, 2, +1}, {16, 4, +2}}, 100U,
529                               100U);
530   EXPECT_EQ(OffsetDeque({2}), ForwardProjectAllTest(offset_mapper2, {13}));
531   EXPECT_EQ(OffsetDeque({10, 2}),
532             ForwardProjectAllTest(offset_mapper2, {0, 13}));
533   EXPECT_EQ(OffsetDeque({11, 2, 5}),
534             ForwardProjectAllTest(offset_mapper2, {1, 13, 17}));
535   EXPECT_EQ(OffsetDeque({11, 5}),
536             ForwardProjectAllTest(offset_mapper2, {1, 14, 17}));
537   EXPECT_EQ(OffsetDeque({10, 11, 2, 4, 5}),
538             ForwardProjectAllTest(offset_mapper2, {0, 1, 13, 14, 16, 17, 18}));
539 }
540 
TEST(EquivalenceMapTest,Build)541 TEST(EquivalenceMapTest, Build) {
542   auto test_build_equivalence = [](const ImageIndex old_index,
543                                    const ImageIndex new_index,
544                                    double minimum_similarity) {
545     auto affinities = MakeTargetsAffinitiesForTesting(old_index, new_index, {});
546 
547     EncodedView old_view(old_index);
548     EncodedView new_view(new_index);
549 
550     for (const auto& old_pool_tag_and_targets : old_index.target_pools()) {
551       PoolTag pool_tag = old_pool_tag_and_targets.first;
552       std::vector<uint32_t> old_labels;
553       std::vector<uint32_t> new_labels;
554       size_t label_bound = affinities[pool_tag.value()].AssignLabels(
555           1.0, &old_labels, &new_labels);
556       old_view.SetLabels(pool_tag, std::move(old_labels), label_bound);
557       new_view.SetLabels(pool_tag, std::move(new_labels), label_bound);
558     }
559 
560     std::vector<offset_t> old_sa =
561         MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality());
562 
563     EquivalenceMap equivalence_map;
564     equivalence_map.Build(old_sa, old_view, new_view, affinities,
565                           minimum_similarity);
566 
567     offset_t current_dst_offset = 0;
568     offset_t coverage = 0;
569     for (const auto& candidate : equivalence_map) {
570       EXPECT_GE(candidate.eq.dst_offset, current_dst_offset);
571       EXPECT_GT(candidate.eq.length, offset_t(0));
572       EXPECT_LE(candidate.eq.src_offset + candidate.eq.length,
573                 old_index.size());
574       EXPECT_LE(candidate.eq.dst_offset + candidate.eq.length,
575                 new_index.size());
576       EXPECT_GE(candidate.similarity, minimum_similarity);
577       current_dst_offset = candidate.eq.dst_offset;
578       coverage += candidate.eq.length;
579     }
580     return coverage;
581   };
582 
583   EXPECT_EQ(0U,
584             test_build_equivalence(MakeImageIndexForTesting("", {}, {}),
585                                    MakeImageIndexForTesting("", {}, {}), 4.0));
586 
587   EXPECT_EQ(0U, test_build_equivalence(
588                     MakeImageIndexForTesting("", {}, {}),
589                     MakeImageIndexForTesting("banana", {}, {}), 4.0));
590 
591   EXPECT_EQ(0U,
592             test_build_equivalence(MakeImageIndexForTesting("banana", {}, {}),
593                                    MakeImageIndexForTesting("", {}, {}), 4.0));
594 
595   EXPECT_EQ(0U, test_build_equivalence(
596                     MakeImageIndexForTesting("banana", {}, {}),
597                     MakeImageIndexForTesting("zzzz", {}, {}), 4.0));
598 
599   EXPECT_EQ(6U, test_build_equivalence(
600                     MakeImageIndexForTesting("banana", {}, {}),
601                     MakeImageIndexForTesting("banana", {}, {}), 4.0));
602 
603   EXPECT_EQ(6U, test_build_equivalence(
604                     MakeImageIndexForTesting("bananaxx", {}, {}),
605                     MakeImageIndexForTesting("bananayy", {}, {}), 4.0));
606 
607   EXPECT_EQ(8U, test_build_equivalence(
608                     MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
609                     MakeImageIndexForTesting("banana11", {{6, 0}}, {}), 4.0));
610 
611   EXPECT_EQ(6U, test_build_equivalence(
612                     MakeImageIndexForTesting("banana11", {{6, 0}}, {}),
613                     MakeImageIndexForTesting("banana22", {}, {{6, 0}}), 4.0));
614 
615   EXPECT_EQ(
616       15U,
617       test_build_equivalence(
618           MakeImageIndexForTesting("banana11pineapple", {{6, 0}}, {}),
619           MakeImageIndexForTesting("banana22pineapple", {}, {{6, 0}}), 4.0));
620 
621   EXPECT_EQ(
622       15U,
623       test_build_equivalence(
624           MakeImageIndexForTesting("bananaxxxxxxxxpineapple", {}, {}),
625           MakeImageIndexForTesting("bananayyyyyyyypineapple", {}, {}), 4.0));
626 
627   EXPECT_EQ(
628       19U,
629       test_build_equivalence(
630           MakeImageIndexForTesting("foobanana11xxpineapplexx", {{9, 0}}, {}),
631           MakeImageIndexForTesting("banana11yypineappleyy", {{6, 0}}, {}),
632           4.0));
633 }
634 
635 }  // namespace zucchini
636