xref: /aosp_15_r20/external/stg/deduplication.cc (revision 9e3b08ae94a55201065475453d799e8b1378bea6)
1*9e3b08aeSAndroid Build Coastguard Worker // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2*9e3b08aeSAndroid Build Coastguard Worker // -*- mode: C++ -*-
3*9e3b08aeSAndroid Build Coastguard Worker //
4*9e3b08aeSAndroid Build Coastguard Worker // Copyright 2022 Google LLC
5*9e3b08aeSAndroid Build Coastguard Worker //
6*9e3b08aeSAndroid Build Coastguard Worker // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7*9e3b08aeSAndroid Build Coastguard Worker // "License"); you may not use this file except in compliance with the
8*9e3b08aeSAndroid Build Coastguard Worker // License.  You may obtain a copy of the License at
9*9e3b08aeSAndroid Build Coastguard Worker //
10*9e3b08aeSAndroid Build Coastguard Worker //     https://llvm.org/LICENSE.txt
11*9e3b08aeSAndroid Build Coastguard Worker //
12*9e3b08aeSAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software
13*9e3b08aeSAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS,
14*9e3b08aeSAndroid Build Coastguard Worker // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15*9e3b08aeSAndroid Build Coastguard Worker // See the License for the specific language governing permissions and
16*9e3b08aeSAndroid Build Coastguard Worker // limitations under the License.
17*9e3b08aeSAndroid Build Coastguard Worker //
18*9e3b08aeSAndroid Build Coastguard Worker // Author: Giuliano Procida
19*9e3b08aeSAndroid Build Coastguard Worker 
20*9e3b08aeSAndroid Build Coastguard Worker #include "deduplication.h"
21*9e3b08aeSAndroid Build Coastguard Worker 
22*9e3b08aeSAndroid Build Coastguard Worker #include <cstddef>
23*9e3b08aeSAndroid Build Coastguard Worker #include <unordered_map>
24*9e3b08aeSAndroid Build Coastguard Worker #include <utility>
25*9e3b08aeSAndroid Build Coastguard Worker #include <vector>
26*9e3b08aeSAndroid Build Coastguard Worker 
27*9e3b08aeSAndroid Build Coastguard Worker #include "equality.h"
28*9e3b08aeSAndroid Build Coastguard Worker #include "equality_cache.h"
29*9e3b08aeSAndroid Build Coastguard Worker #include "graph.h"
30*9e3b08aeSAndroid Build Coastguard Worker #include "hashing.h"
31*9e3b08aeSAndroid Build Coastguard Worker #include "runtime.h"
32*9e3b08aeSAndroid Build Coastguard Worker #include "substitution.h"
33*9e3b08aeSAndroid Build Coastguard Worker 
34*9e3b08aeSAndroid Build Coastguard Worker namespace stg {
35*9e3b08aeSAndroid Build Coastguard Worker 
Deduplicate(Runtime & runtime,Graph & graph,Id root,const Hashes & hashes)36*9e3b08aeSAndroid Build Coastguard Worker Id Deduplicate(Runtime& runtime, Graph& graph, Id root, const Hashes& hashes) {
37*9e3b08aeSAndroid Build Coastguard Worker   // Partition the nodes by hash.
38*9e3b08aeSAndroid Build Coastguard Worker   std::unordered_map<HashValue, std::vector<Id>> partitions;
39*9e3b08aeSAndroid Build Coastguard Worker   {
40*9e3b08aeSAndroid Build Coastguard Worker     const Time x(runtime, "partition nodes");
41*9e3b08aeSAndroid Build Coastguard Worker     for (const auto& [id, fp] : hashes) {
42*9e3b08aeSAndroid Build Coastguard Worker       partitions[fp].push_back(id);
43*9e3b08aeSAndroid Build Coastguard Worker     }
44*9e3b08aeSAndroid Build Coastguard Worker   }
45*9e3b08aeSAndroid Build Coastguard Worker   Counter(runtime, "deduplicate.nodes") = hashes.size();
46*9e3b08aeSAndroid Build Coastguard Worker   Counter(runtime, "deduplicate.hashes") = partitions.size();
47*9e3b08aeSAndroid Build Coastguard Worker 
48*9e3b08aeSAndroid Build Coastguard Worker   Histogram hash_partition_size(runtime, "deduplicate.hash_partition_size");
49*9e3b08aeSAndroid Build Coastguard Worker   Counter min_comparisons(runtime, "deduplicate.min_comparisons");
50*9e3b08aeSAndroid Build Coastguard Worker   Counter max_comparisons(runtime, "deduplicate.max_comparisons");
51*9e3b08aeSAndroid Build Coastguard Worker   for (const auto& [fp, ids] : partitions) {
52*9e3b08aeSAndroid Build Coastguard Worker     const auto n = ids.size();
53*9e3b08aeSAndroid Build Coastguard Worker     hash_partition_size.Add(n);
54*9e3b08aeSAndroid Build Coastguard Worker     min_comparisons += n - 1;
55*9e3b08aeSAndroid Build Coastguard Worker     max_comparisons += n * (n - 1) / 2;
56*9e3b08aeSAndroid Build Coastguard Worker   }
57*9e3b08aeSAndroid Build Coastguard Worker 
58*9e3b08aeSAndroid Build Coastguard Worker   // Refine partitions of nodes with the same fingerprints.
59*9e3b08aeSAndroid Build Coastguard Worker   EqualityCache cache(runtime, hashes);
60*9e3b08aeSAndroid Build Coastguard Worker   Equals<EqualityCache> equals(graph, cache);
61*9e3b08aeSAndroid Build Coastguard Worker   Counter equalities(runtime, "deduplicate.equalities");
62*9e3b08aeSAndroid Build Coastguard Worker   Counter inequalities(runtime, "deduplicate.inequalities");
63*9e3b08aeSAndroid Build Coastguard Worker   {
64*9e3b08aeSAndroid Build Coastguard Worker     const Time x(runtime, "find duplicates");
65*9e3b08aeSAndroid Build Coastguard Worker     for (auto& [fp, ids] : partitions) {
66*9e3b08aeSAndroid Build Coastguard Worker       while (ids.size() > 1) {
67*9e3b08aeSAndroid Build Coastguard Worker         std::vector<Id> todo;
68*9e3b08aeSAndroid Build Coastguard Worker         const Id candidate = ids[0];
69*9e3b08aeSAndroid Build Coastguard Worker         for (size_t i = 1; i < ids.size(); ++i) {
70*9e3b08aeSAndroid Build Coastguard Worker           if (equals(ids[i], candidate)) {
71*9e3b08aeSAndroid Build Coastguard Worker             ++equalities;
72*9e3b08aeSAndroid Build Coastguard Worker           } else {
73*9e3b08aeSAndroid Build Coastguard Worker             todo.push_back(ids[i]);
74*9e3b08aeSAndroid Build Coastguard Worker             ++inequalities;
75*9e3b08aeSAndroid Build Coastguard Worker           }
76*9e3b08aeSAndroid Build Coastguard Worker         }
77*9e3b08aeSAndroid Build Coastguard Worker         std::swap(todo, ids);
78*9e3b08aeSAndroid Build Coastguard Worker       }
79*9e3b08aeSAndroid Build Coastguard Worker     }
80*9e3b08aeSAndroid Build Coastguard Worker   }
81*9e3b08aeSAndroid Build Coastguard Worker 
82*9e3b08aeSAndroid Build Coastguard Worker   // Keep one representative of each set of duplicates.
83*9e3b08aeSAndroid Build Coastguard Worker   Counter unique(runtime, "deduplicate.unique");
84*9e3b08aeSAndroid Build Coastguard Worker   Counter duplicate(runtime, "deduplicate.duplicate");
85*9e3b08aeSAndroid Build Coastguard Worker   const auto remap = [&cache](Id& id) {
86*9e3b08aeSAndroid Build Coastguard Worker     // update id to representative id, avoiding silent stores
87*9e3b08aeSAndroid Build Coastguard Worker     const Id fid = cache.Find(id);
88*9e3b08aeSAndroid Build Coastguard Worker     if (fid != id) {
89*9e3b08aeSAndroid Build Coastguard Worker       id = fid;
90*9e3b08aeSAndroid Build Coastguard Worker     }
91*9e3b08aeSAndroid Build Coastguard Worker   };
92*9e3b08aeSAndroid Build Coastguard Worker   const Substitute substitute(graph, remap);
93*9e3b08aeSAndroid Build Coastguard Worker   {
94*9e3b08aeSAndroid Build Coastguard Worker     const Time x(runtime, "rewrite");
95*9e3b08aeSAndroid Build Coastguard Worker     for (const auto& [id, fp] : hashes) {
96*9e3b08aeSAndroid Build Coastguard Worker       const Id fid = cache.Find(id);
97*9e3b08aeSAndroid Build Coastguard Worker       if (fid != id) {
98*9e3b08aeSAndroid Build Coastguard Worker         graph.Remove(id);
99*9e3b08aeSAndroid Build Coastguard Worker         ++duplicate;
100*9e3b08aeSAndroid Build Coastguard Worker       } else {
101*9e3b08aeSAndroid Build Coastguard Worker         substitute(id);
102*9e3b08aeSAndroid Build Coastguard Worker         ++unique;
103*9e3b08aeSAndroid Build Coastguard Worker       }
104*9e3b08aeSAndroid Build Coastguard Worker     }
105*9e3b08aeSAndroid Build Coastguard Worker   }
106*9e3b08aeSAndroid Build Coastguard Worker 
107*9e3b08aeSAndroid Build Coastguard Worker   // In case the root node was remapped.
108*9e3b08aeSAndroid Build Coastguard Worker   substitute.Update(root);
109*9e3b08aeSAndroid Build Coastguard Worker   return root;
110*9e3b08aeSAndroid Build Coastguard Worker }
111*9e3b08aeSAndroid Build Coastguard Worker 
112*9e3b08aeSAndroid Build Coastguard Worker }  // namespace stg
113