1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2021-2024 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License. You may obtain a copy of the License at
9 //
10 // https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Giuliano Procida
19
20 #ifndef STG_ORDER_H_
21 #define STG_ORDER_H_
22
23 #include <algorithm>
24 #include <cstddef>
25 #include <optional>
26 #include <utility>
27 #include <vector>
28
29 #include "error.h"
30
31 namespace stg {
32 // Combines two orderings of unique items, eliminating duplicates between the
33 // sequences, preserving the relative positions of the items in the second
34 // ordering and incorporating as much of the first's order as is compatible.
35 //
36 // The two orderings are reconciled by examining each item from the first
37 // sequence in turn. If it is not present in the second sequence, it is greedily
38 // appended to the combined sequence. If it is present but hasn't yet been
39 // appended, then all items from the current position in the second sequence up
40 // to and including it are appended in bulk. Otherwise it is skipped. Finally,
41 // all items from the current position in the second sequence are appended.
42 //
43 // This guarantees that the second sequence is a subsequence of the combined
44 // sequence and that items unique to the first subsequence are output as early
45 // as possible and only out of order if they are one of the extra items appended
46 // in bulk.
47 //
48 // Example, before and after:
49 //
50 // indexes1: rose, george, emily
51 // indexes2: george, ted, emily
52 //
53 // combined: rose, george, ted, emily
54 template <typename T>
CombineOrders(const std::vector<T> & indexes1,const std::vector<T> & indexes2,size_t combined_size)55 std::vector<T> CombineOrders(const std::vector<T>& indexes1,
56 const std::vector<T>& indexes2,
57 size_t combined_size) {
58 std::vector<T> combined;
59 combined.reserve(combined_size);
60 // keep track of where we are up to in indexes2
61 auto position = indexes2.begin();
62 for (const auto& value : indexes1) {
63 auto found = std::find(indexes2.begin(), indexes2.end(), value);
64 if (found == indexes2.end()) {
65 // value not found in the second ordering, append immediately
66 combined.push_back(value);
67 } else {
68 // copy up to and including found value, if not yet copied
69 for (; position <= found; ++position) {
70 combined.push_back(*position);
71 }
72 }
73 }
74 // copy any remaining values unique to indexes2
75 for (; position < indexes2.end(); ++position) {
76 combined.push_back(*position);
77 }
78 return combined;
79 }
80
81 // Permutes the data array according to the permutation.
82 //
83 // The vectors must be the same size and permutation must contain [0, size()).
84 //
85 // Each data[i] <- data[permutation[i]].
86 //
87 // Each permutation[i] <- i.
88 //
89 // Example (where the permutation consists of a single cycle), step by step:
90 //
91 // data: emily, george, rose, ted
92 // permutation: 2, 1, 3, 0
93 //
94 // from = 0
95 // initialise to = 0
96 // resolve cycle by swapping elements
97 //
98 // permutation[to = 0] = 2 != from = 2
99 // want data[2] = rose at data[0] = emily, swap them
100 // also swap to = 0 with permutation[to] = 2
101 // data: rose, george, emily, ted
102 // permutation 0, 1, 3, 0
103 // to = 2
104 //
105 // permutation[to = 2] = 3 != from = 0
106 // want data[3] = ted at data[2] = emily, swap them
107 // also swap to = 2 with permutation[2] = 3
108 // data: rose, george, ted, emily
109 // permutation 0, 1, 2, 0
110 // to = 3
111 //
112 // permutation[to = 3] = 0 == from = 0
113 // emily is now in right position
114 // finish the cycle and set permutation[to = 3] = to = 3
115 // permutation 0, 1, 2, 3
116 template <typename T>
Permute(std::vector<T> & data,std::vector<size_t> & permutation)117 void Permute(std::vector<T>& data, std::vector<size_t>& permutation) {
118 const size_t size = permutation.size();
119 Check(data.size() == size) << "internal error: bad Permute vectors";
120 for (size_t from = 0; from < size; ++from) {
121 size_t to = from;
122 while (permutation[to] != from) {
123 Check(permutation[to] < size) << "internal error: bad Permute index";
124 using std::swap;
125 swap(data[to], data[permutation[to]]);
126 swap(to, permutation[to]);
127 }
128 permutation[to] = to;
129 }
130 }
131
132 // Reorders the data array according to its implicit ordering constraints.
133 //
134 // At least one of each pair of positions must be present.
135 //
136 // Each pair gives 1 or 2 abstract positions for the corresponding data item.
137 //
138 // The first and second positions are interpreted separately, with the second
139 // implied ordering having precedence over the first in the event of a conflict.
140 //
141 // The real work is done by CombineOrders and Permute.
142 //
143 // In practice the input data are the output of a matching process, consider:
144 //
145 // sequence1: rose, george, emily
146 // sequence2: george, ted, emily
147 //
148 // These have the corresponding matches (here just ordered by the matching key;
149 // this algorithm gives the same result independent of this ordering):
150 //
151 // emily: {{2}, {2}}
152 // george: {{1}, {0}}
153 // rose: {{0}, {} }
154 // ted: {{}, {1}}
155 //
156 // Now ignore the matching keys.
157 //
158 // This function processes the matches into intermediate data structures:
159 //
160 // positions1: {{2, 0}, {1, 1}, {0, 2}, }
161 // positions2: {{2, 0}, {0, 1}, {1, 3},}
162 //
163 // The indexes (.second) are sorted by the positions (.first):
164 //
165 // positions1: {{0, 2}, {1, 1}, {2, 0}}
166 // positions2: {{0, 1}, {1, 3}, {2, 0}}
167 //
168 // And the positions are discarded:
169 //
170 // indexes1: 2, 1, 0
171 // indexes2: 1, 3, 0
172 //
173 // Finally a consistent ordering is made:
174 //
175 // indexes1: 2, 1, 3, 0
176 //
177 // And this is used to permute the original matching sequence, for clarity
178 // including the implicit keys here:
179 //
180 // rose: {{0}, {} }
181 // george: {{1}, {0}}
182 // ted: {{}, {1}}
183 // emily: {{2}, {2}}
184 template <typename T>
Reorder(std::vector<std::pair<std::optional<T>,std::optional<T>>> & data)185 void Reorder(std::vector<std::pair<std::optional<T>, std::optional<T>>>& data) {
186 const auto size = data.size();
187 // Split out the ordering constraints as position-index pairs.
188 std::vector<std::pair<T, size_t>> positions1;
189 positions1.reserve(size);
190 std::vector<std::pair<T, size_t>> positions2;
191 positions2.reserve(size);
192 for (size_t index = 0; index < size; ++index) {
193 const auto& [position1, position2] = data[index];
194 Check(position1 || position2)
195 << "internal error: Reorder constraint with no positions";
196 if (position1) {
197 positions1.push_back({*position1, index});
198 }
199 if (position2) {
200 positions2.push_back({*position2, index});
201 }
202 }
203 // Order the indexes by the desired positions.
204 std::stable_sort(positions1.begin(), positions1.end());
205 std::stable_sort(positions2.begin(), positions2.end());
206 std::vector<size_t> indexes1;
207 indexes1.reserve(positions1.size());
208 std::vector<size_t> indexes2;
209 indexes2.reserve(positions2.size());
210 for (const auto& ordered_index : positions1) {
211 indexes1.push_back(ordered_index.second);
212 }
213 for (const auto& ordered_index : positions2) {
214 indexes2.push_back(ordered_index.second);
215 }
216 // Merge the two orderings of indexes, giving preference to the second.
217 auto combined = CombineOrders(indexes1, indexes2, size);
218 // Use this to permute the original data array.
219 Permute(data, combined);
220 }
221
222 } // namespace stg
223
224 #endif // STG_ORDER_H_
225