1 /*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/tools/proto_merger/proto_merger.h"
18
19 #include <optional>
20
21 #include "perfetto/base/logging.h"
22 #include "perfetto/base/status.h"
23 #include "perfetto/ext/base/string_utils.h"
24
25 namespace perfetto {
26 namespace proto_merger {
27 namespace {
28
// Looks up |key| in |map|.
// Returns a copy of the mapped value when the key is present and
// std::nullopt otherwise.
template <typename Key, typename Value>
std::optional<Value> FindInMap(const std::map<Key, Value>& map,
                               const Key& key) {
  const auto entry = map.find(key);
  if (entry == map.end())
    return std::nullopt;
  return std::make_optional(entry->second);
}
35
// Returns a pointer to the first element of |items| whose |name| member
// equals |name|, or nullptr if no element matches.
// T is ProtoFile::Enum, ProtoFile::Oneof or ProtoFile::Message.
template <typename T>
const T* FindByName(const std::vector<T>& items, const std::string& name) {
  const T* match = nullptr;
  for (const T& candidate : items) {
    if (candidate.name == name) {
      match = &candidate;
      break;
    }
  }
  return match;
}
47
// Computes the items which are present in the |input| vector but have no
// counterpart with the same |name| in the |upstream| vector.
// The returned items are copies and keep their relative order from |input|.
// T is ProtoFile::Enum, ProtoFile::Oneof or ProtoFile::Message.
template <typename T>
std::vector<T> ComputeDeletedByName(const std::vector<T>& input,
                                    const std::vector<T>& upstream) {
  // Index the upstream names once so that each input item needs a single set
  // lookup instead of a linear scan per upstream item.
  std::set<std::string> upstream_names;
  for (const auto& upstream_item : upstream)
    upstream_names.insert(upstream_item.name);

  std::vector<T> deleted;
  for (const auto& input_item : input) {
    if (upstream_names.count(input_item.name) == 0)
      deleted.push_back(input_item);
  }
  return deleted;
}
70
// Returns a pointer to the first element of |items| whose |number| member
// equals |number|, or nullptr if no element matches.
// T is ProtoFile::EnumValue or ProtoFile::Field.
template <typename T>
const T* FindByNumber(const std::vector<T>& items, int number) {
  const T* match = nullptr;
  for (const T& candidate : items) {
    if (candidate.number == number) {
      match = &candidate;
      break;
    }
  }
  return match;
}
82
// Computes the items which are present in the |input| vector but have no
// counterpart with the same |number| in the |upstream| vector.
// The returned items are copies and keep their relative order from |input|.
// T is ProtoFile::EnumValue or ProtoFile::Field.
template <typename T>
std::vector<T> ComputeDeletedByNumber(const std::vector<T>& input,
                                      const std::vector<T>& upstream) {
  // Index the upstream numbers once so that each input item needs a single
  // set lookup instead of a linear scan per upstream item.
  std::set<int> upstream_numbers;
  for (const auto& upstream_item : upstream)
    upstream_numbers.insert(upstream_item.number);

  std::vector<T> deleted;
  for (const auto& input_item : input) {
    if (upstream_numbers.count(input_item.number) == 0)
      deleted.push_back(input_item);
  }
  return deleted;
}
105
// Merges one enum value which exists both in |input| and in |upstream| (the
// source of truth). Both values must carry the same number; this is enforced
// with PERFETTO_CHECK.
ProtoFile::Enum::Value MergeEnumValue(const ProtoFile::Enum::Value& input,
                                      const ProtoFile::Enum::Value& upstream) {
  PERFETTO_CHECK(input.number == upstream.number);

  ProtoFile::Enum::Value merged;

  // The number and the options are kept from the input...
  merged.number = input.number;
  merged.options = input.options;

  // ...while the name and the comments follow the source of truth.
  merged.name = upstream.name;
  merged.leading_comments = upstream.leading_comments;
  merged.trailing_comments = upstream.trailing_comments;
  return merged;
}
122
// Merges an enum which exists both in |input| and in |upstream| (the source
// of truth). Both enums must have the same name; this is enforced with
// PERFETTO_CHECK.
ProtoFile::Enum MergeEnum(const ProtoFile::Enum& input,
                          const ProtoFile::Enum& upstream) {
  PERFETTO_CHECK(input.name == upstream.name);

  ProtoFile::Enum merged;

  // The name and the comments follow the source of truth.
  merged.name = upstream.name;
  merged.leading_comments = upstream.leading_comments;
  merged.trailing_comments = upstream.trailing_comments;

  // The output contains exactly the values of the source of truth, in the
  // same order. An allowlisted enum implicitly allows all of its values, so a
  // value which is unknown to the input is copied verbatim from upstream.
  for (const auto& upstream_value : upstream.values) {
    const auto* input_value =
        FindByNumber(input.values, upstream_value.number);
    if (input_value) {
      merged.values.emplace_back(MergeEnumValue(*input_value, upstream_value));
    } else {
      merged.values.emplace_back(upstream_value);
    }
  }

  // Record the values which the input has but the source of truth dropped.
  merged.deleted_values =
      ComputeDeletedByNumber(input.values, upstream.values);
  return merged;
}
149
MergeEnums(const std::vector<ProtoFile::Enum> & input,const std::vector<ProtoFile::Enum> & upstream,const std::set<std::string> & allowlist)150 std::vector<ProtoFile::Enum> MergeEnums(
151 const std::vector<ProtoFile::Enum>& input,
152 const std::vector<ProtoFile::Enum>& upstream,
153 const std::set<std::string>& allowlist) {
154 std::vector<ProtoFile::Enum> out;
155 for (const auto& upstream_enum : upstream) {
156 auto* input_enum = FindByName(input, upstream_enum.name);
157 if (!input_enum) {
158 // If the enum is missing from the input but is present
159 // in the allowlist, take the whole enum from the
160 // source of truth.
161 if (allowlist.count(upstream_enum.name))
162 out.emplace_back(upstream_enum);
163 continue;
164 }
165
166 // Otherwise, merge the enums from the input and source of truth.
167 out.emplace_back(MergeEnum(*input_enum, upstream_enum));
168 }
169 return out;
170 }
171
MergeField(const ProtoFile::Field & input,const ProtoFile::Field & upstream,ProtoFile::Field & out)172 base::Status MergeField(const ProtoFile::Field& input,
173 const ProtoFile::Field& upstream,
174 ProtoFile::Field& out) {
175 PERFETTO_CHECK(input.number == upstream.number);
176
177 if (input.packageless_type != upstream.packageless_type) {
178 return base::ErrStatus(
179 "The type of field with id %d and name %s (source of truth name: %s) "
180 "changed from %s to %s. Please resolve conflict manually before "
181 "rerunning.",
182 input.number, input.name.c_str(), upstream.name.c_str(),
183 input.packageless_type.c_str(), upstream.packageless_type.c_str());
184 }
185
186 // If the packageless type name is the same but the type is different
187 // mostly we should error however sometimes it is useful to allow downstream
188 // to 'alias' an upstream type. For example 'Foo' to an existing internal
189 // type in another package 'my.private.Foo'.
190 if (input.type != upstream.type) {
191 if (!base::EndsWith(upstream.type, "Atom")) {
192 return base::ErrStatus(
193 "Upstream field with id %d and name '%s' "
194 "(source of truth name: '%s') uses the type '%s' but we have the "
195 "existing downstream type '%s'. Resolve this manually either by "
196 "allowing this explicitly in proto_merger or editing the proto.",
197 input.number, input.name.c_str(), upstream.name.c_str(),
198 upstream.type.c_str(), input.type.c_str());
199 }
200 }
201
202 // Get the comments, label and the name from the source of truth.
203 out.leading_comments = upstream.leading_comments;
204 out.trailing_comments = upstream.trailing_comments;
205 out.is_repeated = upstream.is_repeated;
206 out.name = upstream.name;
207
208 // Get everything else from the input.
209 out.number = input.number;
210 out.options = input.options;
211 out.packageless_type = input.packageless_type;
212 out.type = input.type;
213
214 return base::OkStatus();
215 }
216
MergeFields(const std::vector<ProtoFile::Field> & input,const std::vector<ProtoFile::Field> & upstream,const std::set<int> & allowlist,std::vector<ProtoFile::Field> & out)217 base::Status MergeFields(const std::vector<ProtoFile::Field>& input,
218 const std::vector<ProtoFile::Field>& upstream,
219 const std::set<int>& allowlist,
220 std::vector<ProtoFile::Field>& out) {
221 for (const auto& upstream_field : upstream) {
222 auto* input_field = FindByNumber(input, upstream_field.number);
223 if (!input_field) {
224 // If the field is missing from the input but is present
225 // in the allowlist, take the whole field from the
226 // source of truth.
227 if (allowlist.count(upstream_field.number))
228 out.emplace_back(upstream_field);
229 continue;
230 }
231
232 // Otherwise, merge the fields from the input and source of truth.
233 ProtoFile::Field out_field;
234 base::Status status = MergeField(*input_field, upstream_field, out_field);
235 if (!status.ok())
236 return status;
237 out.emplace_back(std::move(out_field));
238 }
239 return base::OkStatus();
240 }
241
// Forward declarations of the two merge overloads, which are defined further
// down in this file.
//
// We call both of these just "Merge" so that |MergeRecursive| below can
// reference them with the same name: it is a template over the item type and
// calls Merge(...) with whichever of ProtoFile::Oneof or ProtoFile::Message
// it was instantiated for. The message overload in turn calls
// |MergeRecursive|, hence the need to declare them ahead of it.

// Merges the oneof |input| with the oneof |upstream| into |out|, using
// |allowlist| to decide which fields missing from |input| may be added.
base::Status Merge(const ProtoFile::Oneof& input,
                   const ProtoFile::Oneof& upstream,
                   const Allowlist::Oneof& allowlist,
                   ProtoFile::Oneof& out);

// Merges the message |input| with the message |upstream| into |out|, using
// |allowlist| to decide which members missing from |input| may be added.
base::Status Merge(const ProtoFile::Message& input,
                   const ProtoFile::Message& upstream,
                   const Allowlist::Message& allowlist,
                   ProtoFile::Message& out);
253
254 template <typename T, typename AllowlistType>
MergeRecursive(const std::vector<T> & input,const std::vector<T> & upstream,const std::map<std::string,AllowlistType> & allowlist_map,std::vector<T> & out)255 base::Status MergeRecursive(
256 const std::vector<T>& input,
257 const std::vector<T>& upstream,
258 const std::map<std::string, AllowlistType>& allowlist_map,
259 std::vector<T>& out) {
260 for (const auto& upstream_item : upstream) {
261 auto opt_allowlist = FindInMap(allowlist_map, upstream_item.name);
262 auto* input_item = FindByName(input, upstream_item.name);
263
264 // If the value is not present in the input and the allowlist doesn't
265 // exist either, this field is not approved so should not be included
266 // in the output.
267 if (!input_item && !opt_allowlist)
268 continue;
269
270 // If the input value doesn't exist, create a fake "input" that we can pass
271 // to the merge function. This basically has the effect that the upstream
272 // item is taken but *not* recursively; i.e. any fields which are inside the
273 // message/oneof are checked against the allowlist individually. If we just
274 // took the whole upstream here, we could add fields which were not
275 // allowlisted.
276 T input_or_fake;
277 if (input_item) {
278 input_or_fake = *input_item;
279 } else {
280 input_or_fake.name = upstream_item.name;
281 }
282
283 auto allowlist = opt_allowlist.value_or(AllowlistType{});
284 T out_item;
285 auto status = Merge(input_or_fake, upstream_item, allowlist, out_item);
286 if (!status.ok())
287 return status;
288 out.emplace_back(std::move(out_item));
289 }
290 return base::OkStatus();
291 }
292
Merge(const ProtoFile::Oneof & input,const ProtoFile::Oneof & upstream,const Allowlist::Oneof & allowlist,ProtoFile::Oneof & out)293 base::Status Merge(const ProtoFile::Oneof& input,
294 const ProtoFile::Oneof& upstream,
295 const Allowlist::Oneof& allowlist,
296 ProtoFile::Oneof& out) {
297 PERFETTO_CHECK(input.name == upstream.name);
298 out.name = input.name;
299
300 // Get the comments from the source of truth.
301 out.leading_comments = upstream.leading_comments;
302 out.trailing_comments = upstream.trailing_comments;
303
304 // Compute all the fields present in the input but deleted in the
305 // source of truth.
306 out.deleted_fields = ComputeDeletedByNumber(input.fields, upstream.fields);
307
308 // Finish by merging the list of fields.
309 return MergeFields(input.fields, upstream.fields, allowlist, out.fields);
310 }
311
Merge(const ProtoFile::Message & input,const ProtoFile::Message & upstream,const Allowlist::Message & allowlist,ProtoFile::Message & out)312 base::Status Merge(const ProtoFile::Message& input,
313 const ProtoFile::Message& upstream,
314 const Allowlist::Message& allowlist,
315 ProtoFile::Message& out) {
316 PERFETTO_CHECK(input.name == upstream.name);
317 out.name = input.name;
318
319 // Get the comments from the source of truth.
320 out.leading_comments = upstream.leading_comments;
321 out.trailing_comments = upstream.trailing_comments;
322
323 // Compute all the values present in the input but deleted in the
324 // source of truth.
325 out.deleted_enums = ComputeDeletedByName(input.enums, upstream.enums);
326 out.deleted_nested_messages =
327 ComputeDeletedByName(input.nested_messages, upstream.nested_messages);
328 out.deleted_oneofs = ComputeDeletedByName(input.oneofs, upstream.oneofs);
329 out.deleted_fields = ComputeDeletedByNumber(input.fields, upstream.fields);
330
331 // Merge any nested enum types.
332 out.enums = MergeEnums(input.enums, upstream.enums, allowlist.enums);
333
334 // Merge any nested message types.
335 auto status = MergeRecursive(input.nested_messages, upstream.nested_messages,
336 allowlist.nested_messages, out.nested_messages);
337 if (!status.ok())
338 return status;
339
340 // Merge any oneofs.
341 status = MergeRecursive(input.oneofs, upstream.oneofs, allowlist.oneofs,
342 out.oneofs);
343 if (!status.ok())
344 return status;
345
346 // Finish by merging the list of fields.
347 return MergeFields(input.fields, upstream.fields, allowlist.fields,
348 out.fields);
349 }
350
351 } // namespace
352
MergeProtoFiles(const ProtoFile & input,const ProtoFile & upstream,const Allowlist & allowlist,ProtoFile & out)353 base::Status MergeProtoFiles(const ProtoFile& input,
354 const ProtoFile& upstream,
355 const Allowlist& allowlist,
356 ProtoFile& out) {
357 // The preamble is taken directly from upstream. This allows private stuff
358 // to be in the preamble without being present in upstream.
359 out.preamble = input.preamble;
360
361 // Compute all the enums and messages present in the input but deleted in the
362 // source of truth.
363 out.deleted_enums = ComputeDeletedByName(input.enums, upstream.enums);
364 out.deleted_messages =
365 ComputeDeletedByName(input.messages, upstream.messages);
366
367 // Merge the top-level enums.
368 out.enums = MergeEnums(input.enums, upstream.enums, allowlist.enums);
369
370 // Finish by merging the top-level messages.
371 return MergeRecursive(input.messages, upstream.messages, allowlist.messages,
372 out.messages);
373 }
374
375 } // namespace proto_merger
376 } // namespace perfetto
377