1 /*
2 * Copyright (C) 2024 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/perfetto_sql/intrinsics/functions/structural_tree_partition.h"
18
19 #include <algorithm>
20 #include <cstdint>
21 #include <limits>
22 #include <memory>
23 #include <numeric>
24 #include <optional>
25 #include <vector>
26
27 #include "perfetto/base/logging.h"
28 #include "perfetto/public/compiler.h"
29 #include "src/trace_processor/perfetto_sql/intrinsics/functions/tables_py.h"
30 #include "src/trace_processor/sqlite/bindings/sqlite_aggregate_function.h"
31 #include "src/trace_processor/sqlite/bindings/sqlite_result.h"
32 #include "src/trace_processor/sqlite/bindings/sqlite_value.h"
33
34 namespace perfetto::trace_processor {
35 namespace tables {
36 StructuralTreePartitionTable::~StructuralTreePartitionTable() = default;
37 } // namespace tables
38
39 namespace {
40
41 constexpr uint32_t kNullParentId = std::numeric_limits<uint32_t>::max();
42
43 struct Row {
44 uint32_t id;
45 uint32_t parent_id;
46 uint32_t group;
47 };
48
49 struct AggCtx : SqliteAggregateContext<AggCtx> {
50 std::vector<Row> input;
51 std::vector<uint32_t> child_count_by_id;
52 std::optional<Row> root;
53 uint32_t max_group = 0;
54 };
55
56 struct LookupHelper {
ChildrenForIdBeginperfetto::trace_processor::__anon9b8c68cc0111::LookupHelper57 const Row* ChildrenForIdBegin(uint32_t id) const {
58 return rows.data() + child_count_by_id[id];
59 }
ChildrenForIdEndperfetto::trace_processor::__anon9b8c68cc0111::LookupHelper60 const Row* ChildrenForIdEnd(uint32_t id) const {
61 return id + 1 == child_count_by_id.size()
62 ? rows.data() + rows.size()
63 : rows.data() + child_count_by_id[id + 1];
64 }
65 const std::vector<Row>& rows;
66 const std::vector<uint32_t>& child_count_by_id;
67 };
68
69 } // namespace
70
Step(sqlite3_context * ctx,int argc,sqlite3_value ** argv)71 void StructuralTreePartition::StructuralTreePartition::Step(
72 sqlite3_context* ctx,
73 int argc,
74 sqlite3_value** argv) {
75 if (argc != 3) {
76 return sqlite::result::Error(
77 ctx, "tree_partition_arg: incorrect number of arguments");
78 }
79
80 auto& agg_ctx = AggCtx::GetOrCreateContextForStep(ctx);
81
82 // For performance reasons, we don't typecheck the arguments and assume they
83 // are longs.
84 auto id = static_cast<uint32_t>(sqlite::value::Int64(argv[0]));
85 auto group = static_cast<uint32_t>(sqlite::value::Int64(argv[2]));
86
87 // Keep track of the maximum group seen.
88 agg_ctx.max_group = std::max(agg_ctx.max_group, group);
89
90 // If the parent_id is null, this is the root. Keep track of it there.
91 sqlite3_value* parent_id_value = argv[1];
92 if (PERFETTO_UNLIKELY(sqlite::value::IsNull(parent_id_value))) {
93 if (agg_ctx.root) {
94 return sqlite::result::Error(
95 ctx,
96 "tree_partition: multiple NULL parent_ids. Only one root (i.e. one "
97 "NULL parent_id) expected.");
98 }
99 agg_ctx.root = Row{id, kNullParentId, group};
100 return;
101 }
102
103 // Otherwise, this is a non-root. Increment the child count of its parent.
104 auto parent_id = static_cast<uint32_t>(sqlite::value::Int64(parent_id_value));
105 uint32_t max_id = std::max(id, parent_id);
106 if (max_id >= agg_ctx.child_count_by_id.size()) {
107 agg_ctx.child_count_by_id.resize(max_id + 1);
108 }
109 agg_ctx.child_count_by_id[parent_id]++;
110
111 // Keep track of all the values.
112 agg_ctx.input.push_back(Row{id, parent_id, group});
113 }
114
Final(sqlite3_context * ctx)115 void StructuralTreePartition::Final(sqlite3_context* ctx) {
116 auto scoped_agg_ctx = AggCtx::GetContextOrNullForFinal(ctx);
117
118 // The algorithm below computes the strucutal partition of the input tree.
119 // We do this in three stages:
120 // 1) We counting sort the input rows to be ordered by parent_id: this acts
121 // as a map to lookup the children for a given node.
122 // 2) In the first pass (i.e. downard pass, before any children have been
123 // processed) of the DFS, we associate this node with it's
124 // closest ancestor for the same group and then save this ancestor so
125 // we can restore it.
126 // 3) In the second pass (i.e the upwards pass, after all children have been
127 // processed), we restore the ancestor for the group for the previous
128 // ancestor. This ensures that any sibling nodes do not accidentally end up
129 // with this node as its ancestor.
130
131 // If Step was never called, this will be null. Don't run the algorithm in
132 // that case, causing an empty table to be returned.
133 auto table =
134 std::make_unique<tables::StructuralTreePartitionTable>(GetUserData(ctx));
135 if (auto* agg_ctx = scoped_agg_ctx.get(); agg_ctx) {
136 // If there is no root, we cannot do anything.
137 if (!agg_ctx->root) {
138 return sqlite::result::Error(ctx, "tree_partition: no root in tree");
139 }
140
141 // Compute the partial sums to give us the positions we should place each
142 // row in the output.
143 std::partial_sum(agg_ctx->child_count_by_id.cbegin(),
144 agg_ctx->child_count_by_id.cend(),
145 agg_ctx->child_count_by_id.begin());
146
147 // Counting sort the rows to allow looking up the rows fast.
148 std::vector<Row> sorted(agg_ctx->input.size());
149 for (auto it = agg_ctx->input.rbegin(); it != agg_ctx->input.rend(); ++it) {
150 PERFETTO_DCHECK(agg_ctx->child_count_by_id[it->parent_id] > 0);
151 uint32_t index = --agg_ctx->child_count_by_id[it->parent_id];
152 sorted[index] = *it;
153 }
154
155 struct StackState {
156 Row row;
157 std::optional<uint32_t> prev_ancestor_id_for_group;
158 bool first_pass_done;
159 };
160
161 LookupHelper helper{sorted, agg_ctx->child_count_by_id};
162 std::vector<StackState> stack{{*agg_ctx->root, std::nullopt, false}};
163 std::vector<std::optional<uint32_t>> ancestor_id_for_group(
164 agg_ctx->max_group + 1, std::nullopt);
165 while (!stack.empty()) {
166 StackState& ss = stack.back();
167 if (ss.first_pass_done) {
168 // This node has already been processed before: make sure to restore the
169 // state of the ancestor id back to what it was before this node was
170 // processed.
171 ancestor_id_for_group[ss.row.group] = ss.prev_ancestor_id_for_group;
172 stack.pop_back();
173 continue;
174 }
175 table->Insert(
176 {ss.row.id, ancestor_id_for_group[ss.row.group], ss.row.group});
177
178 // Keep track of the fact this node was processed and update the ancestor
179 // id for all children.
180 ss.first_pass_done = true;
181 ss.prev_ancestor_id_for_group = ancestor_id_for_group[ss.row.group];
182 ancestor_id_for_group[ss.row.group] = ss.row.id;
183
184 const auto* start = helper.ChildrenForIdBegin(ss.row.id);
185 const auto* end = helper.ChildrenForIdEnd(ss.row.id);
186 for (const auto* it = start; it != end; ++it) {
187 stack.emplace_back(StackState{*it, std::nullopt, false});
188 }
189 }
190 }
191 return sqlite::result::RawPointer(
192 ctx, table.release(), "TABLE", [](void* ptr) {
193 delete static_cast<tables::StructuralTreePartitionTable*>(ptr);
194 });
195 }
196
197 } // namespace perfetto::trace_processor
198