1 /*
2 * Copyright (C) 2024 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/perfetto_sql/intrinsics/functions/interval_intersect.h"
18
19 #include <algorithm>
20 #include <cinttypes>
21 #include <cstdint>
22 #include <iterator>
23 #include <memory>
24 #include <numeric>
25 #include <string>
26 #include <string_view>
27 #include <utility>
28 #include <variant>
29 #include <vector>
30
31 #include "perfetto/base/compiler.h"
32 #include "perfetto/base/logging.h"
33 #include "perfetto/base/status.h"
34 #include "perfetto/ext/base/flat_hash_map.h"
35 #include "perfetto/ext/base/status_or.h"
36 #include "perfetto/ext/base/string_utils.h"
37 #include "perfetto/trace_processor/basic_types.h"
38 #include "src/trace_processor/containers/interval_intersector.h"
39 #include "src/trace_processor/containers/string_pool.h"
40 #include "src/trace_processor/db/runtime_table.h"
41 #include "src/trace_processor/perfetto_sql/engine/perfetto_sql_engine.h"
42 #include "src/trace_processor/perfetto_sql/intrinsics/types/partitioned_intervals.h"
43 #include "src/trace_processor/perfetto_sql/parser/function_util.h"
44 #include "src/trace_processor/sqlite/bindings/sqlite_bind.h"
45 #include "src/trace_processor/sqlite/bindings/sqlite_column.h"
46 #include "src/trace_processor/sqlite/bindings/sqlite_function.h"
47 #include "src/trace_processor/sqlite/bindings/sqlite_result.h"
48 #include "src/trace_processor/sqlite/bindings/sqlite_stmt.h"
49 #include "src/trace_processor/sqlite/bindings/sqlite_type.h"
50 #include "src/trace_processor/sqlite/bindings/sqlite_value.h"
51 #include "src/trace_processor/sqlite/sqlite_utils.h"
52 #include "src/trace_processor/util/status_macros.h"
53
54 namespace perfetto::trace_processor::perfetto_sql {
55 namespace {
56
57 static const uint32_t kArgCols = 2;
58 static const uint32_t kIdCols = 5;
59 static const uint32_t kPartitionColsOffset = kArgCols + kIdCols;
60
61 using Intervals = std::vector<Interval>;
62 using BuilderColType = RuntimeTable::BuilderColumnType;
63
64 struct MultiIndexInterval {
65 uint64_t start;
66 uint64_t end;
67 std::vector<int64_t> idx_in_table;
68 };
69
FromSqlValueTypeToBuilderType(SqlValue::Type type)70 BuilderColType FromSqlValueTypeToBuilderType(SqlValue::Type type) {
71 switch (type) {
72 case SqlValue::kLong:
73 return RuntimeTable::kNullInt;
74 case SqlValue::kDouble:
75 return RuntimeTable::kNullDouble;
76 case SqlValue::kString:
77 return RuntimeTable::kString;
78 case SqlValue::kNull:
79 case SqlValue::kBytes:
80 PERFETTO_FATAL("Wrong type");
81 }
82 PERFETTO_FATAL("For gcc");
83 }
84
85 // Translates partitions to RuntimeTable::Builder types.
GetPartitionsSqlType(const Partitions & partitions)86 base::StatusOr<std::vector<BuilderColType>> GetPartitionsSqlType(
87 const Partitions& partitions) {
88 auto partition_it = partitions.GetIterator();
89 if (!partition_it) {
90 return std::vector<BuilderColType>();
91 }
92 uint32_t p_count =
93 static_cast<uint32_t>(partition_it.value().sql_values.size());
94
95 std::vector<BuilderColType> types(p_count, BuilderColType::kNull);
96 bool any_part_not_found = true;
97 // We expect this loop to be broken very early, but it has to be implemented
98 // as loop as we can't deduce the type partition with NULL value.
99 for (; partition_it; ++partition_it) {
100 any_part_not_found = false;
101 for (uint32_t i = 0; i < p_count && any_part_not_found; i++) {
102 auto type = types[i];
103 if (type != BuilderColType::kNull) {
104 continue;
105 }
106 if (partition_it.value().sql_values[i].is_null()) {
107 any_part_not_found = true;
108 continue;
109 }
110 types[i] = FromSqlValueTypeToBuilderType(
111 partition_it.value().sql_values[i].type);
112 }
113 }
114 if (any_part_not_found) {
115 return base::ErrStatus(
116 "INTERVAL_INTERSECT: Can't partition on column that only has NULLs");
117 }
118 return types;
119 }
120
PushPartition(RuntimeTable::Builder & builder,const std::vector<Partition * > & partitions)121 static base::StatusOr<uint32_t> PushPartition(
122 RuntimeTable::Builder& builder,
123 const std::vector<Partition*>& partitions) {
124 size_t tables_count = partitions.size();
125
126 // Sort `tables_order` from the smallest to the biggest.
127 std::vector<uint32_t> tables_order(tables_count);
128 std::iota(tables_order.begin(), tables_order.end(), 0);
129 std::sort(tables_order.begin(), tables_order.end(),
130 [partitions](const uint32_t idx_a, const uint32_t idx_b) {
131 return partitions[idx_a]->intervals.size() <
132 partitions[idx_b]->intervals.size();
133 });
134 uint32_t idx_of_smallest_part = tables_order.front();
135 PERFETTO_DCHECK(!partitions[idx_of_smallest_part]->intervals.empty());
136
137 // Trivially translate intervals from smallest table to `MultiIndexIntervals`.
138 std::vector<MultiIndexInterval> last_results;
139 last_results.reserve(partitions.back()->intervals.size());
140 for (const auto& interval : partitions[idx_of_smallest_part]->intervals) {
141 MultiIndexInterval m_int;
142 m_int.start = interval.start;
143 m_int.end = interval.end;
144 m_int.idx_in_table.resize(tables_count);
145 m_int.idx_in_table[idx_of_smallest_part] = interval.id;
146 last_results.push_back(m_int);
147 }
148
149 // Create an interval tree on all tables except the smallest - the first one.
150 std::vector<MultiIndexInterval> overlaps_with_this_table;
151 overlaps_with_this_table.reserve(partitions.back()->intervals.size());
152 for (uint32_t i = 1; i < tables_count && !last_results.empty(); i++) {
153 overlaps_with_this_table.clear();
154 uint32_t table_idx = tables_order[i];
155
156 IntervalIntersector::Mode intersection_mode =
157 IntervalIntersector::DecideMode(
158 partitions[table_idx]->is_nonoverlapping,
159 static_cast<uint32_t>(last_results.size()));
160 IntervalIntersector cur_int_operator(partitions[table_idx]->intervals,
161 intersection_mode);
162 for (const auto& prev_result : last_results) {
163 Intervals new_overlaps;
164 cur_int_operator.FindOverlaps(prev_result.start, prev_result.end,
165 new_overlaps);
166 for (const auto& overlap : new_overlaps) {
167 MultiIndexInterval m_int;
168 m_int.idx_in_table = std::move(prev_result.idx_in_table);
169 m_int.idx_in_table[table_idx] = overlap.id;
170 m_int.start = overlap.start;
171 m_int.end = overlap.end;
172 overlaps_with_this_table.push_back(std::move(m_int));
173 }
174 }
175
176 last_results = std::move(overlaps_with_this_table);
177 }
178
179 uint32_t rows_count = static_cast<uint32_t>(last_results.size());
180 std::vector<int64_t> timestamps(rows_count);
181 std::vector<int64_t> durations(rows_count);
182 std::vector<std::vector<int64_t>> ids(tables_count);
183 for (auto& t_ids_vec : ids) {
184 t_ids_vec.resize(rows_count);
185 }
186
187 for (uint32_t i = 0; i < rows_count; i++) {
188 const MultiIndexInterval& interval = last_results[i];
189 timestamps[i] = static_cast<int64_t>(interval.start);
190 durations[i] = static_cast<int64_t>(interval.end) -
191 static_cast<int64_t>(interval.start);
192 for (uint32_t j = 0; j < tables_count; j++) {
193 ids[j][i] = interval.idx_in_table[j];
194 }
195 }
196
197 builder.AddNonNullIntegersUnchecked(0, std::move(timestamps));
198 builder.AddNonNullIntegersUnchecked(1, std::move(durations));
199 for (uint32_t i = 0; i < tables_count; i++) {
200 builder.AddNonNullIntegersUnchecked(i + kArgCols, ids[i]);
201 }
202
203 for (uint32_t i = 0; i < partitions[0]->sql_values.size(); i++) {
204 const SqlValue& part_val = partitions[0]->sql_values[i];
205 switch (part_val.type) {
206 case SqlValue::kLong:
207 RETURN_IF_ERROR(builder.AddIntegers(i + kPartitionColsOffset,
208 part_val.AsLong(), rows_count));
209 continue;
210 case SqlValue::kDouble:
211 RETURN_IF_ERROR(builder.AddFloats(i + kPartitionColsOffset,
212 part_val.AsDouble(), rows_count));
213 continue;
214 case SqlValue::kString:
215 RETURN_IF_ERROR(builder.AddTexts(i + kPartitionColsOffset,
216 part_val.AsString(), rows_count));
217 continue;
218 case SqlValue::kNull:
219 RETURN_IF_ERROR(builder.AddNulls(i + kPartitionColsOffset, rows_count));
220 continue;
221 case SqlValue::kBytes:
222 PERFETTO_FATAL("Invalid partition type");
223 }
224 }
225
226 return static_cast<uint32_t>(last_results.size());
227 }
228
229 struct IntervalIntersect : public SqliteFunction<IntervalIntersect> {
230 static constexpr char kName[] = "__intrinsic_interval_intersect";
231 // Two tables that are being intersected.
232 // TODO(mayzner): Support more tables.
233 static constexpr int kArgCount = -1;
234
235 struct UserDataContext {
236 PerfettoSqlEngine* engine;
237 StringPool* pool;
238 };
239
Stepperfetto::trace_processor::perfetto_sql::__anon1e7b90400111::IntervalIntersect240 static void Step(sqlite3_context* ctx, int argc, sqlite3_value** argv) {
241 PERFETTO_DCHECK(argc >= 2);
242 size_t tabc = static_cast<size_t>(argc - 1);
243 if (tabc > kIdCols) {
244 return sqlite::result::Error(
245 ctx, "interval intersect: Can intersect at most 5 tables");
246 }
247 const char* partition_list = sqlite::value::Text(argv[argc - 1]);
248 if (!partition_list) {
249 return sqlite::result::Error(
250 ctx, "interval intersect: column list cannot be null");
251 }
252
253 // Get column names of return columns.
254 std::vector<std::string> ret_col_names{"ts", "dur"};
255 for (uint32_t i = 0; i < kIdCols; i++) {
256 ret_col_names.push_back(base::StackString<32>("id_%u", i).ToStdString());
257 }
258 std::vector<std::string> partition_columns =
259 base::SplitString(base::StripChars(partition_list, "()", ' '), ",");
260 if (partition_columns.size() > 4) {
261 return sqlite::result::Error(
262 ctx, "interval intersect: Can take at most 4 partitions.");
263 }
264 for (const auto& c : partition_columns) {
265 std::string p_col_name = base::TrimWhitespace(c).c_str();
266 if (!p_col_name.empty()) {
267 ret_col_names.push_back(p_col_name);
268 }
269 }
270
271 // Get data from of each table.
272 std::vector<PartitionedTable*> tables(tabc);
273 std::vector<Partitions*> t_partitions(tabc);
274
275 for (uint32_t i = 0; i < tabc; i++) {
276 tables[i] = sqlite::value::Pointer<PartitionedTable>(
277 argv[i], PartitionedTable::kName);
278
279 // If any of the tables is empty the intersection with it also has to be
280 // empty.
281 if (!tables[i] || tables[i]->partitions_map.size() == 0) {
282 SQLITE_ASSIGN_OR_RETURN(
283 ctx, std::unique_ptr<RuntimeTable> ret_table,
284 RuntimeTable::Builder(GetUserData(ctx)->pool, ret_col_names)
285 .Build(0));
286 return sqlite::result::UniquePointer(ctx, std::move(ret_table),
287 "TABLE");
288 }
289 t_partitions[i] = &tables[i]->partitions_map;
290 }
291
292 std::vector<BuilderColType> col_types(kArgCols + tabc,
293 BuilderColType::kInt);
294 // Add dummy id cols.
295 col_types.resize(kArgCols + kIdCols, BuilderColType::kNullInt);
296
297 Partitions* p_values = &tables[0]->partitions_map;
298 SQLITE_ASSIGN_OR_RETURN(ctx, std::vector<BuilderColType> p_types,
299 GetPartitionsSqlType(*p_values));
300 col_types.insert(col_types.end(), p_types.begin(), p_types.end());
301
302 RuntimeTable::Builder builder(GetUserData(ctx)->pool, ret_col_names,
303 col_types);
304
305 // Partitions will be taken from the table which has the least number of
306 // them.
307 auto min_el = std::min_element(t_partitions.begin(), t_partitions.end(),
308 [](const auto& t_a, const auto& t_b) {
309 return t_a->size() < t_b->size();
310 });
311
312 auto t_least_partitions =
313 static_cast<uint32_t>(std::distance(t_partitions.begin(), min_el));
314
315 // The only partitions we should look at are partitions from the table
316 // with the least partitions.
317 const Partitions* p_intervals = t_partitions[t_least_partitions];
318
319 // For each partition insert into table.
320 uint32_t rows = 0;
321 for (auto p_it = p_intervals->GetIterator(); p_it; ++p_it) {
322 std::vector<Partition*> cur_partition_in_table;
323 bool all_have_p = true;
324
325 // From each table get all vectors of intervals.
326 for (uint32_t i = 0; i < tabc; i++) {
327 Partitions* t = t_partitions[i];
328 if (auto found = t->Find(p_it.key())) {
329 cur_partition_in_table.push_back(found);
330 } else {
331 all_have_p = false;
332 break;
333 }
334 }
335
336 // Only push into the table if all tables have this partition present.
337 if (all_have_p) {
338 SQLITE_ASSIGN_OR_RETURN(ctx, uint32_t pushed_rows,
339 PushPartition(builder, cur_partition_in_table));
340 rows += pushed_rows;
341 }
342 }
343
344 // Fill the dummy id columns with nulls.
345 for (uint32_t i = static_cast<uint32_t>(tabc); i < kIdCols; i++) {
346 SQLITE_RETURN_IF_ERROR(ctx, builder.AddNulls(i + kArgCols, rows));
347 }
348
349 SQLITE_ASSIGN_OR_RETURN(ctx, std::unique_ptr<RuntimeTable> ret_tab,
350 std::move(builder).Build(rows));
351
352 return sqlite::result::UniquePointer(ctx, std::move(ret_tab), "TABLE");
353 }
354 };
355
356 } // namespace
357
RegisterIntervalIntersectFunctions(PerfettoSqlEngine & engine,StringPool * pool)358 base::Status RegisterIntervalIntersectFunctions(PerfettoSqlEngine& engine,
359 StringPool* pool) {
360 return engine.RegisterSqliteFunction<IntervalIntersect>(
361 std::make_unique<IntervalIntersect::UserDataContext>(
362 IntervalIntersect::UserDataContext{&engine, pool}));
363 }
364
365 } // namespace perfetto::trace_processor::perfetto_sql
366