1 /*
2  * Copyright (C) 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/perfetto_sql/intrinsics/functions/interval_intersect.h"
18 
19 #include <algorithm>
20 #include <cinttypes>
21 #include <cstdint>
22 #include <iterator>
23 #include <memory>
24 #include <numeric>
25 #include <string>
26 #include <string_view>
27 #include <utility>
28 #include <variant>
29 #include <vector>
30 
31 #include "perfetto/base/compiler.h"
32 #include "perfetto/base/logging.h"
33 #include "perfetto/base/status.h"
34 #include "perfetto/ext/base/flat_hash_map.h"
35 #include "perfetto/ext/base/status_or.h"
36 #include "perfetto/ext/base/string_utils.h"
37 #include "perfetto/trace_processor/basic_types.h"
38 #include "src/trace_processor/containers/interval_intersector.h"
39 #include "src/trace_processor/containers/string_pool.h"
40 #include "src/trace_processor/db/runtime_table.h"
41 #include "src/trace_processor/perfetto_sql/engine/perfetto_sql_engine.h"
42 #include "src/trace_processor/perfetto_sql/intrinsics/types/partitioned_intervals.h"
43 #include "src/trace_processor/perfetto_sql/parser/function_util.h"
44 #include "src/trace_processor/sqlite/bindings/sqlite_bind.h"
45 #include "src/trace_processor/sqlite/bindings/sqlite_column.h"
46 #include "src/trace_processor/sqlite/bindings/sqlite_function.h"
47 #include "src/trace_processor/sqlite/bindings/sqlite_result.h"
48 #include "src/trace_processor/sqlite/bindings/sqlite_stmt.h"
49 #include "src/trace_processor/sqlite/bindings/sqlite_type.h"
50 #include "src/trace_processor/sqlite/bindings/sqlite_value.h"
51 #include "src/trace_processor/sqlite/sqlite_utils.h"
52 #include "src/trace_processor/util/status_macros.h"
53 
54 namespace perfetto::trace_processor::perfetto_sql {
55 namespace {
56 
57 static const uint32_t kArgCols = 2;
58 static const uint32_t kIdCols = 5;
59 static const uint32_t kPartitionColsOffset = kArgCols + kIdCols;
60 
61 using Intervals = std::vector<Interval>;
62 using BuilderColType = RuntimeTable::BuilderColumnType;
63 
64 struct MultiIndexInterval {
65   uint64_t start;
66   uint64_t end;
67   std::vector<int64_t> idx_in_table;
68 };
69 
FromSqlValueTypeToBuilderType(SqlValue::Type type)70 BuilderColType FromSqlValueTypeToBuilderType(SqlValue::Type type) {
71   switch (type) {
72     case SqlValue::kLong:
73       return RuntimeTable::kNullInt;
74     case SqlValue::kDouble:
75       return RuntimeTable::kNullDouble;
76     case SqlValue::kString:
77       return RuntimeTable::kString;
78     case SqlValue::kNull:
79     case SqlValue::kBytes:
80       PERFETTO_FATAL("Wrong type");
81   }
82   PERFETTO_FATAL("For gcc");
83 }
84 
85 // Translates partitions to RuntimeTable::Builder types.
GetPartitionsSqlType(const Partitions & partitions)86 base::StatusOr<std::vector<BuilderColType>> GetPartitionsSqlType(
87     const Partitions& partitions) {
88   auto partition_it = partitions.GetIterator();
89   if (!partition_it) {
90     return std::vector<BuilderColType>();
91   }
92   uint32_t p_count =
93       static_cast<uint32_t>(partition_it.value().sql_values.size());
94 
95   std::vector<BuilderColType> types(p_count, BuilderColType::kNull);
96   bool any_part_not_found = true;
97   // We expect this loop to be broken very early, but it has to be implemented
98   // as loop as we can't deduce the type partition with NULL value.
99   for (; partition_it; ++partition_it) {
100     any_part_not_found = false;
101     for (uint32_t i = 0; i < p_count && any_part_not_found; i++) {
102       auto type = types[i];
103       if (type != BuilderColType::kNull) {
104         continue;
105       }
106       if (partition_it.value().sql_values[i].is_null()) {
107         any_part_not_found = true;
108         continue;
109       }
110       types[i] = FromSqlValueTypeToBuilderType(
111           partition_it.value().sql_values[i].type);
112     }
113   }
114   if (any_part_not_found) {
115     return base::ErrStatus(
116         "INTERVAL_INTERSECT: Can't partition on column that only has NULLs");
117   }
118   return types;
119 }
120 
PushPartition(RuntimeTable::Builder & builder,const std::vector<Partition * > & partitions)121 static base::StatusOr<uint32_t> PushPartition(
122     RuntimeTable::Builder& builder,
123     const std::vector<Partition*>& partitions) {
124   size_t tables_count = partitions.size();
125 
126   // Sort `tables_order` from the smallest to the biggest.
127   std::vector<uint32_t> tables_order(tables_count);
128   std::iota(tables_order.begin(), tables_order.end(), 0);
129   std::sort(tables_order.begin(), tables_order.end(),
130             [partitions](const uint32_t idx_a, const uint32_t idx_b) {
131               return partitions[idx_a]->intervals.size() <
132                      partitions[idx_b]->intervals.size();
133             });
134   uint32_t idx_of_smallest_part = tables_order.front();
135   PERFETTO_DCHECK(!partitions[idx_of_smallest_part]->intervals.empty());
136 
137   // Trivially translate intervals from smallest table to `MultiIndexIntervals`.
138   std::vector<MultiIndexInterval> last_results;
139   last_results.reserve(partitions.back()->intervals.size());
140   for (const auto& interval : partitions[idx_of_smallest_part]->intervals) {
141     MultiIndexInterval m_int;
142     m_int.start = interval.start;
143     m_int.end = interval.end;
144     m_int.idx_in_table.resize(tables_count);
145     m_int.idx_in_table[idx_of_smallest_part] = interval.id;
146     last_results.push_back(m_int);
147   }
148 
149   // Create an interval tree on all tables except the smallest - the first one.
150   std::vector<MultiIndexInterval> overlaps_with_this_table;
151   overlaps_with_this_table.reserve(partitions.back()->intervals.size());
152   for (uint32_t i = 1; i < tables_count && !last_results.empty(); i++) {
153     overlaps_with_this_table.clear();
154     uint32_t table_idx = tables_order[i];
155 
156     IntervalIntersector::Mode intersection_mode =
157         IntervalIntersector::DecideMode(
158             partitions[table_idx]->is_nonoverlapping,
159             static_cast<uint32_t>(last_results.size()));
160     IntervalIntersector cur_int_operator(partitions[table_idx]->intervals,
161                                          intersection_mode);
162     for (const auto& prev_result : last_results) {
163       Intervals new_overlaps;
164       cur_int_operator.FindOverlaps(prev_result.start, prev_result.end,
165                                     new_overlaps);
166       for (const auto& overlap : new_overlaps) {
167         MultiIndexInterval m_int;
168         m_int.idx_in_table = std::move(prev_result.idx_in_table);
169         m_int.idx_in_table[table_idx] = overlap.id;
170         m_int.start = overlap.start;
171         m_int.end = overlap.end;
172         overlaps_with_this_table.push_back(std::move(m_int));
173       }
174     }
175 
176     last_results = std::move(overlaps_with_this_table);
177   }
178 
179   uint32_t rows_count = static_cast<uint32_t>(last_results.size());
180   std::vector<int64_t> timestamps(rows_count);
181   std::vector<int64_t> durations(rows_count);
182   std::vector<std::vector<int64_t>> ids(tables_count);
183   for (auto& t_ids_vec : ids) {
184     t_ids_vec.resize(rows_count);
185   }
186 
187   for (uint32_t i = 0; i < rows_count; i++) {
188     const MultiIndexInterval& interval = last_results[i];
189     timestamps[i] = static_cast<int64_t>(interval.start);
190     durations[i] = static_cast<int64_t>(interval.end) -
191                    static_cast<int64_t>(interval.start);
192     for (uint32_t j = 0; j < tables_count; j++) {
193       ids[j][i] = interval.idx_in_table[j];
194     }
195   }
196 
197   builder.AddNonNullIntegersUnchecked(0, std::move(timestamps));
198   builder.AddNonNullIntegersUnchecked(1, std::move(durations));
199   for (uint32_t i = 0; i < tables_count; i++) {
200     builder.AddNonNullIntegersUnchecked(i + kArgCols, ids[i]);
201   }
202 
203   for (uint32_t i = 0; i < partitions[0]->sql_values.size(); i++) {
204     const SqlValue& part_val = partitions[0]->sql_values[i];
205     switch (part_val.type) {
206       case SqlValue::kLong:
207         RETURN_IF_ERROR(builder.AddIntegers(i + kPartitionColsOffset,
208                                             part_val.AsLong(), rows_count));
209         continue;
210       case SqlValue::kDouble:
211         RETURN_IF_ERROR(builder.AddFloats(i + kPartitionColsOffset,
212                                           part_val.AsDouble(), rows_count));
213         continue;
214       case SqlValue::kString:
215         RETURN_IF_ERROR(builder.AddTexts(i + kPartitionColsOffset,
216                                          part_val.AsString(), rows_count));
217         continue;
218       case SqlValue::kNull:
219         RETURN_IF_ERROR(builder.AddNulls(i + kPartitionColsOffset, rows_count));
220         continue;
221       case SqlValue::kBytes:
222         PERFETTO_FATAL("Invalid partition type");
223     }
224   }
225 
226   return static_cast<uint32_t>(last_results.size());
227 }
228 
229 struct IntervalIntersect : public SqliteFunction<IntervalIntersect> {
230   static constexpr char kName[] = "__intrinsic_interval_intersect";
231   // Two tables that are being intersected.
232   // TODO(mayzner): Support more tables.
233   static constexpr int kArgCount = -1;
234 
235   struct UserDataContext {
236     PerfettoSqlEngine* engine;
237     StringPool* pool;
238   };
239 
Stepperfetto::trace_processor::perfetto_sql::__anon1e7b90400111::IntervalIntersect240   static void Step(sqlite3_context* ctx, int argc, sqlite3_value** argv) {
241     PERFETTO_DCHECK(argc >= 2);
242     size_t tabc = static_cast<size_t>(argc - 1);
243     if (tabc > kIdCols) {
244       return sqlite::result::Error(
245           ctx, "interval intersect: Can intersect at most 5 tables");
246     }
247     const char* partition_list = sqlite::value::Text(argv[argc - 1]);
248     if (!partition_list) {
249       return sqlite::result::Error(
250           ctx, "interval intersect: column list cannot be null");
251     }
252 
253     // Get column names of return columns.
254     std::vector<std::string> ret_col_names{"ts", "dur"};
255     for (uint32_t i = 0; i < kIdCols; i++) {
256       ret_col_names.push_back(base::StackString<32>("id_%u", i).ToStdString());
257     }
258     std::vector<std::string> partition_columns =
259         base::SplitString(base::StripChars(partition_list, "()", ' '), ",");
260     if (partition_columns.size() > 4) {
261       return sqlite::result::Error(
262           ctx, "interval intersect: Can take at most 4 partitions.");
263     }
264     for (const auto& c : partition_columns) {
265       std::string p_col_name = base::TrimWhitespace(c).c_str();
266       if (!p_col_name.empty()) {
267         ret_col_names.push_back(p_col_name);
268       }
269     }
270 
271     // Get data from of each table.
272     std::vector<PartitionedTable*> tables(tabc);
273     std::vector<Partitions*> t_partitions(tabc);
274 
275     for (uint32_t i = 0; i < tabc; i++) {
276       tables[i] = sqlite::value::Pointer<PartitionedTable>(
277           argv[i], PartitionedTable::kName);
278 
279       // If any of the tables is empty the intersection with it also has to be
280       // empty.
281       if (!tables[i] || tables[i]->partitions_map.size() == 0) {
282         SQLITE_ASSIGN_OR_RETURN(
283             ctx, std::unique_ptr<RuntimeTable> ret_table,
284             RuntimeTable::Builder(GetUserData(ctx)->pool, ret_col_names)
285                 .Build(0));
286         return sqlite::result::UniquePointer(ctx, std::move(ret_table),
287                                              "TABLE");
288       }
289       t_partitions[i] = &tables[i]->partitions_map;
290     }
291 
292     std::vector<BuilderColType> col_types(kArgCols + tabc,
293                                           BuilderColType::kInt);
294     // Add dummy id cols.
295     col_types.resize(kArgCols + kIdCols, BuilderColType::kNullInt);
296 
297     Partitions* p_values = &tables[0]->partitions_map;
298     SQLITE_ASSIGN_OR_RETURN(ctx, std::vector<BuilderColType> p_types,
299                             GetPartitionsSqlType(*p_values));
300     col_types.insert(col_types.end(), p_types.begin(), p_types.end());
301 
302     RuntimeTable::Builder builder(GetUserData(ctx)->pool, ret_col_names,
303                                   col_types);
304 
305     // Partitions will be taken from the table which has the least number of
306     // them.
307     auto min_el = std::min_element(t_partitions.begin(), t_partitions.end(),
308                                    [](const auto& t_a, const auto& t_b) {
309                                      return t_a->size() < t_b->size();
310                                    });
311 
312     auto t_least_partitions =
313         static_cast<uint32_t>(std::distance(t_partitions.begin(), min_el));
314 
315     // The only partitions we should look at are partitions from the table
316     // with the least partitions.
317     const Partitions* p_intervals = t_partitions[t_least_partitions];
318 
319     // For each partition insert into table.
320     uint32_t rows = 0;
321     for (auto p_it = p_intervals->GetIterator(); p_it; ++p_it) {
322       std::vector<Partition*> cur_partition_in_table;
323       bool all_have_p = true;
324 
325       // From each table get all vectors of intervals.
326       for (uint32_t i = 0; i < tabc; i++) {
327         Partitions* t = t_partitions[i];
328         if (auto found = t->Find(p_it.key())) {
329           cur_partition_in_table.push_back(found);
330         } else {
331           all_have_p = false;
332           break;
333         }
334       }
335 
336       // Only push into the table if all tables have this partition present.
337       if (all_have_p) {
338         SQLITE_ASSIGN_OR_RETURN(ctx, uint32_t pushed_rows,
339                                 PushPartition(builder, cur_partition_in_table));
340         rows += pushed_rows;
341       }
342     }
343 
344     // Fill the dummy id columns with nulls.
345     for (uint32_t i = static_cast<uint32_t>(tabc); i < kIdCols; i++) {
346       SQLITE_RETURN_IF_ERROR(ctx, builder.AddNulls(i + kArgCols, rows));
347     }
348 
349     SQLITE_ASSIGN_OR_RETURN(ctx, std::unique_ptr<RuntimeTable> ret_tab,
350                             std::move(builder).Build(rows));
351 
352     return sqlite::result::UniquePointer(ctx, std::move(ret_tab), "TABLE");
353   }
354 };
355 
356 }  // namespace
357 
RegisterIntervalIntersectFunctions(PerfettoSqlEngine & engine,StringPool * pool)358 base::Status RegisterIntervalIntersectFunctions(PerfettoSqlEngine& engine,
359                                                 StringPool* pool) {
360   return engine.RegisterSqliteFunction<IntervalIntersect>(
361       std::make_unique<IntervalIntersect::UserDataContext>(
362           IntervalIntersect::UserDataContext{&engine, pool}));
363 }
364 
365 }  // namespace perfetto::trace_processor::perfetto_sql
366