xref: /aosp_15_r20/external/perfetto/ui/src/trace_processor/dataset.ts (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1// Copyright (C) 2024 The Android Open Source Project
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15import {assertUnreachable} from '../base/logging';
16import {getOrCreate} from '../base/utils';
17import {ColumnType, SqlValue} from './query_result';
18
19/**
20 * A dataset defines a set of rows in TraceProcessor and a schema of the
21 * resultant columns. Dataset implementations describe how to get the data in
22 * different ways - e.g. 'source' datasets define a dataset as a table name (or
23 * select statement) + filters, whereas a 'union' dataset defines a dataset as
24 * the union of other datasets.
25 *
26 * The idea is that users can build arbitrarily complex trees of datasets, then
27 * at any point call `optimize()` to create the smallest possible tree that
28 * represents the same dataset, and `query()` which produces a select statement
29 * for the resultant dataset.
30 *
31 * Users can also use the `schema` property and `implements()` to get and test
32 * the schema of a given dataset.
33 */
34export interface Dataset {
35  /**
36   * Get or calculate the resultant schema of this dataset.
37   */
38  readonly schema: DatasetSchema;
39
40  /**
41   * Produce a query for this dataset.
42   *
43   * @param schema - The schema to use for extracting columns - if undefined,
44   * the most specific possible schema is evaluated from the dataset first and
45   * used instead.
46   */
47  query(schema?: DatasetSchema): string;
48
49  /**
50   * Optimizes a dataset into the smallest possible expression.
51   *
52   * For example by combining elements of union data sets that have the same src
53   * and similar filters into a single set.
54   *
55   * For example, the following 'union' dataset...
56   *
57   * ```
58   * {
59   *   union: [
60   *     {
61   *       src: 'foo',
62   *       schema: {
63   *         'a': NUM,
64   *         'b': NUM,
65   *       },
66   *       filter: {col: 'a', eq: 1},
67   *     },
68   *     {
69   *       src: 'foo',
70   *       schema: {
71   *         'a': NUM,
72   *         'b': NUM,
73   *       },
74   *       filter: {col: 'a', eq: 2},
75   *     },
76   *   ]
77   * }
78   * ```
79   *
80   * ...will be combined into a single 'source' dataset...
81   *
82   * ```
83   * {
84   *   src: 'foo',
85   *   schema: {
86   *     'a': NUM,
87   *     'b': NUM,
88   *   },
89   *   filter: {col: 'a', in: [1, 2]},
90   * },
91   * ```
92   */
93  optimize(): Dataset;
94
95  /**
96   * Returns true if this dataset implements a given schema.
97   *
98   * @param schema - The schema to test against.
99   */
100  implements(schema: DatasetSchema): boolean;
101}
102
103/**
104 * Defines a list of columns and types that define the shape of the data
105 * represented by a dataset.
106 */
107export type DatasetSchema = Record<string, ColumnType>;
108
109/**
110 * A filter used to express that a column must equal a value.
111 */
112interface EqFilter {
113  readonly col: string;
114  readonly eq: SqlValue;
115}
116
117/**
118 * A filter used to express that column must be one of a set of values.
119 */
120interface InFilter {
121  readonly col: string;
122  readonly in: ReadonlyArray<SqlValue>;
123}
124
125/**
126 * Union of all filter types.
127 */
128type Filter = EqFilter | InFilter;
129
130/**
131 * Named arguments for a SourceDataset.
132 */
133interface SourceDatasetConfig {
134  readonly src: string;
135  readonly schema: DatasetSchema;
136  readonly filter?: Filter;
137}
138
139/**
140 * Defines a dataset with a source SQL select statement of table name, a
141 * schema describing the columns, and an optional filter.
142 */
143export class SourceDataset implements Dataset {
144  readonly src: string;
145  readonly schema: DatasetSchema;
146  readonly filter?: Filter;
147
148  constructor(config: SourceDatasetConfig) {
149    this.src = config.src;
150    this.schema = config.schema;
151    this.filter = config.filter;
152  }
153
154  query(schema?: DatasetSchema) {
155    schema = schema ?? this.schema;
156    const cols = Object.keys(schema);
157    const whereClause = this.filterToQuery();
158    return `select ${cols.join(', ')} from (${this.src}) ${whereClause}`.trim();
159  }
160
161  optimize() {
162    // Cannot optimize SourceDataset
163    return this;
164  }
165
166  implements(schema: DatasetSchema) {
167    return Object.entries(schema).every(([name, kind]) => {
168      return name in this.schema && this.schema[name] === kind;
169    });
170  }
171
172  private filterToQuery() {
173    const filter = this.filter;
174    if (filter === undefined) {
175      return '';
176    }
177    if ('eq' in filter) {
178      return `where ${filter.col} = ${filter.eq}`;
179    } else if ('in' in filter) {
180      return `where ${filter.col} in (${filter.in.join(',')})`;
181    } else {
182      assertUnreachable(filter);
183    }
184  }
185}
186
187/**
188 * A dataset that represents the union of multiple datasets.
189 */
190export class UnionDataset implements Dataset {
191  constructor(readonly union: ReadonlyArray<Dataset>) {}
192
193  get schema(): DatasetSchema {
194    // Find the minimal set of columns that are supported by all datasets of
195    // the union
196    let sch: Record<string, ColumnType> | undefined = undefined;
197    this.union.forEach((ds) => {
198      const dsSchema = ds.schema;
199      if (sch === undefined) {
200        // First time just use this one
201        sch = dsSchema;
202      } else {
203        const newSch: Record<string, ColumnType> = {};
204        for (const [key, kind] of Object.entries(sch)) {
205          if (key in dsSchema && dsSchema[key] === kind) {
206            newSch[key] = kind;
207          }
208        }
209        sch = newSch;
210      }
211    });
212    return sch ?? {};
213  }
214
215  query(schema?: DatasetSchema): string {
216    schema = schema ?? this.schema;
217    return this.union
218      .map((dataset) => dataset.query(schema))
219      .join(' union all ');
220  }
221
222  optimize(): Dataset {
223    // Recursively optimize each dataset of this union
224    const optimizedUnion = this.union.map((ds) => ds.optimize());
225
226    // Find all source datasets and combine then based on src
227    const combinedSrcSets = new Map<string, SourceDataset[]>();
228    const otherDatasets: Dataset[] = [];
229    for (const e of optimizedUnion) {
230      if (e instanceof SourceDataset) {
231        const set = getOrCreate(combinedSrcSets, e.src, () => []);
232        set.push(e);
233      } else {
234        otherDatasets.push(e);
235      }
236    }
237
238    const mergedSrcSets = Array.from(combinedSrcSets.values()).map(
239      (srcGroup) => {
240        if (srcGroup.length === 1) return srcGroup[0];
241
242        // Combine schema across all members in the union
243        const combinedSchema = srcGroup.reduce((acc, e) => {
244          Object.assign(acc, e.schema);
245          return acc;
246        }, {} as DatasetSchema);
247
248        // Merge filters for the same src
249        const inFilters: InFilter[] = [];
250        for (const {filter} of srcGroup) {
251          if (filter) {
252            if ('eq' in filter) {
253              inFilters.push({col: filter.col, in: [filter.eq]});
254            } else {
255              inFilters.push(filter);
256            }
257          }
258        }
259
260        const mergedFilter = mergeFilters(inFilters);
261        return new SourceDataset({
262          src: srcGroup[0].src,
263          schema: combinedSchema,
264          filter: mergedFilter,
265        });
266      },
267    );
268
269    const finalUnion = [...mergedSrcSets, ...otherDatasets];
270
271    if (finalUnion.length === 1) {
272      return finalUnion[0];
273    } else {
274      return new UnionDataset(finalUnion);
275    }
276  }
277
278  implements(schema: DatasetSchema) {
279    return Object.entries(schema).every(([name, kind]) => {
280      return name in this.schema && this.schema[name] === kind;
281    });
282  }
283}
284
285function mergeFilters(filters: InFilter[]): InFilter | undefined {
286  if (filters.length === 0) return undefined;
287  const col = filters[0].col;
288  const values = new Set(filters.flatMap((filter) => filter.in));
289  return {col, in: Array.from(values)};
290}
291