// Copyright (C) 2024 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import {assertUnreachable} from '../base/logging';
import {getOrCreate} from '../base/utils';
import {ColumnType, SqlValue} from './query_result';

/**
 * A dataset defines a set of rows in TraceProcessor and a schema of the
 * resultant columns. Dataset implementations describe how to get the data in
 * different ways - e.g. 'source' datasets define a dataset as a table name (or
 * select statement) + filters, whereas a 'union' dataset defines a dataset as
 * the union of other datasets.
 *
 * The idea is that users can build arbitrarily complex trees of datasets, then
 * at any point call `optimize()` to create the smallest possible tree that
 * represents the same dataset, and `query()` which produces a select statement
 * for the resultant dataset.
 *
 * Users can also use the `schema` property and `implements()` to get and test
 * the schema of a given dataset.
 */
export interface Dataset {
  /**
   * Get or calculate the resultant schema of this dataset.
   */
  readonly schema: DatasetSchema;

  /**
   * Produce a query for this dataset.
   *
   * @param schema - The schema to use for extracting columns - if undefined,
   * the most specific possible schema is evaluated from the dataset first and
   * used instead.
   */
  query(schema?: DatasetSchema): string;

  /**
   * Optimizes a dataset into the smallest possible expression.
   *
   * For example by combining elements of union data sets that have the same src
   * and similar filters into a single set.
   *
   * For example, the following 'union' dataset...
   *
   * ```
   * {
   *   union: [
   *     {
   *       src: 'foo',
   *       schema: {
   *         'a': NUM,
   *         'b': NUM,
   *       },
   *       filter: {col: 'a', eq: 1},
   *     },
   *     {
   *       src: 'foo',
   *       schema: {
   *         'a': NUM,
   *         'b': NUM,
   *       },
   *       filter: {col: 'a', eq: 2},
   *     },
   *   ]
   * }
   * ```
   *
   * ...will be combined into a single 'source' dataset...
   *
   * ```
   * {
   *   src: 'foo',
   *   schema: {
   *     'a': NUM,
   *     'b': NUM,
   *   },
   *   filter: {col: 'a', in: [1, 2]},
   * },
   * ```
   */
  optimize(): Dataset;

  /**
   * Returns true if this dataset implements a given schema.
   *
   * @param schema - The schema to test against.
   */
  implements(schema: DatasetSchema): boolean;
}

/**
 * Defines a list of columns and types that define the shape of the data
 * represented by a dataset.
 */
export type DatasetSchema = Record<string, ColumnType>;

/**
 * A filter used to express that a column must equal a value.
 */
interface EqFilter {
  readonly col: string;
  readonly eq: SqlValue;
}

/**
 * A filter used to express that column must be one of a set of values.
 */
interface InFilter {
  readonly col: string;
  readonly in: ReadonlyArray<SqlValue>;
}

/**
 * Union of all filter types.
 */
type Filter = EqFilter | InFilter;

/**
 * Named arguments for a SourceDataset.
 */
interface SourceDatasetConfig {
  readonly src: string;
  readonly schema: DatasetSchema;
  readonly filter?: Filter;
}

/**
 * Defines a dataset with a source SQL select statement or table name, a
 * schema describing the columns, and an optional filter.
 */
export class SourceDataset implements Dataset {
  readonly src: string;
  readonly schema: DatasetSchema;
  readonly filter?: Filter;

  constructor(config: SourceDatasetConfig) {
    this.src = config.src;
    this.schema = config.schema;
    this.filter = config.filter;
  }

  /**
   * Builds `select <cols> from (<src>) [where ...]`.
   *
   * @param schema - Columns to select; defaults to this dataset's own schema.
   * @returns The select statement, with trailing whitespace trimmed when there
   * is no filter.
   */
  query(schema?: DatasetSchema): string {
    schema = schema ?? this.schema;
    const cols = Object.keys(schema);
    const whereClause = this.filterToQuery();
    return `select ${cols.join(', ')} from (${this.src}) ${whereClause}`.trim();
  }

  /**
   * A source dataset is already minimal, so it is returned unchanged.
   */
  optimize() {
    // Cannot optimize SourceDataset
    return this;
  }

  /**
   * Returns true if every column of `schema` exists in this dataset's schema
   * with the same column type.
   *
   * @param schema - The schema to test against.
   */
  implements(schema: DatasetSchema): boolean {
    return Object.entries(schema).every(([name, kind]) => {
      return name in this.schema && this.schema[name] === kind;
    });
  }

  /**
   * Renders the optional filter as a SQL `where` clause, or an empty string
   * when no filter is set.
   *
   * NOTE(review): filter values are interpolated verbatim (no quoting or
   * escaping), so string values must already be valid SQL literals - confirm
   * against callers before changing this.
   */
  private filterToQuery(): string {
    const filter = this.filter;
    if (filter === undefined) {
      return '';
    }
    if ('eq' in filter) {
      return `where ${filter.col} = ${filter.eq}`;
    } else if ('in' in filter) {
      return `where ${filter.col} in (${filter.in.join(',')})`;
    } else {
      assertUnreachable(filter);
    }
  }
}

/**
 * A dataset that represents the union of multiple datasets.
 */
export class UnionDataset implements Dataset {
  constructor(readonly union: ReadonlyArray<Dataset>) {}

  /**
   * The columns (name and type) that are common to every member of the union.
   * An empty union yields an empty schema.
   */
  get schema(): DatasetSchema {
    // Find the minimal set of columns that are supported by all datasets of
    // the union
    let sch: DatasetSchema | undefined;
    for (const ds of this.union) {
      const dsSchema = ds.schema;
      if (sch === undefined) {
        // First time just use this one
        sch = dsSchema;
        continue;
      }
      // Keep only the columns that this member also has with the same type.
      const newSch: DatasetSchema = {};
      for (const [key, kind] of Object.entries(sch)) {
        if (key in dsSchema && dsSchema[key] === kind) {
          newSch[key] = kind;
        }
      }
      sch = newSch;
    }
    return sch ?? {};
  }

  /**
   * Queries every member with the same schema and joins the results with
   * `union all`.
   *
   * @param schema - Columns to select; defaults to the common schema of the
   * union.
   */
  query(schema?: DatasetSchema): string {
    schema = schema ?? this.schema;
    return this.union
      .map((dataset) => dataset.query(schema))
      .join(' union all ');
  }

  /**
   * Optimizes each member, then merges source datasets that share the same
   * `src` into a single source dataset with a combined `in` filter.
   *
   * Source datasets are only merged when they are all unfiltered, or all
   * filtered on the same column: merging a filtered source with an unfiltered
   * one, or filters on different columns, cannot be expressed as one filter
   * and would select the wrong rows, so such members are kept separate.
   *
   * @returns The single remaining dataset if only one is left, otherwise a new
   * UnionDataset of the merged source datasets followed by all other members.
   */
  optimize(): Dataset {
    // Recursively optimize each dataset of this union
    const optimizedUnion = this.union.map((ds) => ds.optimize());

    // Find all source datasets and combine them based on src and filter
    // column. The key is JSON encoded so that a missing filter (null) can never
    // collide with a real column name.
    const combinedSrcSets = new Map<string, SourceDataset[]>();
    const otherDatasets: Dataset[] = [];
    for (const e of optimizedUnion) {
      if (e instanceof SourceDataset) {
        const key = JSON.stringify([e.src, e.filter?.col ?? null]);
        const set = getOrCreate(combinedSrcSets, key, () => []);
        set.push(e);
      } else {
        otherDatasets.push(e);
      }
    }

    const mergedSrcSets = Array.from(combinedSrcSets.values()).map(
      (srcGroup) => {
        if (srcGroup.length === 1) return srcGroup[0];

        // Combine schema across all members in the union
        const combinedSchema: DatasetSchema = {};
        for (const {schema} of srcGroup) {
          Object.assign(combinedSchema, schema);
        }

        // Merge filters for the same src. Every member of the group filters
        // on the same column (or none do), so 'eq' filters are normalised to
        // single-value 'in' filters and the value lists are combined.
        const inFilters: InFilter[] = [];
        for (const {filter} of srcGroup) {
          if (filter) {
            if ('eq' in filter) {
              inFilters.push({col: filter.col, in: [filter.eq]});
            } else {
              inFilters.push(filter);
            }
          }
        }

        const mergedFilter = mergeFilters(inFilters);
        return new SourceDataset({
          src: srcGroup[0].src,
          schema: combinedSchema,
          filter: mergedFilter,
        });
      },
    );

    const finalUnion = [...mergedSrcSets, ...otherDatasets];

    if (finalUnion.length === 1) {
      return finalUnion[0];
    } else {
      return new UnionDataset(finalUnion);
    }
  }

  /**
   * Returns true if every column of `schema` exists in the common schema of
   * the union with the same column type.
   *
   * @param schema - The schema to test against.
   */
  implements(schema: DatasetSchema): boolean {
    // The schema getter walks every member of the union, so evaluate it once
    // rather than once per column.
    const ownSchema = this.schema;
    return Object.entries(schema).every(([name, kind]) => {
      return name in ownSchema && ownSchema[name] === kind;
    });
  }
}

/**
 * Combines several 'in' filters into one whose value list is the
 * de-duplicated concatenation of all the inputs' values.
 *
 * All filters are expected to target the same column: the column of the first
 * filter is used for the result.
 *
 * @param filters - The filters to merge.
 * @returns The merged filter, or undefined if `filters` is empty.
 */
function mergeFilters(filters: InFilter[]): InFilter | undefined {
  if (filters.length === 0) return undefined;
  const col = filters[0].col;
  const values = new Set(filters.flatMap((filter) => filter.in));
  return {col, in: Array.from(values)};
}