1// Copyright (C) 2024 The Android Open Source Project 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15import {assertUnreachable} from '../base/logging'; 16import {getOrCreate} from '../base/utils'; 17import {ColumnType, SqlValue} from './query_result'; 18 19/** 20 * A dataset defines a set of rows in TraceProcessor and a schema of the 21 * resultant columns. Dataset implementations describe how to get the data in 22 * different ways - e.g. 'source' datasets define a dataset as a table name (or 23 * select statement) + filters, whereas a 'union' dataset defines a dataset as 24 * the union of other datasets. 25 * 26 * The idea is that users can build arbitrarily complex trees of datasets, then 27 * at any point call `optimize()` to create the smallest possible tree that 28 * represents the same dataset, and `query()` which produces a select statement 29 * for the resultant dataset. 30 * 31 * Users can also use the `schema` property and `implements()` to get and test 32 * the schema of a given dataset. 33 */ 34export interface Dataset { 35 /** 36 * Get or calculate the resultant schema of this dataset. 37 */ 38 readonly schema: DatasetSchema; 39 40 /** 41 * Produce a query for this dataset. 42 * 43 * @param schema - The schema to use for extracting columns - if undefined, 44 * the most specific possible schema is evaluated from the dataset first and 45 * used instead. 46 */ 47 query(schema?: DatasetSchema): string; 48 49 /** 50 * Optimizes a dataset into the smallest possible expression. 51 * 52 * For example by combining elements of union data sets that have the same src 53 * and similar filters into a single set. 54 * 55 * For example, the following 'union' dataset... 56 * 57 * ``` 58 * { 59 * union: [ 60 * { 61 * src: 'foo', 62 * schema: { 63 * 'a': NUM, 64 * 'b': NUM, 65 * }, 66 * filter: {col: 'a', eq: 1}, 67 * }, 68 * { 69 * src: 'foo', 70 * schema: { 71 * 'a': NUM, 72 * 'b': NUM, 73 * }, 74 * filter: {col: 'a', eq: 2}, 75 * }, 76 * ] 77 * } 78 * ``` 79 * 80 * ...will be combined into a single 'source' dataset... 81 * 82 * ``` 83 * { 84 * src: 'foo', 85 * schema: { 86 * 'a': NUM, 87 * 'b': NUM, 88 * }, 89 * filter: {col: 'a', in: [1, 2]}, 90 * }, 91 * ``` 92 */ 93 optimize(): Dataset; 94 95 /** 96 * Returns true if this dataset implements a given schema. 97 * 98 * @param schema - The schema to test against. 99 */ 100 implements(schema: DatasetSchema): boolean; 101} 102 103/** 104 * Defines a list of columns and types that define the shape of the data 105 * represented by a dataset. 106 */ 107export type DatasetSchema = Record<string, ColumnType>; 108 109/** 110 * A filter used to express that a column must equal a value. 111 */ 112interface EqFilter { 113 readonly col: string; 114 readonly eq: SqlValue; 115} 116 117/** 118 * A filter used to express that column must be one of a set of values. 119 */ 120interface InFilter { 121 readonly col: string; 122 readonly in: ReadonlyArray<SqlValue>; 123} 124 125/** 126 * Union of all filter types. 127 */ 128type Filter = EqFilter | InFilter; 129 130/** 131 * Named arguments for a SourceDataset. 132 */ 133interface SourceDatasetConfig { 134 readonly src: string; 135 readonly schema: DatasetSchema; 136 readonly filter?: Filter; 137} 138 139/** 140 * Defines a dataset with a source SQL select statement of table name, a 141 * schema describing the columns, and an optional filter. 142 */ 143export class SourceDataset implements Dataset { 144 readonly src: string; 145 readonly schema: DatasetSchema; 146 readonly filter?: Filter; 147 148 constructor(config: SourceDatasetConfig) { 149 this.src = config.src; 150 this.schema = config.schema; 151 this.filter = config.filter; 152 } 153 154 query(schema?: DatasetSchema) { 155 schema = schema ?? this.schema; 156 const cols = Object.keys(schema); 157 const whereClause = this.filterToQuery(); 158 return `select ${cols.join(', ')} from (${this.src}) ${whereClause}`.trim(); 159 } 160 161 optimize() { 162 // Cannot optimize SourceDataset 163 return this; 164 } 165 166 implements(schema: DatasetSchema) { 167 return Object.entries(schema).every(([name, kind]) => { 168 return name in this.schema && this.schema[name] === kind; 169 }); 170 } 171 172 private filterToQuery() { 173 const filter = this.filter; 174 if (filter === undefined) { 175 return ''; 176 } 177 if ('eq' in filter) { 178 return `where ${filter.col} = ${filter.eq}`; 179 } else if ('in' in filter) { 180 return `where ${filter.col} in (${filter.in.join(',')})`; 181 } else { 182 assertUnreachable(filter); 183 } 184 } 185} 186 187/** 188 * A dataset that represents the union of multiple datasets. 189 */ 190export class UnionDataset implements Dataset { 191 constructor(readonly union: ReadonlyArray<Dataset>) {} 192 193 get schema(): DatasetSchema { 194 // Find the minimal set of columns that are supported by all datasets of 195 // the union 196 let sch: Record<string, ColumnType> | undefined = undefined; 197 this.union.forEach((ds) => { 198 const dsSchema = ds.schema; 199 if (sch === undefined) { 200 // First time just use this one 201 sch = dsSchema; 202 } else { 203 const newSch: Record<string, ColumnType> = {}; 204 for (const [key, kind] of Object.entries(sch)) { 205 if (key in dsSchema && dsSchema[key] === kind) { 206 newSch[key] = kind; 207 } 208 } 209 sch = newSch; 210 } 211 }); 212 return sch ?? {}; 213 } 214 215 query(schema?: DatasetSchema): string { 216 schema = schema ?? this.schema; 217 return this.union 218 .map((dataset) => dataset.query(schema)) 219 .join(' union all '); 220 } 221 222 optimize(): Dataset { 223 // Recursively optimize each dataset of this union 224 const optimizedUnion = this.union.map((ds) => ds.optimize()); 225 226 // Find all source datasets and combine then based on src 227 const combinedSrcSets = new Map<string, SourceDataset[]>(); 228 const otherDatasets: Dataset[] = []; 229 for (const e of optimizedUnion) { 230 if (e instanceof SourceDataset) { 231 const set = getOrCreate(combinedSrcSets, e.src, () => []); 232 set.push(e); 233 } else { 234 otherDatasets.push(e); 235 } 236 } 237 238 const mergedSrcSets = Array.from(combinedSrcSets.values()).map( 239 (srcGroup) => { 240 if (srcGroup.length === 1) return srcGroup[0]; 241 242 // Combine schema across all members in the union 243 const combinedSchema = srcGroup.reduce((acc, e) => { 244 Object.assign(acc, e.schema); 245 return acc; 246 }, {} as DatasetSchema); 247 248 // Merge filters for the same src 249 const inFilters: InFilter[] = []; 250 for (const {filter} of srcGroup) { 251 if (filter) { 252 if ('eq' in filter) { 253 inFilters.push({col: filter.col, in: [filter.eq]}); 254 } else { 255 inFilters.push(filter); 256 } 257 } 258 } 259 260 const mergedFilter = mergeFilters(inFilters); 261 return new SourceDataset({ 262 src: srcGroup[0].src, 263 schema: combinedSchema, 264 filter: mergedFilter, 265 }); 266 }, 267 ); 268 269 const finalUnion = [...mergedSrcSets, ...otherDatasets]; 270 271 if (finalUnion.length === 1) { 272 return finalUnion[0]; 273 } else { 274 return new UnionDataset(finalUnion); 275 } 276 } 277 278 implements(schema: DatasetSchema) { 279 return Object.entries(schema).every(([name, kind]) => { 280 return name in this.schema && this.schema[name] === kind; 281 }); 282 } 283} 284 285function mergeFilters(filters: InFilter[]): InFilter | undefined { 286 if (filters.length === 0) return undefined; 287 const col = filters[0].col; 288 const values = new Set(filters.flatMap((filter) => filter.in)); 289 return {col, in: Array.from(values)}; 290} 291