#!/usr/bin/env python3
# Copyright (C) 2022 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import ABC
from dataclasses import dataclass
import re
import sys
from typing import Dict, List, Optional, Set, NamedTuple

from python.generators.sql_processing.docs_extractor import DocsExtractor
from python.generators.sql_processing.utils import ObjKind
from python.generators.sql_processing.utils import COLUMN_TYPES, MACRO_ARG_TYPES

from python.generators.sql_processing.utils import ALLOWED_PREFIXES
from python.generators.sql_processing.utils import OBJECT_NAME_ALLOWLIST

from python.generators.sql_processing.utils import ANY_PATTERN
from python.generators.sql_processing.utils import ARG_DEFINITION_PATTERN


def _is_internal(name: str) -> bool:
  """Returns True if `name` starts with an underscore."""
  return name.startswith('_')


def _is_snake_case(s: str) -> bool:
  """Returns True if `s` consists only of lowercase ASCII letters, digits and
  underscores (the empty string is accepted)."""
  return re.fullmatch(r'^[a-z_0-9]*$', s) is not None


def parse_comment(comment: str) -> str:
  """Collapses a multi-line SQL comment into a single line of text.

  Every line is stripped of surrounding whitespace and of its leading dashes,
  then the lines are joined with a single space. For example the two lines
  "-- Foo" and "-- bar." become "Foo bar.".
  """
  # NOTE: str.lstrip takes a *set* of characters, so lstrip('--') removes any
  # run of leading '-' characters, not just one "--" marker.
  return ' '.join(
      line.strip().lstrip('--').lstrip()
      for line in comment.strip().split('\n'))


def get_module_prefix_error(name: str, path: str, module: str) -> Optional[str]:
  """Returns error message if the name is not correct, None otherwise.

  Args:
    name: Name of the table/view/function being checked.
    path: Path of the SQL file which defines the object.
    module: Name of the module the file belongs to.
  """
  if module in ("common", "prelude", "deprecated"):
    # Objects in these modules must NOT carry the module name as a prefix.
    if name.startswith(module):
      return (f'Names of tables/views/functions in the "{module}" module '
              f'should not start with {module}')
    return None

  if name.startswith(module):
    # Module prefix is always allowed.
    return None

  allowed_prefixes = [module]
  for path_prefix, allowed_name_prefixes in ALLOWED_PREFIXES.items():
    if not path.startswith(path_prefix):
      continue
    if any(name.startswith(prefix) for prefix in allowed_name_prefixes):
      return None
    # Remember the prefixes which would have been accepted so that they can
    # be listed in the error message.
    allowed_prefixes.extend(allowed_name_prefixes)

  if path in OBJECT_NAME_ALLOWLIST and name in OBJECT_NAME_ALLOWLIST[path]:
    return None

  return (
      f'Names of tables/views/functions at path "{path}" should be prefixed '
      f'with one of following names: {", ".join(allowed_prefixes)}')


class Arg(NamedTuple):
  """A parsed argument or column definition."""
  # Type of the argument; 'JOINID' when the declared type is JOINID(...).
  type: str
  # Type exactly as written in the definition.
  long_type: str
  # Text of the comment preceding the definition ('' if there is none).
  description: str
  # Text inside JOINID(...), or None for any other type.
  joinid_column: Optional[str]


class AbstractDocParser(ABC):
  """Shared helpers for the parsers of the individual object kinds.

  Validation problems are never raised; they are accumulated in `self.errors`.
  """

  @dataclass
  class Column:
    pass

  def __init__(self, path: str, module: str):
    self.path = path
    self.module = module
    # Set by the subclasses' parse() before any other helper is called.
    self.name: Optional[str] = None
    self.errors: List[str] = []

  def _parse_name(self, upper: bool = False):
    """Validates the prefix of `self.name` and returns the stripped name.

    `upper` is unused; it is kept so that existing callers keep working.
    """
    assert self.name
    assert isinstance(self.name, str)
    module_prefix_error = get_module_prefix_error(self.name, self.path,
                                                  self.module)
    if module_prefix_error is not None:
      self._error(module_prefix_error)
    return self.name.strip()

  def _parse_desc_not_empty(self, desc: str):
    """Returns the stripped description, recording an error if it is empty.

    A missing (None or empty) description yields '' instead of failing on
    `.strip()`.
    """
    if not desc:
      self._error('Description of the table/view/function/macro is missing')
      return ''
    return desc.strip()

  def _parse_columns(self, schema: str, kind: ObjKind) -> Dict[str, Arg]:
    """Parses a table schema and validates every column in it.

    An empty schema yields an empty dict. Columns without a description are
    reported and their type is not checked further.
    """
    columns = self._parse_args_definition(schema) if schema else {}
    for column_name, properties in columns.items():
      if not properties.description:
        self._error(
            f'Column "{column_name}" is missing a description. Please add a '
            'comment in front of the column definition')
        continue

      upper_arg_type = properties.type.upper()
      if kind is ObjKind.table_function:
        if upper_arg_type not in COLUMN_TYPES:
          self._error(
              f'Table function column "{column_name}" has unsupported type '
              f'"{properties.type}".')
      elif kind is ObjKind.table_view:
        if upper_arg_type not in COLUMN_TYPES:
          self._error(f'Table/view column "{column_name}" has unsupported type '
                      f'"{properties.type}".')
      else:
        self._error("This Perfetto SQL object doesn't support columns.")

    return columns

  def _parse_args(self, sql_args_str: str, kind: ObjKind) -> Dict[str, Arg]:
    """Parses an argument list and validates every argument in it.

    Unlike `_parse_columns`, the type of an argument is checked even when its
    description is missing.
    """
    args = self._parse_args_definition(sql_args_str)

    for arg_name, arg in args.items():
      if not arg.description:
        self._error(f'Arg "{arg_name}" is missing a description. '
                    'Please add a comment in front of the arg definition.')

      upper_arg_type = arg.type.upper()
      if kind is ObjKind.function or kind is ObjKind.table_function:
        if upper_arg_type not in COLUMN_TYPES:
          self._error(
              f'Function arg "{arg_name}" has unsupported type "{arg.type}".')
      elif kind is ObjKind.macro:
        if upper_arg_type not in MACRO_ARG_TYPES:
          self._error(
              f'Macro arg "{arg_name}" has unsupported type "{arg.type}".')
      else:
        self._error("This Perfetto SQL object doesn't support types.")

    return args

  # Parse function argument definition list or a table schema, e.g.
  # arg1 INT, arg2 STRING, including their comments.
  def _parse_args_definition(self, args_str: str) -> Dict[str, Arg]:
    """Splits `args_str` into a name -> Arg mapping.

    Definitions are consumed from the front of the string one at a time. When
    the remaining text does not match ARG_DEFINITION_PATTERN an error is
    recorded and the definitions parsed so far are returned.
    """
    result = {}
    remaining_args = args_str.strip()
    while remaining_args:
      m = re.match(fr'^{ARG_DEFINITION_PATTERN}({ANY_PATTERN})', remaining_args)
      if not m:
        self._error(f'Expected "{args_str}" to correspond to '
                    '"-- Comment\n arg_name TYPE" format '
                    f'({ARG_DEFINITION_PATTERN})')
        return result
      groups = m.groups()
      # The first group is the (optional) comment; the last three groups are
      # the name, the type and the not yet parsed rest of the string.
      comment = '' if groups[0] is None else parse_comment(groups[0])
      name = groups[-3]
      arg_type = groups[-2]

      joinid_match = re.match(r'JOINID\(([_A-Za-z\.]*)\)', arg_type)
      if joinid_match:
        result[name] = Arg('JOINID', arg_type, comment, joinid_match.group(1))
      else:
        result[name] = Arg(arg_type, arg_type, comment, None)
      # Strip whitespace and comma and parse the next arg.
      remaining_args = groups[-1].lstrip().lstrip(',').lstrip()

    return result

  def _error(self, error: str):
    """Records `error`, prefixed with the object name and the file path."""
    self.errors.append(
        f'Error while parsing documentation for "{self.name}" in {self.path}: '
        f'{error}')


class TableOrView:
  """Documentation of a single table or view."""
  name: str
  type: str
  desc: str
  cols: Dict[str, Arg]
  id_columns: List[str]
  joinid_cols: Dict[str, Arg]

  def __init__(self, name, type, desc, cols, id_columns, joinid_columns):
    self.name = name
    self.type = type
    self.desc = desc
    self.cols = cols
    self.id_columns = id_columns
    self.joinid_cols = joinid_columns


class TableViewDocParser(AbstractDocParser):
  """Parses documentation for CREATE TABLE and CREATE VIEW statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[TableOrView]:
    """Returns the parsed table/view.

    None is returned for internal objects and for objects which must not be
    exposed; in the latter case an error is recorded.
    """
    assert doc.obj_kind == ObjKind.table_view

    or_replace, perfetto_or_virtual, obj_type, self.name, schema = doc.obj_match

    if or_replace is not None:
      self._error(
          f'{obj_type} "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')
      return None

    if _is_internal(self.name):
      return None

    # The table named "window" is exempt from the schema and virtual table
    # checks below.
    is_window = self.name.lower() == "window"
    is_table = obj_type.lower() == "table"

    if not schema and not is_window:
      self._error(
          f'{obj_type} "{self.name}": schema is missing for a non-internal stdlib'
          f' perfetto table or view')
      return None

    if is_table and not perfetto_or_virtual:
      self._error(
          f'{obj_type} "{self.name}": Can only expose CREATE PERFETTO tables')
      return None

    is_virtual_table = is_table and perfetto_or_virtual.lower() == "virtual"
    if is_virtual_table and not is_window:
      self._error(
          f'{obj_type} "{self.name}": Virtual tables cannot be exposed.')
      return None

    # Parse the schema exactly once: every call records the column errors
    # again, so a second call would duplicate them.
    cols = self._parse_columns(schema, ObjKind.table_view)
    id_columns = [name for name, arg in cols.items() if arg.type == "ID"]
    joinid_cols = {
        name: arg for name, arg in cols.items() if arg.type == "JOINID"
    }

    return TableOrView(
        name=self._parse_name(),
        type=obj_type,
        desc=self._parse_desc_not_empty(doc.description),
        cols=cols,
        id_columns=id_columns,
        joinid_columns=joinid_cols)


class Function:
  """Documentation of a single scalar function."""
  name: str
  desc: str
  args: Dict[str, Arg]
  return_type: str
  return_desc: str

  def __init__(self, name, desc, args, return_type, return_desc):
    self.name = name
    self.desc = desc
    self.args = args
    self.return_type = return_type
    self.return_desc = return_desc


class FunctionDocParser(AbstractDocParser):
  """Parses documentation for CREATE_FUNCTION statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[Function]:
    """Returns the parsed function, or None for internal functions."""
    or_replace, self.name, args, ret_comment, ret_type = doc.obj_match

    # Unlike the table/view parser, parsing continues after this error.
    if or_replace is not None:
      self._error(
          f'Function "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')

    # Ignore internal functions.
    if _is_internal(self.name):
      return None

    name = self._parse_name()

    if not _is_snake_case(name):
      self._error(f'Function name "{name}" is not snake_case'
                  f' (should be {name.casefold()})')

    ret_desc = None if ret_comment is None else parse_comment(ret_comment)
    if not ret_desc:
      self._error(f'Function "{name}": return description is missing')

    return Function(
        name=name,
        desc=self._parse_desc_not_empty(doc.description),
        args=self._parse_args(args, ObjKind.function),
        return_type=ret_type,
        return_desc=ret_desc,
    )


class TableFunction:
  """Documentation of a single table function."""
  name: str
  desc: str
  cols: Dict[str, Arg]
  args: Dict[str, Arg]

  def __init__(self, name, desc, cols, args):
    self.name = name
    self.desc = desc
    self.cols = cols
    self.args = args


class TableFunctionDocParser(AbstractDocParser):
  """Parses documentation for table function statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[TableFunction]:
    """Returns the parsed table function.

    None is returned for internal functions and, after recording an error,
    for functions declared with CREATE OR REPLACE.
    """
    # The fourth field of the match is not used by this parser.
    or_replace, self.name, args, _, columns = doc.obj_match

    if or_replace is not None:
      self._error(
          f'Function "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')
      return None

    # Ignore internal functions.
    if _is_internal(self.name):
      return None

    name = self._parse_name()

    if not _is_snake_case(name):
      self._error(f'Function name "{name}" is not snake_case'
                  f' (should be "{name.casefold()}")')

    return TableFunction(
        name=name,
        desc=self._parse_desc_not_empty(doc.description),
        cols=self._parse_columns(columns, ObjKind.table_function),
        args=self._parse_args(args, ObjKind.table_function),
    )


class Macro:
  """Documentation of a single macro."""
  name: str
  desc: str
  return_desc: str
  return_type: str
  args: Dict[str, Arg]

  def __init__(self, name: str, desc: str, return_desc: str, return_type: str,
               args: Dict[str, Arg]):
    self.name = name
    self.desc = desc
    self.return_desc = return_desc
    self.return_type = return_type
    self.args = args


class MacroDocParser(AbstractDocParser):
  """Parses documentation for macro statements."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[Macro]:
    """Returns the parsed macro, or None for internal macros."""
    or_replace, self.name, args, return_desc, return_type = doc.obj_match

    # Parsing continues after this error, as it does for scalar functions.
    if or_replace is not None:
      self._error(
          f'Macro "{self.name}": CREATE OR REPLACE is not allowed in stdlib '
          f'as standard library modules can only included once. Please just '
          f'use CREATE instead.')

    # Ignore internal macros.
    if _is_internal(self.name):
      return None

    name = self._parse_name()

    if not _is_snake_case(name):
      self._error(f'Macro name "{name}" is not snake_case'
                  f' (should be "{name.casefold()}")')

    return Macro(
        name=name,
        desc=self._parse_desc_not_empty(doc.description),
        # A missing return comment yields an empty description instead of
        # failing inside parse_comment.
        return_desc='' if return_desc is None else parse_comment(return_desc),
        return_type=return_type,
        args=self._parse_args(args, ObjKind.macro),
    )


class Include:
  """A single include of another module."""
  package: str
  module: str
  module_as_list: List[str]

  def __init__(self, package: str, module: str, module_as_list: List[str]):
    self.package = package
    self.module = module
    self.module_as_list = module_as_list


class IncludeParser(AbstractDocParser):
  """Parses the includes of module."""

  def __init__(self, path: str, module: str):
    super().__init__(path, module)

  def parse(self, doc: DocsExtractor.Extract) -> Optional[Include]:
    """Returns the include described by the first field of `doc.obj_match`.

    The included module is split on '.'; its first component is the package.
    """
    # NOTE: this replaces the module passed to the constructor.
    self.module = list(doc.obj_match)[0]
    module_as_list = self.module.split('.')

    return Include(
        package=module_as_list[0],
        module=self.module,
        module_as_list=module_as_list,
    )


class ParsedModule:
  """Data class containing all of the documentation of single SQL file"""
  # All attributes are assigned in __init__. They are intentionally declared
  # without class-level defaults: a mutable default (e.g. `= []`) would be a
  # single object shared through the class.
  package_name: str
  module_as_list: List[str]
  module: str
  errors: List[str]
  table_views: List[TableOrView]
  functions: List[Function]
  table_functions: List[TableFunction]
  macros: List[Macro]
  includes: List[Include]
  # Maps the name of every table/view to the names of its ID columns.
  id_columns: Dict[str, List[str]]

  def __init__(self, package_name: str, module_as_list: List[str],
               errors: List[str], table_views: List[TableOrView],
               functions: List[Function], table_functions: List[TableFunction],
               macros: List[Macro], includes: List[Include]):
    self.package_name = package_name
    self.module_as_list = module_as_list
    self.module = ".".join(module_as_list)
    self.errors = errors
    self.table_views = table_views
    self.functions = functions
    self.table_functions = table_functions
    self.macros = macros
    self.includes = includes
    self.id_columns = {o.name: o.id_columns for o in table_views}


def parse_file(path: str, sql: str) -> Optional[ParsedModule]:
  """Parses the documentation of a single stdlib SQL file.

  Args:
    path: Path of the SQL file. The module name is derived from the part of
      the path after the last '/stdlib/', without the '.sql' suffix.
    sql: Contents of the file.

  Returns:
    A ParsedModule with the parsed objects and all validation errors, or None
    if the file belongs to the `deprecated` package. If the extraction itself
    reports errors, the returned module contains only those errors.
  """
  if sys.platform.startswith('win'):
    path = path.replace('\\', '/')

  module_as_list: List[str] = path.split('/stdlib/')[-1].split(".sql")[0].split(
      '/')

  # Get package name
  package_name = module_as_list[0]

  # Disable support for `deprecated` package
  if package_name == "deprecated":
    return None

  # Extract all the docs from the SQL.
  extractor = DocsExtractor(path, package_name, sql)
  docs = extractor.extract()
  if extractor.errors:
    return ParsedModule(package_name, module_as_list, extractor.errors, [], [],
                        [], [], [])

  # Parse the extracted docs.
  errors: List[str] = []
  table_views: List[TableOrView] = []
  functions: List[Function] = []
  table_functions: List[TableFunction] = []
  macros: List[Macro] = []
  includes: List[Include] = []

  # (object kind, parser class, list collecting the parsed objects).
  handlers = (
      (ObjKind.table_view, TableViewDocParser, table_views),
      (ObjKind.function, FunctionDocParser, functions),
      (ObjKind.table_function, TableFunctionDocParser, table_functions),
      (ObjKind.macro, MacroDocParser, macros),
      (ObjKind.include, IncludeParser, includes),
  )
  for doc in docs:
    for kind, parser_cls, parsed in handlers:
      if doc.obj_kind == kind:
        parser = parser_cls(path, package_name)
        res = parser.parse(doc)
        if res:
          parsed.append(res)
        errors += parser.errors
        break

  return ParsedModule(package_name, module_as_list, errors, table_views,
                      functions, table_functions, macros, includes)