xref: /aosp_15_r20/external/perfetto/python/generators/sql_processing/utils.py (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1# Copyright (C) 2022 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15from enum import Enum
16import re
17import os
18from typing import Dict, List
19
# Maps a path-like key to a list of prefix strings.
# NOTE(review): presumably each key is a stdlib module directory and the values
# are the object-name prefixes accepted inside it; the consumer is not visible
# in this part of the file -- confirm against the caller.
ALLOWED_PREFIXES = {
    'android': ['heap_graph', 'memory'],
    'counters': ['counter'],
    'chrome/util': ['cr'],
    'intervals': ['interval'],
    'graphs': ['graph'],
    'slices': ['slice', 'thread_slice', 'process_slice'],
    'linux': ['cpu', 'memory'],
    'stacks': ['cpu_profiling'],
}

# Allows for nonstandard object names.
# NOTE(review): keys look like SQL file paths and values like the object names
# exempted in that file -- confirm against the caller.
OBJECT_NAME_ALLOWLIST = {
    'graphs/partition.sql': ['tree_structural_partition_by_group'],
}
35
# Type names, listed in a "standard" group and a "special" group.
# NOTE(review): presumably the set of types accepted for columns and
# arguments; the code that validates against this list is not visible here.
COLUMN_TYPES = [
    # Standard types
    'LONG',
    'DOUBLE',
    'STRING',
    'BOOL',
    'BYTES',

    # Special types
    'TIMESTAMP',
    'DURATION',
    'ID',
    'JOINID',
    'ARGSETID'
]

# NOTE(review): presumably the types accepted for macro arguments -- confirm
# against the code that reads this list.
MACRO_ARG_TYPES = ['TABLEORSUBQUERY', 'EXPR', 'COLUMNNAME']
53
# Regex fragments used to build the statement patterns in this file. A literal
# space inside a fragment becomes the WS wildcard once the enclosing pattern
# has been passed through update_pattern().

# One or more letters, digits, underscores or curly braces.
NAME = r'[a-zA-Z_\d\{\}]+'
# One non-whitespace character followed by whatever `.*` matches after it.
ANY_WORDS = r'[^\s].*'
# NOTE(review): the trailing `.*` also matches quote characters, so this is
# not restricted to non-quote text -- confirm that this is intended.
ANY_NON_QUOTE = r'[^\']*.*'
# One or more letters, underscores, parentheses or dots.
TYPE = r'[_a-zA-Z\(\)\.]+'
# Shortest possible run of any characters, newlines included.
SQL = r'[\s\S]*?'
# Zero or more whitespace characters.
WS = r'\s*'

# A single `--` comment up to and including its terminating newline.
COMMENT = r' --[^\n]*\n'
# Zero or more consecutive comments, as a non-capturing group.
COMMENTS = rf'(?:{COMMENT})*'
# One argument: optional leading comments, then a name, then a type.
ARG = rf'{COMMENTS} {NAME} {TYPE}'
# Same shape as ARG, but capturing the comments, the name and the type.
ARG_PATTERN = rf'({COMMENTS}) ({NAME}) ({TYPE})'
# An optional first argument followed by any number of comma-prefixed ones.
ARGS = rf'(?:{ARG})?(?: ,{ARG})*'
66
67
# Make the pattern more readable by allowing the use of spaces
# and replace them with a wildcard in a separate step.
# NOTE: two whitespaces next to each other are really bad for performance.
# Take special care to avoid them.
def update_pattern(pattern):
  """Return `pattern` with every literal space replaced by the WS regex.

  Patterns in this file are written with single spaces for readability; each
  of those spaces is expanded here into the whitespace wildcard held in WS.
  """
  fragments = pattern.split(' ')
  return WS.join(fragments)
74
75
# Matches CREATE [OR REPLACE] [VIRTUAL|PERFETTO] TABLE|VIEW statements.
# Groups: 1 = OR REPLACE, 2 = VIRTUAL or PERFETTO, 3 = TABLE or VIEW,
# 4 = object name, 5 = argument list of the optional parenthesised schema.
CREATE_TABLE_VIEW_PATTERN = update_pattern(
    # Match create table/view and catch type
    fr'^CREATE (OR REPLACE)? (VIRTUAL|PERFETTO)?'
    fr' (TABLE|VIEW) (?:IF NOT EXISTS)?'
    # Catch the name and optional schema.
    fr' ({NAME}) (?: \( ({ARGS}) \) )? (?:AS|USING)? .*')

# Matches a plain `CREATE TABLE <name> AS`; group 1 is the table name.
CREATE_TABLE_AS_PATTERN = update_pattern(fr'^CREATE TABLE ({NAME}) AS')

# Matches a plain `CREATE VIEW <name> AS`; group 1 is the view name.
CREATE_VIEW_AS_PATTERN = update_pattern(fr'^CREATE VIEW ({NAME}) AS')

# Matches `DROP VIEW|TABLE|INDEX [IF EXISTS] <name>;`.
# Groups: 1 = object type, 2 = object name.
DROP_TABLE_VIEW_PATTERN = update_pattern(
    fr'^DROP (VIEW|TABLE|INDEX) (?:IF EXISTS)? ({NAME});$')

# Matches an INCLUDE PERFETTO MODULE statement whose module path ends in `*`.
# This pattern has no capture groups.
INCLUDE_ALL_PATTERN = update_pattern(
    fr'^INCLUDE PERFETTO MODULE [a-zA-Z0-9_\.]*\*;')
92
# Matches CREATE [OR REPLACE] PERFETTO FUNCTION with a scalar return type.
# Groups: 1 = OR REPLACE, 2 = function name, 3 = argument list,
# 4 = comments between the arguments and RETURNS, 5 = return type.
CREATE_FUNCTION_PATTERN = update_pattern(
    # Function name.
    fr"CREATE (OR REPLACE)? PERFETTO FUNCTION ({NAME}) "
    # Args: anything in the brackets.
    fr" \( ({ARGS}) \)"
    # Type: word after RETURNS.
    fr"({COMMENTS})"
    fr" RETURNS ({TYPE}) AS ")

# Matches CREATE [OR REPLACE] PERFETTO FUNCTION ... RETURNS TABLE (...).
# Groups: 1 = OR REPLACE, 2 = function name, 3 = argument list,
# 4 = comments between the arguments and RETURNS, 5 = returned table columns.
CREATE_TABLE_FUNCTION_PATTERN = update_pattern(
    fr"CREATE (OR REPLACE)? PERFETTO FUNCTION ({NAME}) "
    # Args: anything in the brackets.
    fr" \( ({ARGS}) \) "
    # Type: table definition after RETURNS.
    fr"({COMMENTS})"
    fr" RETURNS TABLE \( ({ARGS}) \) AS ")

# Matches CREATE [OR REPLACE] PERFETTO MACRO.
# Groups: 1 = OR REPLACE, 2 = macro name, 3 = argument list,
# 4 = comments between the arguments and RETURNS, 5 = return type.
CREATE_MACRO_PATTERN = update_pattern(
    fr"CREATE (OR REPLACE)? PERFETTO MACRO ({NAME}) "
    # Args: anything in the brackets.
    fr" \( ({ARGS}) \) "
    # Type: word after RETURNS.
    fr"({COMMENTS})"
    fr" RETURNS ({TYPE})")
117
# Matches `INCLUDE PERFETTO MODULE <module>;`; group 1 is the module path.
# NOTE(review): this character class has no digits, unlike the one used in
# INCLUDE_ALL_PATTERN -- confirm that module names never contain digits.
INCLUDE_PATTERN = update_pattern(fr'^INCLUDE PERFETTO MODULE ([A-Za-z_.*]*);$')

# A name and a type separated by whitespace. Groups: 1 = name, 2 = type.
NAME_AND_TYPE_PATTERN = update_pattern(fr' ({NAME})\s+({TYPE}) ')

# ARG_PATTERN with its spaces expanded.
# Groups: 1 = comments, 2 = name, 3 = type.
ARG_DEFINITION_PATTERN = update_pattern(ARG_PATTERN)

# A type, then whitespace, then trailing text. Groups: 1 = type, 2 = text.
FUNCTION_RETURN_PATTERN = update_pattern(fr'^ ({TYPE})\s+({ANY_WORDS})')

# Matches any run of characters, newlines included.
ANY_PATTERN = r'(?:\s|.)*'
127
128
class ObjKind(str, Enum):
  """Kinds of SQL objects; each member is used as a key of PATTERN_BY_KIND.

  Because `str` is mixed in, every member is also a string equal to its
  value.
  """
  # Keyed to CREATE_TABLE_VIEW_PATTERN.
  table_view = 'table_view'
  # Keyed to CREATE_FUNCTION_PATTERN.
  function = 'function'
  # Keyed to CREATE_TABLE_FUNCTION_PATTERN.
  table_function = 'table_function'
  # Keyed to CREATE_MACRO_PATTERN.
  macro = 'macro'
  # Keyed to INCLUDE_PATTERN.
  include = 'include'
135
136
# Maps every ObjKind member to the regex pattern string that matches
# statements of that kind.
PATTERN_BY_KIND = {
    ObjKind.table_view: CREATE_TABLE_VIEW_PATTERN,
    ObjKind.function: CREATE_FUNCTION_PATTERN,
    ObjKind.table_function: CREATE_TABLE_FUNCTION_PATTERN,
    ObjKind.macro: CREATE_MACRO_PATTERN,
    ObjKind.include: INCLUDE_PATTERN
}
144
145
# Given a regex pattern and a string to match against, returns all the
# matching positions. Specifically, it returns a dictionary from the
# zero-based line number of the match to the tuple of groups captured by
# that match (the result of `match.groups()`).
# Note: the values are group tuples, not match objects, so the `object` in the
# return annotation is looser than what is actually returned.
def match_pattern(pattern: str, file_str: str) -> Dict[int, object]:
  """Map the line index of each match of `pattern` to its captured groups.

  The pattern is applied with re.MULTILINE. Line indices are zero-based: the
  index is the number of newlines that precede the start of the match. When
  several matches start on the same line, the last one wins.
  """
  found = re.finditer(pattern, file_str, re.MULTILINE)
  return {
      file_str.count('\n', 0, hit.start()): hit.groups() for hit in found
  }
157
158
159# Given a list of lines in a text and the line number, scans backwards to find
160# all the comments.
def extract_comment(lines: List[str], line_number: int) -> List[str]:
  """Return the block of `--` comment lines directly above `line_number`.

  Args:
    lines: the lines of the text.
    line_number: zero-based index of the line whose comment is wanted.

  Returns:
    The contiguous run of lines starting with `--` that ends on the line just
    before `line_number`, in their original order. The list is empty when
    there is no such run, including when `line_number` is the first line.
  """
  # Nothing precedes the first line. Without this guard a start index of -1
  # would make the scan begin at the END of the file and pick up unrelated
  # trailing comments.
  if line_number <= 0:
    return []

  comments = []
  for line in reversed(lines[:line_number]):
    # An empty or non-comment line ends the comment block.
    if not line.startswith('--'):
      break
    comments.append(line)

  # Lines were collected bottom-up; restore top-down order.
  comments.reverse()
  return comments
173
174
175# Given SQL string check whether any of the words is used, and create error
176# string if needed.
def check_banned_words(sql: str) -> List[str]:
  """Return an error message for every banned construct found in `sql`.

  Each line is stripped, and lines starting with `--` (comments) are skipped.
  LIKE is matched case-insensitively as a whole word, so identifiers that
  merely contain those letters (e.g. `unlikely`) are not reported. The
  deprecated CREATE_FUNCTION, CREATE_VIEW_FUNCTION and IMPORT( forms are
  matched as case-insensitive substrings.

  Args:
    sql: the SQL text to check.

  Returns:
    A list of error strings, one per offending construct per line; empty if
    nothing banned was found.
  """
  errors = []

  for raw_line in sql.split('\n'):
    line = raw_line.strip()

    # Ban the use of LIKE in non-comment lines.
    if line.startswith('--'):
      continue

    # Fold case once per line rather than once per check.
    folded = line.casefold()

    # \b keeps the check from firing on words that only contain "like".
    if re.search(r'\blike\b', folded):
      errors.append(
          'LIKE is banned in trace processor metrics. Prefer GLOB instead.\n')
      # A line reported for LIKE is not checked for the other constructs.
      continue

    if 'create_function' in folded:
      errors.append('CREATE_FUNCTION is deprecated in trace processor. '
                    'Use CREATE PERFETTO FUNCTION instead.')

    if 'create_view_function' in folded:
      errors.append('CREATE_VIEW_FUNCTION is deprecated in trace processor. '
                    'Use CREATE PERFETTO FUNCTION $name RETURNS TABLE instead.')

    if 'import(' in folded:
      errors.append('SELECT IMPORT is deprecated in trace processor. '
                    'Use INCLUDE PERFETTO MODULE instead.')

  return errors
204
205
206# Given SQL string check whether there is (not allowlisted) usage of
207# CREATE TABLE {name} AS.
def check_banned_create_table_as(sql: str) -> List[str]:
  """Return one error for each non-allowlisted `CREATE TABLE <name> AS`.

  The only table name exempt from the check is `_trace_bounds`.
  """
  table_names = (
      groups[0]
      for groups in match_pattern(CREATE_TABLE_AS_PATTERN, sql).values())
  return [
      f"Table '{name}' uses CREATE TABLE which is deprecated "
      "and this table is not allowlisted. Use CREATE PERFETTO TABLE."
      for name in table_names
      if name != "_trace_bounds"
  ]
217
218
219# Given SQL string check whether there is usage of CREATE VIEW {name} AS.
def check_banned_create_view_as(sql: str) -> List[str]:
  """Return one error for each `CREATE VIEW <name> AS` matched in `sql`."""
  view_names = (
      groups[0]
      for groups in match_pattern(CREATE_VIEW_AS_PATTERN, sql).values())
  return [
      f"CREATE VIEW '{name}' is deprecated. "
      "Use CREATE PERFETTO VIEW instead."
      for name in view_names
  ]
227
228
# Given SQL string check whether there is usage of DROP VIEW/TABLE/INDEX.
def check_banned_drop(sql: str) -> List[str]:
  """Return one error for each DROP statement matched in `sql`.

  DROP_TABLE_VIEW_PATTERN captures the object type and the object name; both
  are quoted in the error message.
  """
  drops = match_pattern(DROP_TABLE_VIEW_PATTERN, sql).values()
  return [
      f"Dropping object {sql_type} '{name}' is banned."
      for sql_type, name in drops
  ]
237
238
# Given SQL string check whether there is usage of INCLUDE PERFETTO MODULE
# with a wildcard (a module path ending in `*`).
def check_banned_include_all(sql: str) -> List[str]:
  """Return one error for each wildcard module include matched in `sql`."""
  message = (
      "INCLUDE PERFETTO MODULE with wildcards is not allowed in stdlib. "
      "Import specific modules instead.")
  wildcard_includes = match_pattern(INCLUDE_ALL_PATTERN, sql)
  # The message never varies, so repeat it once per recorded match.
  return [message] * len(wildcard_includes)
247