# Copyright (C) 2022 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Summarize per-build metrics csv files into per-property tables and plots."""
import argparse
import csv
import datetime
import enum
import logging
import re
import statistics
import subprocess
import textwrap
from pathlib import Path

from typing import Iterable, NewType, TextIO, TypeVar

import plot_metrics
import util

# One csv row from metrics.csv, keyed by column name.
Row = NewType("Row", dict[str, str])

# Numeric type preserved across an aggregation (int in -> int out, etc).
# NOTE: must live at module level — as a plain class attribute inside the
# Aggregation Enum below it would be turned into a spurious enum member "N"
# (non-descriptor attributes of an Enum body become members).
N = TypeVar("N", int, float)


# modify the row in-place
def _normalize_rebuild(row: Row):
    """Strip the numeric suffix from 'rebuild-<n>' descriptions, in-place,
    so repeated rebuilds of the same cuj are grouped together."""
    # Fixed from `[\d+]` (a character class matching ONE digit or '+') to
    # `\d+`, so multi-digit suffixes like 'rebuild-12' are fully removed.
    row["description"] = re.sub(
        r"^(rebuild)-\d+(.*)$", "\\1\\2", row.get("description")
    )


def _get_tagged_build_type(row: Row) -> str:
    """Return 'build_type' or 'build_type:tag' when the row carries a tag."""
    build_type = row.get("build_type")
    tag = row.get("tag")
    return build_type if not tag else f"{build_type}:{tag}"


def _build_types(rows: list[Row]) -> list[str]:
    """Unique tagged build types across `rows`, in first-seen order."""
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(_get_tagged_build_type(row) for row in rows).keys())


def _write_table(lines: list[list[str]]) -> str:
    """Render rows of cells as csv text (comma-joined, newline-terminated)."""

    def join_cells(line: list[str]) -> str:
        return ",".join(str(cell) for cell in line)

    return "\n".join(join_cells(line) for line in lines) + "\n"


class Aggregation(enum.Enum):
    """Statistic used to collapse multiple build measurements into one value."""

    # naked function as value assignment doesn't seem to work,
    # hence wrapping in a singleton tuple
    AVG = (statistics.mean,)
    MAX = (max,)
    MEDIAN = (statistics.median,)
    MIN = (min,)
    STDEV = (statistics.stdev,)

    def fn(self, xs: Iterable[N]) -> N:
        """Apply this aggregation's function to `xs`."""
        return self.value[0](xs)


def _aggregate(prop: str, rows: list[Row], agg: Aggregation) -> str:
    """
    compute the requested aggregation of `prop` over `rows`
    :return formatted value, suffixed with '[N=<count>]' when more than one
            sample contributed; '' when there is nothing to aggregate
    """
    if not rows:
        return ""
    vals = [x.get(prop) for x in rows]
    vals = [x for x in vals if bool(x)]  # drop missing/empty cells
    if len(vals) == 0:
        return ""

    # Heuristic: treat the column as numeric if any value is all-digits;
    # otherwise values are assumed to be duration strings (hh:mm:ss-like).
    isnum = any(x.isnumeric() for x in vals)
    if isnum:
        vals = [int(x) for x in vals]
        cell = f"{(agg.fn(vals)):.0f}"
    else:
        vals = [util.period_to_seconds(x) for x in vals]
        cell = util.hhmmss(datetime.timedelta(seconds=agg.fn(vals)))

    if len(vals) > 1:
        cell = f"{cell}[N={len(vals)}]"
    return cell


def acceptable(row: Row) -> bool:
    """True unless the row's build FAILED; failures are logged and skipped."""
    failure = row.get("build_result") == "FAILED"
    if failure:
        logging.error(f"Skipping {row.get('description')}/{row.get('build_type')}")
    return not failure


def summarize_helper(metrics: TextIO, regex: str, agg: Aggregation) -> dict[str, str]:
    """
    Args:
        metrics: csv detailed input, each row corresponding to a build
        regex: regex matching properties to be summarized
        agg: aggregation to use
    Returns:
        property name -> csv table text (one row per cuj/targets pair,
        one column per tagged build type)
    """
    reader: csv.DictReader = csv.DictReader(metrics)

    # get all matching properties
    # NOTE(review): reader.fieldnames is None for an empty input file, which
    # would raise here — presumably metrics.csv always has a header; verify.
    p = re.compile(regex)
    properties = [f for f in reader.fieldnames if p.search(f)]
    if len(properties) == 0:
        logging.error("no matching properties found")
        return {}

    all_rows: list[Row] = [row for row in reader if acceptable(row)]
    for row in all_rows:
        _normalize_rebuild(row)
    build_types: list[str] = _build_types(all_rows)
    by_cuj: dict[str, list[Row]] = util.groupby(
        all_rows, lambda l: l.get("description")
    )

    def extract_lines_for_cuj(prop, cuj, cuj_rows) -> list[list[str]]:
        # One output line per distinct `targets` value within the cuj; each
        # line holds the aggregated `prop` for every build type column.
        by_targets = util.groupby(cuj_rows, lambda l: l.get("targets"))
        lines = []
        for targets, target_rows in by_targets.items():
            by_build_type = util.groupby(target_rows, _get_tagged_build_type)
            vals = [
                _aggregate(prop, by_build_type.get(build_type), agg)
                for build_type in build_types
            ]
            lines.append([cuj, targets, *vals])
        return lines

    def tabulate(prop) -> str:
        headers = ["cuj", "targets"] + build_types
        lines: list[list[str]] = [headers]
        for cuj, cuj_rows in by_cuj.items():
            lines.extend(extract_lines_for_cuj(prop, cuj, cuj_rows))
        return _write_table(lines)

    return {prop: tabulate(prop) for prop in properties}


def _display_summarized_metrics(summary_csv: Path, filter_cujs: bool):
    """Pretty-print `summary_csv` via `column -t`, logging command and output.

    When `filter_cujs` is set, WARMUP/rebuild/revert/delete rows are grepped
    out before display.
    """
    # NOTE(review): shell=True with an interpolated path — acceptable only
    # because summary_csv is constructed internally; never pass an untrusted
    # path here.
    cmd = (
        (
            f'grep -v "WARMUP\\|rebuild\\|revert\\|delete" {summary_csv}'
            f" | column -t -s,"
        )
        if filter_cujs
        else f"column -t -s, {summary_csv}"
    )
    output = subprocess.check_output(cmd, shell=True, text=True)
    # Equivalent to the dedented '"""\ %s %s """' template this used to build:
    # textwrap.dedent normalizes the whitespace-only last line to "\n".
    logging.info("%s\n%s\n", cmd, output)


def summarize(
    metrics_csv: Path,
    regex: str,
    output_dir: Path,
    agg: Aggregation = Aggregation.MEDIAN,
    filter_cujs: bool = True,
    plot_format: str = "svg",
):
    """
    writes the summarized data as csv files under `output_dir`
    if `filter_cujs` is False, then does not filter out WARMUP and rebuild cuj steps
    """
    with open(metrics_csv, "rt") as input_file:
        summary_data = summarize_helper(input_file, regex, agg)
    for k, v in summary_data.items():
        summary_csv = output_dir.joinpath(f"{k}.{agg.name}.csv")
        summary_csv.parent.mkdir(parents=True, exist_ok=True)
        with open(summary_csv, mode="wt") as f:
            f.write(v)
        _display_summarized_metrics(summary_csv, filter_cujs)
        plot_file = output_dir.joinpath(f"{k}.{agg.name}.{plot_format}")
        plot_metrics.plot(v, plot_file, filter_cujs)


def main():
    """Parse cli arguments and summarize the selected metrics.csv."""
    p = argparse.ArgumentParser()
    p.add_argument(
        "-p",
        "--properties",
        default="^time$",
        nargs="?",
        help="regex to select properties",
    )
    p.add_argument(
        "metrics",
        nargs="?",
        default=util.get_default_log_dir().joinpath(util.METRICS_TABLE),
        help="metrics.csv file to parse",
    )
    p.add_argument(
        "--statistic",
        nargs="?",
        type=lambda arg: Aggregation[arg],
        default=Aggregation.MEDIAN,
        help=f"Defaults to {Aggregation.MEDIAN.name}. "
        f"Choose from {[a.name for a in Aggregation]}",
    )
    p.add_argument(
        "--filter",
        default=True,
        action=argparse.BooleanOptionalAction,
        help="Filter out 'rebuild-' and 'WARMUP' builds?",
    )
    p.add_argument(
        "--format",
        nargs="?",
        default="svg",
        help="graph output format, e.g. png, svg etc"
    )
    options = p.parse_args()
    metrics_csv = Path(options.metrics)
    aggregation: Aggregation = options.statistic
    # A directory argument is resolved to the metrics table inside it.
    if metrics_csv.exists() and metrics_csv.is_dir():
        metrics_csv = metrics_csv.joinpath(util.METRICS_TABLE)
    if not metrics_csv.exists():
        # Fixed typo in the error message: "exit" -> "exist".
        raise RuntimeError(f"{metrics_csv} does not exist")
    summarize(
        metrics_csv=metrics_csv,
        regex=options.properties,
        agg=aggregation,
        filter_cujs=options.filter,
        output_dir=metrics_csv.parent.joinpath("perf"),
        plot_format=options.format,
    )


if __name__ == "__main__":
    logging.root.setLevel(logging.INFO)
    main()