# Copyright (C) 2022 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import csv
import datetime
import enum
import logging
import re
import statistics
import subprocess
import textwrap
from pathlib import Path

from typing import Iterable, NewType, TextIO, TypeVar

import plot_metrics
import util

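# A Row is one parsed line of metrics.csv, keyed by column header
# (e.g. "description", "build_type", "tag", "targets", "build_result").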
Row = NewType("Row", dict[str, str])


# modify the row in-place
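# e.g. a description starting with "rebuild-2" is collapsed to "rebuild" so
# that repeated rebuild steps aggregate under a single name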
def _normalize_rebuild(row: Row):
    row["description"] = re.sub(
        r"^(rebuild)-\d+(.*)$", r"\1\2", row.get("description")
    )

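# Tagged builds get their own column label of the form "<build_type>:<tag>";
# untagged builds are labeled with the bare build_type.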
def _get_tagged_build_type(row: Row) -> str:
    build_type = row.get("build_type")
    tag = row.get("tag")
    return build_type if not tag else f"{build_type}:{tag}"
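

# Deduplicate tagged build types while preserving first-seen order;
# these become the column headers of the summary tables.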
43
44def _build_types(rows: list[Row]) -> list[str]:
45    return list(dict.fromkeys(_get_tagged_build_type(row) for row in rows).keys())
46
47
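# Render rows of cells as CSV text, one line per row,
# e.g. _write_table([["a", "b"], ["1", "2"]]) == "a,b\n1,2\n"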
def _write_table(lines: list[list[str]]) -> str:
    def join_cells(line: list[str]) -> str:
        return ",".join(str(cell) for cell in line)

    return "\n".join(join_cells(line) for line in lines) + "\n"


# Defined at module level: assigning the TypeVar inside the Enum body would
# make the enum machinery treat "N" as a member.
N = TypeVar("N", int, float)


class Aggregation(enum.Enum):
    # naked function as value assignment doesn't seem to work,
    # hence wrapping in a singleton tuple
    AVG = (statistics.mean,)
    MAX = (max,)
    MEDIAN = (statistics.median,)
    MIN = (min,)
    STDEV = (statistics.stdev,)

    def fn(self, xs: Iterable[N]) -> N:
        return self.value[0](xs)


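# _aggregate applies one of the above, e.g. Aggregation.MEDIAN.fn([3, 1, 2]) == 2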
def _aggregate(prop: str, rows: list[Row], agg: Aggregation) -> str:
    """
    compute the requested aggregation of `prop` over `rows`
    :return: the formatted value, suffixed with "[N=<count>]" when more than
      one sample was aggregated
    """
    if not rows:
        return ""
    vals = [x.get(prop) for x in rows]
    vals = [x for x in vals if bool(x)]
    if len(vals) == 0:
        return ""

    # values that look numeric are aggregated as integers; otherwise they are
    # treated as durations: parsed to seconds and formatted back via util.hhmmss
    isnum = any(x.isnumeric() for x in vals)
    if isnum:
        vals = [int(x) for x in vals]
        cell = f"{(agg.fn(vals)):.0f}"
    else:
        vals = [util.period_to_seconds(x) for x in vals]
        cell = util.hhmmss(datetime.timedelta(seconds=agg.fn(vals)))

    if len(vals) > 1:
        cell = f"{cell}[N={len(vals)}]"
    return cell


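# Rows from failed builds are logged and excluded from the summary.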
def acceptable(row: Row) -> bool:
    failure = row.get("build_result") == "FAILED"
    if failure:
        logging.error(f"Skipping {row.get('description')}/{row.get('build_type')}")
    return not failure


def summarize_helper(metrics: TextIO, regex: str, agg: Aggregation) -> dict[str, str]:
    """
    Args:
      metrics: csv detailed input, each row corresponding to a build
      regex: regex matching properties to be summarized
      agg: aggregation to use
    Returns:
      a dict mapping each matched property to its summary table as CSV text
    """
    reader: csv.DictReader = csv.DictReader(metrics)

    # get all matching properties
    p = re.compile(regex)
    properties = [f for f in reader.fieldnames if p.search(f)]
    if len(properties) == 0:
        logging.error("no matching properties found")
        return {}

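    # drop failed builds, collapse "rebuild-N" descriptions, then group the
    # remaining rows by cuj description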
    all_rows: list[Row] = [row for row in reader if acceptable(row)]
    for row in all_rows:
        _normalize_rebuild(row)
    build_types: list[str] = _build_types(all_rows)
    by_cuj: dict[str, list[Row]] = util.groupby(
        all_rows, lambda r: r.get("description")
    )

    def extract_lines_for_cuj(
        prop: str, cuj: str, cuj_rows: list[Row]
    ) -> list[list[str]]:
        by_targets = util.groupby(cuj_rows, lambda r: r.get("targets"))
        lines = []
        for targets, target_rows in by_targets.items():
            by_build_type = util.groupby(target_rows, _get_tagged_build_type)
            vals = [
                _aggregate(prop, by_build_type.get(build_type), agg)
                for build_type in build_types
            ]
            lines.append([cuj, targets, *vals])
        return lines

    def tabulate(prop: str) -> str:
        headers = ["cuj", "targets"] + build_types
        lines: list[list[str]] = [headers]
        for cuj, cuj_rows in by_cuj.items():
            lines.extend(extract_lines_for_cuj(prop, cuj, cuj_rows))
        return _write_table(lines)

    return {prop: tabulate(prop) for prop in properties}
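# Each summary table produced above is CSV text shaped like (illustrative values):
#   cuj,targets,<build_type>,<build_type>:<tag>
#   <description>,<targets>,0:01:02,0:00:58[N=3]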


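# Pretty-print a summary csv via `column`; when filter_cujs is set, the
# WARMUP, rebuild, revert and delete rows are first filtered out with grep.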
def _display_summarized_metrics(summary_csv: Path, filter_cujs: bool):
    cmd = (
        (
            f'grep -v "WARMUP\\|rebuild\\|revert\\|delete" {summary_csv}'
            " | column -t -s,"
        )
        if filter_cujs
        else f"column -t -s, {summary_csv}"
    )
    output = subprocess.check_output(cmd, shell=True, text=True)
    logging.info(
        textwrap.dedent(
            """\
            %s
            %s
            """
        ),
        cmd,
        output,
    )


def summarize(
    metrics_csv: Path,
    regex: str,
    output_dir: Path,
    agg: Aggregation = Aggregation.MEDIAN,
    filter_cujs: bool = True,
    plot_format: str = "svg",
):
    """
    Writes each summarized metric as a csv file (and a plot) under `output_dir`.
    If `filter_cujs` is False, WARMUP and rebuild cuj steps are not filtered
    out of the displayed and plotted output.
    """
    with open(metrics_csv, "rt") as input_file:
        summary_data = summarize_helper(input_file, regex, agg)
    for k, v in summary_data.items():
        summary_csv = output_dir.joinpath(f"{k}.{agg.name}.csv")
        summary_csv.parent.mkdir(parents=True, exist_ok=True)
        with open(summary_csv, mode="wt") as f:
            f.write(v)
        _display_summarized_metrics(summary_csv, filter_cujs)
        plot_file = output_dir.joinpath(f"{k}.{agg.name}.{plot_format}")
        plot_metrics.plot(v, plot_file, filter_cujs)
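# Example (hypothetical paths, assuming the csv has a "time" column):
#   summarize(Path("metrics.csv"), "^time$", Path("perf"))
# would write perf/time.MEDIAN.csv and a perf/time.MEDIAN.svg plot.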


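# Typical invocation (illustrative):
#   python3 pretty.py --statistic MEDIAN --properties '^time$' path/to/metrics.csv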
def main():
    p = argparse.ArgumentParser()
    p.add_argument(
        "-p",
        "--properties",
        default="^time$",
        nargs="?",
        help="regex to select properties",
    )
    p.add_argument(
        "metrics",
        nargs="?",
        default=util.get_default_log_dir().joinpath(util.METRICS_TABLE),
        help="metrics.csv file to parse",
    )
    p.add_argument(
        "--statistic",
        nargs="?",
        type=lambda arg: Aggregation[arg],
        default=Aggregation.MEDIAN,
        help=f"Defaults to {Aggregation.MEDIAN.name}. "
        f"Choose from {[a.name for a in Aggregation]}",
    )
    p.add_argument(
        "--filter",
        default=True,
        action=argparse.BooleanOptionalAction,
        help="Filter out 'rebuild-' and 'WARMUP' builds?",
    )
    p.add_argument(
        "--format",
        nargs="?",
        default="svg",
        help="graph output format, e.g. png, svg, etc.",
    )
    options = p.parse_args()
    metrics_csv = Path(options.metrics)
    aggregation: Aggregation = options.statistic
    if metrics_csv.exists() and metrics_csv.is_dir():
        metrics_csv = metrics_csv.joinpath(util.METRICS_TABLE)
    if not metrics_csv.exists():
        raise RuntimeError(f"{metrics_csv} does not exist")
    summarize(
        metrics_csv=metrics_csv,
        regex=options.properties,
        agg=aggregation,
        filter_cujs=options.filter,
        output_dir=metrics_csv.parent.joinpath("perf"),
        plot_format=options.format,
    )


if __name__ == "__main__":
    logging.root.setLevel(logging.INFO)
    main()