1#!/usr/bin/env python3
2
3"""
4A wrapper over the benchmark infrastructure to generate commonly used commands,
5parse results and generate csv/graphs.
6
7The script works off the manually written TABLE (see below). More commands can be
8added in the future.
9
10One example usage is
11-> python benchmarks/dynamo/runner.py --suites=torchbench --inference
12This command will generate the commands for the default compilers (see DEFAULTS
13below) for inference, run them and visualize the logs.
14
15If you want to just print the commands, you could use the following command
16-> python benchmarks/dynamo/runner.py --print-run-commands --suites=torchbench --inference
17
18Similarly, if you want to just visualize the already finished logs
19-> python benchmarks/dynamo/runner.py --visualize-logs --suites=torchbench --inference
20
21If you want to test float16
22-> python benchmarks/dynamo/runner.py --suites=torchbench --inference --dtypes=float16
23
24"""
25
26import argparse
27import dataclasses
28import functools
29import glob
30import importlib
31import io
32import itertools
33import logging
34import os
35import re
36import shutil
37import subprocess
38import sys
39import tempfile
40from collections import defaultdict
41from datetime import datetime, timedelta, timezone
42from os.path import abspath, exists
43from random import randint
44
45import matplotlib.pyplot as plt
46import numpy as np
47import pandas as pd
48from matplotlib import rcParams
49from scipy.stats import gmean
50from tabulate import tabulate
51
52import torch
53import torch._dynamo
54
55
56rcParams.update({"figure.autolayout": True})
57plt.rc("axes", axisbelow=True)
58
59DEFAULT_OUTPUT_DIR = "benchmark_logs"
60
61
62log = logging.getLogger(__name__)
63
64TABLE = {
65    "training": {
66        "ts_nnc": "--training --speedup-ts ",
67        "ts_nvfuser": "--training --nvfuser --speedup-dynamo-ts ",
68        "eager": "--training --backend=eager ",
69        "aot_eager": "--training --backend=aot_eager ",
70        "cudagraphs": "--training --backend=cudagraphs ",
71        "aot_nvfuser": "--training --nvfuser --backend=aot_ts_nvfuser ",
72        "nvprims_nvfuser": "--training --backend=nvprims_nvfuser ",
73        "inductor": "--training --inductor ",
74        "inductor_no_cudagraphs": "--training --inductor --disable-cudagraphs ",
75        "inductor_max_autotune": "--training --inductor --inductor-compile-mode max-autotune ",
76        "inductor_max_autotune_no_cudagraphs": (
77            "--training --inductor --inductor-compile-mode max-autotune-no-cudagraphs --disable-cudagraphs "
78        ),
79    },
80    "inference": {
81        "aot_eager": "--inference --backend=aot_eager ",
82        "eager": "--inference --backend=eager ",
83        "ts_nnc": "--inference --speedup-ts ",
84        "ts_nvfuser": "--inference -n100 --speedup-ts --nvfuser ",
85        "trt": "--inference -n100 --speedup-trt ",
86        "ts_nvfuser_cudagraphs": "--inference --backend=cudagraphs_ts ",
87        "inductor": "--inference -n50 --inductor ",
88        "inductor_no_cudagraphs": "--inference -n50 --inductor --disable-cudagraphs ",
89        "inductor_max_autotune": "--inference -n50 --inductor --inductor-compile-mode max-autotune ",
90        "inductor_max_autotune_no_cudagraphs": (
91            "--inference -n50 --inductor --inductor-compile-mode max-autotune-no-cudagraphs --disable-cudagraphs "
92        ),
93        "torchscript-onnx": "--inference -n5 --torchscript-onnx",
94        "dynamo-onnx": "--inference -n5 --dynamo-onnx",
95    },
96}
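# For illustration, with the entries above the generated inference command for the
# "eager" compiler on torchbench looks roughly like this (exact flags depend on the
# CLI arguments, and the -x skip flags from get_skip_tests below are omitted):
#   python benchmarks/dynamo/torchbench.py --performance --float32 -dcuda \
#       --output=benchmark_logs/eager_torchbench_float32_inference_cuda_performance.csv \
#       --inference --backend=eager --dashboard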
97
98INFERENCE_COMPILERS = tuple(TABLE["inference"].keys())
99TRAINING_COMPILERS = tuple(TABLE["training"].keys())
100
101DEFAULTS = {
102    "training": [
103        "eager",
104        "aot_eager",
105        "inductor",
106        "inductor_no_cudagraphs",
107    ],
108    "inference": [
109        "eager",
110        "aot_eager",
111        "inductor",
112        "inductor_no_cudagraphs",
113    ],
114    "flag_compilers": {
115        "training": ["inductor", "inductor_no_cudagraphs"],
116        "inference": ["inductor", "inductor_no_cudagraphs"],
117    },
118    "dtypes": [
119        "float32",
120    ],
121    "suites": ["torchbench", "huggingface", "timm_models"],
122    "devices": [
123        "cuda",
124    ],
125    "quick": {
126        "torchbench": '-k "resnet..$"',
127        "huggingface": "-k Albert",
128        "timm_models": ' -k "^resnet" -k "^inception"',
129    },
130}
131
132
133DASHBOARD_DEFAULTS = {
134    "dashboard_image_uploader": "/fsx/users/anijain/bin/imgur.sh",
135    "dashboard_archive_path": "/data/home/anijain/cluster/cron_logs",
136    "dashboard_gh_cli_path": "/data/home/anijain/miniconda/bin/gh",
137}
138
139
140def flag_speedup(x):
141    return x < 0.95
142
143
144def flag_compilation_latency(x):
145    return x > 120
146
147
148def flag_compression_ratio(x):
149    return x < 0.9
150
151
152def flag_accuracy(x):
153    return "pass" not in x
154
155
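# Map each metric name to a predicate that returns True when a value should be
# flagged as problematic; used by the warnings section and the regression
# detector below (the thresholds are spelled out in generate_warnings).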
156FLAG_FNS = {
157    "speedup": flag_speedup,
158    "compilation_latency": flag_compilation_latency,
159    "compression_ratio": flag_compression_ratio,
160    "accuracy": flag_accuracy,
161}
162
163
164def percentage(part, whole, decimals=2):
165    if whole == 0:
166        return 0
167    return round(100 * float(part) / float(whole), decimals)
168
169
170def parse_args():
171    parser = argparse.ArgumentParser()
172    parser.add_argument("--devices", action="append", help="cpu or cuda")
173    parser.add_argument("--dtypes", action="append", help="float16/float32/amp")
174    parser.add_argument("--suites", action="append", help="huggingface/torchbench/timm")
175    parser.add_argument(
176        "--compilers",
177        action="append",
178        help=f"For --inference, options are {INFERENCE_COMPILERS}. For --training, options are {TRAINING_COMPILERS}",
179    )
180
181    parser.add_argument(
182        "--flag-compilers",
183        action="append",
184        help="List of compilers to flag issues. Same format as --compilers.",
185    )
186    parser.add_argument(
187        "--quick", action="store_true", help="Just runs one model. Helps in debugging"
188    )
189    parser.add_argument(
190        "--output-dir",
191        help="Choose the output directory to save the logs",
192        default=DEFAULT_OUTPUT_DIR,
193    )
194    parser.add_argument(
195        "--keep-output-dir",
196        action="store_true",
197        help="Do not cleanup the output directory before running",
198    )
199
200    # Choose either generation of commands, pretty parsing or e2e runs
201    group = parser.add_mutually_exclusive_group(required=False)
202    group.add_argument(
203        "--print-run-commands",
204        "--print_run_commands",
205        action="store_true",
206        help="Generate the run commands and save them to a .sh file",
207    )
208    group.add_argument(
209        "--visualize-logs",
210        "--visualize_logs",
211        action="store_true",
212        help="Pretty print the log files and draw graphs",
213    )
214    group.add_argument(
215        "--run",
216        action="store_true",
217        default=True,
218        help="Generate commands, run them, and parse the result files",
219    )
220
221    parser.add_argument(
222        "--log-operator-inputs",
223        action="store_true",
224        default=False,
225        help="Log operator inputs",
226    )
227    parser.add_argument(
228        "--include-slowdowns",
229        "--include_slowdowns",
230        action="store_true",
231        default=False,
232        help="Include slowdowns in geomean performance speedup report. By default, slowdowns are ignored. "
233        "This is because one can always use eager if compile is not speeding things up",
234    )
235
236    parser.add_argument(
237        "--extra-args", default="", help="Append these args to the benchmark command line"
238    )
239
240    # Choose either inference or training
241    group_mode = parser.add_mutually_exclusive_group(required=True)
242    group_mode.add_argument(
243        "--inference", action="store_true", help="Only run inference related tasks"
244    )
245    group_mode.add_argument(
246        "--training", action="store_true", help="Only run training related tasks"
247    )
248
249    parser.add_argument(
250        "--base-sha",
251        help="Commit id of the PyTorch build under test",
252    )
253    parser.add_argument(
254        "--total-partitions",
255        type=int,
256        help="Total number of partitions, to be passed to the actual benchmark script",
257    )
258    parser.add_argument(
259        "--partition-id",
260        type=int,
261        help="ID of partition, to be passed to the actual benchmark script",
262    )
263
264    parser.add_argument(
265        "--update-dashboard",
266        action="store_true",
267        default=False,
268        help="Update the performance dashboard",
269    )
270    parser.add_argument(
271        "--no-graphs",
272        action="store_true",
273        default=False,
274        help="Do not generate and upload metric graphs",
275    )
276    parser.add_argument(
277        "--no-update-archive",
278        action="store_true",
279        default=False,
280        help="Do not update lookup.csv or the log archive",
281    )
282    parser.add_argument(
283        "--no-gh-comment",
284        action="store_true",
285        default=False,
286        help="Do not write a comment to github",
287    )
288    parser.add_argument(
289        "--no-detect-regressions",
290        action="store_true",
291        default=False,
292        help="Do not compare to previous runs for regressions or metric graphs.",
293    )
294    parser.add_argument(
295        "--update-dashboard-test",
296        action="store_true",
297        default=False,
298        help="Implies all of --no-graphs, --no-update-archive, and --no-gh-comment",
299    )
300    parser.add_argument(
301        "--dashboard-image-uploader",
302        default=DASHBOARD_DEFAULTS["dashboard_image_uploader"],
303        help="Image uploader command",
304    )
305    parser.add_argument(
306        "--dashboard-archive-path",
307        default=DASHBOARD_DEFAULTS["dashboard_archive_path"],
308        help="Archived directory path",
309    )
310    parser.add_argument(
311        "--archive-name",
312        help="Directory name under dashboard-archive-path to copy output-dir to. "
313        "If not provided, a generated name is used.",
314    )
315    parser.add_argument(
316        "--dashboard-gh-cli-path",
317        default=DASHBOARD_DEFAULTS["dashboard_gh_cli_path"],
318        help="GitHub CLI path",
319    )
320    parser.add_argument(
321        "--batch-size",
322        "--batch_size",
323        type=int,
324        default=None,
325        help="batch size for benchmarking",
326    )
327    parser.add_argument(
328        "--threads",
329        "-t",
330        type=int,
331        default=None,
332        help="number of threads to use for eager and inductor.",
333    )
334    launcher_group = parser.add_argument_group("CPU Launcher Parameters")
335    launcher_group.add_argument(
336        "--enable-cpu-launcher",
337        "--enable_cpu_launcher",
338        action="store_true",
339        default=False,
340        help="Use torch.backends.xeon.run_cpu to get the peak performance on Intel(R) Xeon(R) Scalable Processors.",
341    )
342    launcher_group.add_argument(
343        "--cpu-launcher-args",
344        "--cpu_launcher_args",
345        type=str,
346        default="",
347        help="Provide the args of torch.backends.xeon.run_cpu. "
348        "To look up what optional arguments this launcher offers: python -m torch.backends.xeon.run_cpu --help",
349    )
350    parser.add_argument(
351        "--no-cold-start-latency",
352        action="store_true",
353        default=False,
354        help="Do not include --cold-start-latency on inductor benchmarks",
355    )
356    parser.add_argument(
357        "--inductor-compile-mode",
358        default=None,
359        help="torch.compile mode argument for inductor runs.",
360    )
361    args = parser.parse_args()
362    return args
363
364
365def get_mode(args):
366    if args.inference:
367        return "inference"
368    return "training"
369
370
371def get_skip_tests(suite, device, is_training: bool):
372    """
373    Generate a space-separated string of -x <model> flags for tests that should be skipped for this suite, device, and mode
374    """
375    skip_tests = set()
376    original_dir = abspath(os.getcwd())
377    module = importlib.import_module(suite)
378    os.chdir(original_dir)
379
380    if suite == "torchbench":
381        skip_tests.update(module.TorchBenchmarkRunner().skip_models)
382        if is_training:
383            skip_tests.update(
384                module.TorchBenchmarkRunner().skip_not_suitable_for_training_models
385            )
386        if device == "cpu":
387            skip_tests.update(module.TorchBenchmarkRunner().skip_models_for_cpu)
388        elif device == "cuda":
389            skip_tests.update(module.TorchBenchmarkRunner().skip_models_for_cuda)
390
391    skip_tests = (f"-x {name}" for name in skip_tests)
392    skip_str = " ".join(skip_tests)
393    return skip_str
394
395
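# Result CSVs are keyed on compiler/suite/dtype/mode/device/testing, e.g. (with the
# defaults) "eager_torchbench_float32_inference_cuda_performance.csv".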
396def generate_csv_name(args, dtype, suite, device, compiler, testing):
397    mode = get_mode(args)
398    return f"{compiler}_{suite}_{dtype}_{mode}_{device}_{testing}.csv"
399
400
401def generate_commands(args, dtypes, suites, devices, compilers, output_dir):
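    """
    Write a bash script named run_<mode>_<devices>_<dtypes>_<suites>_<compilers>.sh
    with one benchmark invocation per (suite, device, dtype, compiler) combination
    for both performance and accuracy testing, and return the script name.
    """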
402    mode = get_mode(args)
403    suites_str = "_".join(suites)
404    devices_str = "_".join(devices)
405    dtypes_str = "_".join(dtypes)
406    compilers_str = "_".join(compilers)
407    generated_file = (
408        f"run_{mode}_{devices_str}_{dtypes_str}_{suites_str}_{compilers_str}.sh"
409    )
410    with open(generated_file, "w") as runfile:
411        lines = []
412
413        lines.append("#!/bin/bash")
414        lines.append("set -x")
415        lines.append("# Setup the output directory")
416        if not args.keep_output_dir:
417            lines.append(f"rm -rf {output_dir}")
418        # It's ok if the output directory already exists
419        lines.append(f"mkdir -p {output_dir}")
420        lines.append("")
421
422        for testing in ["performance", "accuracy"]:
423            for combo in itertools.product(suites, devices, dtypes):
424                suite, device, dtype = combo
425                lines.append(
426                    f"# Commands for {suite} for device={device}, dtype={dtype} for {mode} and for {testing} testing"
427                )
428                info = TABLE[mode]
429                for compiler in compilers:
430                    base_cmd = info[compiler]
431                    output_filename = f"{output_dir}/{generate_csv_name(args, dtype, suite, device, compiler, testing)}"
432                    launcher_cmd = "python"
433                    if args.enable_cpu_launcher:
434                        launcher_cmd = f"python -m torch.backends.xeon.run_cpu {args.cpu_launcher_args}"
435                    cmd = f"{launcher_cmd} benchmarks/dynamo/{suite}.py --{testing} --{dtype} -d{device} --output={output_filename}"
436                    cmd = f"{cmd} {base_cmd} {args.extra_args} --dashboard"
437                    skip_tests_str = get_skip_tests(suite, device, args.training)
438                    cmd = f"{cmd} {skip_tests_str}"
439
440                    if args.log_operator_inputs:
441                        cmd = f"{cmd} --log-operator-inputs"
442
443                    if args.quick:
444                        filters = DEFAULTS["quick"][suite]
445                        cmd = f"{cmd} {filters}"
446
447                    if (
448                        compiler
449                        in (
450                            "inductor",
451                            "inductor_no_cudagraphs",
452                        )
453                        and not args.no_cold_start_latency
454                    ):
455                        cmd = f"{cmd} --cold-start-latency"
456
457                    if args.batch_size is not None:
458                        cmd = f"{cmd} --batch-size {args.batch_size}"
459
460                    if args.threads is not None:
461                        cmd = f"{cmd} --threads {args.threads}"
462
463                    if args.total_partitions is not None:
464                        cmd = f"{cmd} --total-partitions {args.total_partitions}"
465
466                    if args.partition_id is not None:
467                        cmd = f"{cmd} --partition-id {args.partition_id}"
468
469                    if args.inductor_compile_mode is not None:
470                        cmd = f"{cmd} --inductor-compile-mode {args.inductor_compile_mode}"
471                    lines.append(cmd)
472                lines.append("")
473        runfile.writelines([line + "\n" for line in lines])
474    return generated_file
475
476
477def generate_dropdown_comment(title, body):
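    """
    Wrap body in a GitHub-flavored markdown <details> block (collapsed by default,
    expanded via "see more") underneath the given title.
    """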
478    str_io = io.StringIO()
479    str_io.write(f"{title}\n")
480    str_io.write("<details>\n")
481    str_io.write("<summary>see more</summary>\n")
482    str_io.write(f"{body}")
483    str_io.write("\n")
484    str_io.write("</details>\n\n")
485    return str_io.getvalue()
486
487
488def build_summary(args):
489    out_io = io.StringIO()
490
491    def print_commit_hash(path, name):
492        if args.base_sha is not None:
493            if name == "pytorch":
494                out_io.write(f"{name} commit: {args.base_sha}\n")
495        elif exists(path):
496            import git
497
498            repo = git.Repo(path, search_parent_directories=True)
499            sha = repo.head.object.hexsha
500            date = repo.head.object.committed_datetime
501            out_io.write(f"{name} commit: {sha}\n")
502            out_io.write(f"{name} commit date: {date}\n")
503        else:
504            out_io.write(f"{name} Absent\n")
505
506    def env_var(name):
507        if name in os.environ:
508            out_io.write(f"{name} = {os.environ[name]}\n")
509        else:
510            out_io.write(f"{name} = {None}\n")
511
512    out_io.write("\n")
513    out_io.write("### Run name ###\n")
514    out_io.write(get_archive_name(args, args.dtypes[0]))
515    out_io.write("\n")
516
517    out_io.write("\n")
518    out_io.write("### Commit hashes ###\n")
519    print_commit_hash("../pytorch", "pytorch")
520    print_commit_hash("../torchbenchmark", "torchbench")
521
522    out_io.write("\n")
523    out_io.write("### TorchDynamo config flags ###\n")
524    for key in dir(torch._dynamo.config):
525        val = getattr(torch._dynamo.config, key)
526        if not key.startswith("__") and isinstance(val, bool):
527            out_io.write(f"torch._dynamo.config.{key} = {val}\n")
528
529    out_io.write("\n")
530    out_io.write("### Torch version ###\n")
531    out_io.write(f"torch: {torch.__version__}\n")
532
533    out_io.write("\n")
534    out_io.write("### Environment variables ###\n")
535    env_var("TORCH_CUDA_ARCH_LIST")
536    env_var("CUDA_HOME")
537    env_var("USE_LLVM")
538
539    if "cuda" in args.devices:
540        out_io.write("\n")
541        out_io.write("### GPU details ###\n")
542        out_io.write(f"CUDNN VERSION: {torch.backends.cudnn.version()}\n")
543        out_io.write(f"Number CUDA Devices: {torch.cuda.device_count()}\n")
544        out_io.write(f"Device Name: {torch.cuda.get_device_name(0)}\n")
545        out_io.write(
546            f"Device Memory [GB]: {torch.cuda.get_device_properties(0).total_memory/1e9}\n"
547        )
548
549    title = "## Build Summary"
550    comment = generate_dropdown_comment(title, out_io.getvalue())
551    with open(f"{output_dir}/gh_build_summary.txt", "w") as gh_fh:
552        gh_fh.write(comment)
553
554
555@functools.lru_cache(None)
556def archive_data(archive_name):
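    """
    Return (day, prefix) for a run. If archive_name is given, the day-of-year
    number and the prefix (the portion before "_performance") are parsed back out
    of it; otherwise they are derived from the current date at a fixed UTC-8
    offset, e.g. day="045" and prefix="day_045_14_02_23" for Feb 14, 2023.
    """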
557    if archive_name is not None:
558        prefix_match = re.search(r"\w+(?=_performance)", archive_name)
559        if prefix_match is not None:
560            prefix = prefix_match.group(0)
561        else:
562            prefix = ""
563        day_match = re.search(r"day_(\d+)_", archive_name)
564        if day_match is not None:
565            day = day_match.group(1)
566        else:
567            day = "000"
568    else:
569        now = datetime.now(tz=timezone(timedelta(hours=-8)))
570        day = now.strftime("%j")
571        prefix = now.strftime(f"day_{day}_%d_%m_%y")
572    return day, prefix
573
574
575@functools.lru_cache(None)
576def default_archive_name(dtype):
577    _, prefix = archive_data(None)
578    return f"{prefix}_performance_{dtype}_{randint(100, 999)}"
579
580
581def get_archive_name(args, dtype):
582    return (
583        default_archive_name(dtype) if args.archive_name is None else args.archive_name
584    )
585
586
587def archive(src_dir, dest_dir_prefix, archive_name, dtype):
588    if archive_name is None:
589        archive_name = default_archive_name(dtype)
590    # Copy the folder to archived location
591    dest = os.path.join(dest_dir_prefix, archive_name)
592    shutil.copytree(src_dir, dest, dirs_exist_ok=True)
593    print(f"copied contents of {src_dir} to {dest}")
594
595
596def get_metric_title(metric):
597    if metric == "speedup":
598        return "Performance speedup"
599    elif metric == "accuracy":
600        return "Accuracy"
601    elif metric == "compilation_latency":
602        return "Compilation latency (sec)"
603    elif metric == "compression_ratio":
604        return "Peak Memory Compression Ratio"
605    elif metric == "abs_latency":
606        return "Absolute latency (ms)"
607    raise RuntimeError("unknown metric")
608
609
610class Parser:
611    def __init__(
612        self, suites, devices, dtypes, compilers, flag_compilers, mode, output_dir
613    ):
614        self.suites = suites
615        self.devices = devices
616        self.dtypes = dtypes
617        self.compilers = compilers
618        self.flag_compilers = flag_compilers
619        self.output_dir = output_dir
620        self.mode = mode
621
622    def has_header(self, output_filename):
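        # The benchmark CSVs are sometimes written without a header row; treat the
        # presence of the "dev" column name in the first line as the header.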
623        header_present = False
624        with open(output_filename) as f:
625            line = f.readline()
626            if "dev" in line:
627                header_present = True
628        return header_present
629
630
631class ParsePerformanceLogs(Parser):
632    def __init__(
633        self,
634        suites,
635        devices,
636        dtypes,
637        compilers,
638        flag_compilers,
639        mode,
640        output_dir,
641        include_slowdowns=False,
642    ):
643        super().__init__(
644            suites,
645            devices,
646            dtypes,
647            compilers,
648            flag_compilers,
649            mode,
650            output_dir,
651        )
652        self.parsed_frames = defaultdict(lambda: defaultdict(None))
653        self.untouched_parsed_frames = defaultdict(lambda: defaultdict(None))
654        self.metrics = [
655            "speedup",
656            "abs_latency",
657            "compilation_latency",
658            "compression_ratio",
659        ]
660        self.bottom_k = 50
661        self.include_slowdowns = include_slowdowns
662        self.parse()
663
664    def plot_graph(self, df, title):
665        labels = df.columns.values.tolist()
666        labels = labels[3:]
667        df.plot(
668            x="name",
669            y=labels,
670            kind="bar",
671            width=0.65,
672            title=title,
673            ylabel="Speedup over eager",
674            xlabel="",
675            grid=True,
676            figsize=(max(len(df.index) / 4, 5), 10),
677            edgecolor="black",
678        )
679        plt.tight_layout()
680        plt.savefig(f"{self.output_dir}/{title}.png")
681
682    def read_csv(self, output_filename):
683        if self.has_header(output_filename):
684            return pd.read_csv(output_filename)
685        else:
686            return pd.read_csv(
687                output_filename,
688                names=[
689                    "dev",
690                    "name",
691                    "batch_size",
692                    "speedup",
693                    "abs_latency",
694                    "compilation_latency",
695                    "compression_ratio",
696                ],
697                header=None,
698                engine="python",
699            )
700
701    def parse(self):
702        self.extract_df("accuracy", "accuracy")
703        for metric in self.metrics:
704            self.extract_df(metric, "performance")
705
706    def clean_batch_sizes(self, frames):
707        # Reconcile batch sizes across frames: where one frame reports 0, use the non-zero value
708        if len(frames) == 1:
709            return frames
710        batch_sizes = frames[0]["batch_size"].to_list()
711        for frame in frames[1:]:
712            frame_batch_sizes = frame["batch_size"].to_list()
713            for idx, (batch_a, batch_b) in enumerate(
714                zip(batch_sizes, frame_batch_sizes)
715            ):
716                assert (
717                    batch_a == batch_b or batch_a == 0 or batch_b == 0
718                ), f"a={batch_a}, b={batch_b}"
719                batch_sizes[idx] = max(batch_a, batch_b)
720        for frame in frames:
721            frame["batch_size"] = batch_sizes
722        return frames
723
724    def extract_df(self, metric, testing):
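        """
        For every (suite, device, dtype) combination, read the per-compiler CSVs
        for `testing`, keep the (dev, name, batch_size, metric) columns and merge
        them into one frame with a column per compiler. For performance metrics,
        models that fail the accuracy run are zeroed out or dropped before the
        frame is stored in self.parsed_frames; the unfiltered frame is kept in
        self.untouched_parsed_frames.
        """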
725        for combo in itertools.product(self.suites, self.devices, self.dtypes):
726            suite, device, dtype = combo
727            frames = []
728            for compiler in self.compilers:
729                output_filename = f"{self.output_dir}/{compiler}_{suite}_{dtype}_{self.mode}_{device}_{testing}.csv"
730                df = self.read_csv(output_filename)
731                if metric not in df:
732                    df.insert(len(df.columns), metric, np.nan)
733                df = df[["dev", "name", "batch_size", metric]]
734                df.rename(columns={metric: compiler}, inplace=True)
735                df["batch_size"] = df["batch_size"].astype(int)
736                frames.append(df)
737
738            # Merge the results
739            frames = self.clean_batch_sizes(frames)
740            if len(self.compilers) == 1:
741                df = frames[0]
742            else:
743                # Merge data frames
744                df = pd.merge(frames[0], frames[1], on=["dev", "name", "batch_size"])
745                for idx in range(2, len(frames)):
746                    df = pd.merge(df, frames[idx], on=["dev", "name", "batch_size"])
747
748            if testing == "performance":
749                for compiler in self.compilers:
750                    df[compiler] = pd.to_numeric(df[compiler], errors="coerce").fillna(
751                        0
752                    )
753
754            df_copy = df.copy()
755            df_copy = df_copy.sort_values(
756                by=list(reversed(self.compilers)), ascending=False
757            )
758            if "inductor" in self.compilers:
759                df_copy = df_copy.sort_values(by="inductor", ascending=False)
760            self.untouched_parsed_frames[suite][metric] = df_copy
761
762            if testing == "performance":
763                df_accuracy = self.parsed_frames[suite]["accuracy"]
764                perf_rows = []
765                for model_name in df["name"]:
766                    perf_row = df[df["name"] == model_name].copy()
767                    acc_row = df_accuracy[df_accuracy["name"] == model_name]
768                    for compiler in self.compilers:
769                        if not perf_row.empty:
770                            if acc_row.empty:
771                                perf_row[compiler] = 0.0
772                            elif acc_row[compiler].iloc[0] in (
773                                "model_fail_to_load",
774                                "eager_fail_to_run",
775                            ):
776                                perf_row = pd.DataFrame()
777                            elif acc_row[compiler].iloc[0] not in (
778                                "pass",
779                                "pass_due_to_skip",
780                            ):
781                                perf_row[compiler] = 0.0
782                    if not perf_row.empty:
783                        perf_rows.append(perf_row)
784                df = pd.concat(perf_rows)
785            df = df.sort_values(by=list(reversed(self.compilers)), ascending=False)
786
787            if "inductor" in self.compilers:
788                df = df.sort_values(by="inductor", ascending=False)
789            self.parsed_frames[suite][metric] = df
790
791    def get_passing_entries(self, compiler, df):
792        return df[compiler][df[compiler] > 0]
793
794    def comp_time(self, compiler, df):
795        df = self.get_passing_entries(compiler, df)
796        # df = df.sort_values(by=compiler, ascending=False)[compiler][: self.bottom_k]
797        if df.empty:
798            return "0.0"
799
800        return f"{df.mean():.2f}"
801
802    def geomean(self, compiler, df):
803        cleaned_df = self.get_passing_entries(compiler, df)
804        if not self.include_slowdowns:
805            cleaned_df = cleaned_df.clip(1)
806        if cleaned_df.empty:
807            return "0.0x"
808        return f"{gmean(cleaned_df):.2f}x"
809
810    def passrate(self, compiler, df):
811        total = len(df.index)
812        passing = df[df[compiler] > 0.0][compiler].count()
813        perc = int(percentage(passing, total, decimals=0))
814        return f"{perc}%, {passing}/{total}"
815
816    def memory(self, compiler, df):
817        df = self.get_passing_entries(compiler, df)
818        df = df.fillna(0)
819        df = df[df > 0]
820        if df.empty:
821            return "0.0x"
822        return f"{df.mean():.2f}x"
823
824    def exec_summary_df(self, fn, metric):
825        """
826        Generate a table with passrate and geomean perf
827        """
828        cols = {}
829        cols["Compiler"] = self.compilers
830        for suite in self.suites:
831            df = self.parsed_frames[suite][metric]
832            # speedups = [self.geomean(compiler, df) for compiler in self.compilers]
833            speedups = [fn(compiler, df) for compiler in self.compilers]
834            col = pd.Series(data=speedups, index=self.compilers)
835            cols[suite] = col
836        df = pd.DataFrame(cols)
837        df = df.fillna(0)
838        df.to_csv(os.path.join(self.output_dir, f"{fn.__name__}.csv"))
839        return df
840
841    def exec_summary_text(self, caption, fn, metric):
842        df = self.exec_summary_df(fn, metric)
843        tabform = tabulate(df, headers="keys", tablefmt="pretty", showindex="never")
844
845        str_io = io.StringIO()
846        str_io.write(f"{caption}")
847        str_io.write("~~~\n")
848        str_io.write(f"{tabform}\n")
849        str_io.write("~~~\n")
850        return str_io.getvalue()
851
852    def generate_executive_summary(self):
853        machine = "A100 GPUs"
854        if "cpu" in self.devices:
855            get_machine_cmd = "lscpu | grep 'Model name' | awk -F':' '{print $2}'"
856            machine = subprocess.getstatusoutput(get_machine_cmd)[1].strip()
857        description = (
858            "We evaluate different backends "
859            "across three benchmark suites - torchbench, huggingface and timm. We run "
860            "these experiments on "
861            + machine
862            + ". Each experiment runs one iteration of forward pass "
863            "and backward pass for training and forward pass only for inference. "
864            "For accuracy, we check the numerical correctness of forward pass outputs and gradients "
865            "by comparing with native pytorch. We measure speedup "
866            "by normalizing against the performance of native pytorch. We report mean "
867            "compilation latency numbers and peak memory footprint reduction ratio. \n\n"
868            "Caveats\n"
869            "1) Batch size has been reduced to work around OOM errors. Work is in progress to "
870            "reduce peak memory footprint.\n"
871            "2) Experiments do not cover dynamic shapes.\n"
872            "3) The experimental setup does not include an optimizer.\n\n"
873        )
874        comment = generate_dropdown_comment("", description)
875        str_io = io.StringIO()
876        str_io.write("\n")
877        str_io.write("## Executive Summary ##\n")
878        str_io.write(comment)
879
880        speedup_caption = "Geometric mean speedup \n"
881        speedup_summary = self.exec_summary_text(
882            speedup_caption, self.geomean, "speedup"
883        )
884
885        passrate_caption = "Passrate\n"
886        passrate_summary = self.exec_summary_text(
887            passrate_caption, self.passrate, "speedup"
888        )
889
890        comp_time_caption = "Mean compilation time (seconds)\n"
891        comp_time_summary = self.exec_summary_text(
892            comp_time_caption, self.comp_time, "compilation_latency"
893        )
894
895        peak_memory_caption = (
896            "Peak memory footprint compression ratio (higher is better)\n"
897        )
898        peak_memory_summary = self.exec_summary_text(
899            peak_memory_caption, self.memory, "compression_ratio"
900        )
901
902        str_io.write(
903            "To measure performance, compilation latency and memory footprint reduction, "
904            "we remove the models that fail accuracy checks.\n\n"
905        )
906        str_io.write(passrate_summary)
907        str_io.write(speedup_summary)
908        str_io.write(comp_time_summary)
909        str_io.write(peak_memory_summary)
910        self.executive_summary = str_io.getvalue()
911
912    def flag_bad_entries(self, suite, metric, flag_fn):
913        df = self.untouched_parsed_frames[suite][metric]
914        df = df.drop("dev", axis=1)
915        df = df.rename(columns={"batch_size": "bs"})
916        # apply flag_fn elementwise to flag_compilers columns,
917        # if one element fails, the entire row is flagged
918        flag = np.logical_or.reduce(
919            df[self.flag_compilers].applymap(flag_fn),
920            axis=1,
921        )
922        df = df[flag]
923        df = df.assign(suite=suite)
924        return df.reindex(columns=["suite", "name"] + self.flag_compilers)
925
926    def generate_warnings(self):
927        title = "## Warnings ##"
928        body = (
929            "We flag models where:\n\n"
930            " - accuracy fails\n"
931            " - speedup < 0.95x (NOTE: 0.0 speedup typically signifies a failure in the performance test)\n"
932            " - compilation latency > 120 sec.\n"
933            " - compression ratio < 0.9\n"
934            "\n"
935        )
936        for metric in [
937            "accuracy",
938            "speedup",
939            "compilation_latency",
940            "compression_ratio",
941        ]:
942            dfs = []
943            for suite in self.suites:
944                dfs.append(self.flag_bad_entries(suite, metric, FLAG_FNS[metric]))
945            df = pd.concat(dfs, axis=0)
946            if df.empty:
947                continue
948            tabform = tabulate(df, headers="keys", tablefmt="pretty", showindex="never")
949            str_io = io.StringIO()
950            str_io.write("\n")
951            str_io.write(get_metric_title(metric) + " warnings\n")
952            str_io.write("~~~\n")
953            str_io.write(f"{tabform}\n")
954            str_io.write("~~~\n")
955            body += str_io.getvalue()
956
957        comment = generate_dropdown_comment(title, body)
958        return comment
959
960    def prepare_message(self, suite):
961        title = f"## {suite} suite with {self.dtypes[0]} precision ##"
962        body = ""
963        for metric in [
964            "speedup",
965            "accuracy",
966            "compilation_latency",
967            "compression_ratio",
968            "abs_latency",
969        ]:
970            df = self.untouched_parsed_frames[suite][metric]
971            df = df.drop("dev", axis=1)
972            df = df.rename(columns={"batch_size": "bs"})
973            tabform = tabulate(df, headers="keys", tablefmt="pretty", showindex="never")
974            str_io = io.StringIO()
975            str_io.write("\n")
976            str_io.write(get_metric_title(metric) + "\n")
977            str_io.write("~~~\n")
978            str_io.write(f"{tabform}\n")
979            str_io.write("~~~\n")
980            body += str_io.getvalue()
981
982        comment = generate_dropdown_comment(title, body)
983        return comment
984
985    def gen_summary_files(self):
986        self.generate_executive_summary()
987        for suite in self.suites:
988            self.plot_graph(
989                self.untouched_parsed_frames[suite]["speedup"],
990                f"{suite}_{self.dtypes[0]}",
991            )
992
993        with open(f"{self.output_dir}/gh_title.txt", "w") as gh_fh:
994            str_io = io.StringIO()
995            str_io.write("\n")
996            str_io.write(f"## Performance Dashboard for {self.dtypes[0]} precision ##\n")
997            str_io.write("\n")
998            gh_fh.write(str_io.getvalue())
999
1000        with open(f"{self.output_dir}/gh_executive_summary.txt", "w") as gh_fh:
1001            gh_fh.write(self.executive_summary)
1002
1003        with open(f"{self.output_dir}/gh_warnings.txt", "w") as gh_fh:
1004            warnings_body = self.generate_warnings()
1005            gh_fh.write(warnings_body)
1006
1007        str_io = io.StringIO()
1008        for suite in self.suites:
1009            str_io.write(self.prepare_message(suite))
1010        str_io.write("\n")
1011        with open(f"{self.output_dir}/gh_{self.mode}.txt", "w") as gh_fh:
1012            gh_fh.write(str_io.getvalue())
1013
1014
1015def parse_logs(args, dtypes, suites, devices, compilers, flag_compilers, output_dir):
1016    mode = get_mode(args)
1017    build_summary(args)
1018    include_slowdowns = args.include_slowdowns
1019
1020    parser_class = ParsePerformanceLogs
1021    parser = parser_class(
1022        suites,
1023        devices,
1024        dtypes,
1025        compilers,
1026        flag_compilers,
1027        mode,
1028        output_dir,
1029        include_slowdowns,
1030    )
1031    parser.gen_summary_files()
1032    return
1033
1034
1035@dataclasses.dataclass
1036class LogInfo:
1037    # Day of the year this log was generated
1038    day: str
1039
1040    # Directory path where all logs are present
1041    dir_path: str
1042
1043
1044def get_date(log_info):
1045    return datetime.strptime(f"{log_info.day}", "%j").strftime("%m-%d")
1046
1047
1048def find_last_2_with_filenames(lookup_file, dashboard_archive_path, dtype, filenames):
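    """
    Walk lookup.csv (rows of day,mode,prec,path) from newest to oldest and return
    the two most recent archived performance runs for `dtype` that contain all of
    `filenames`, or None if fewer than two such runs exist.
    """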
1049    df = pd.read_csv(lookup_file, names=("day", "mode", "prec", "path"))
1050    df = df[df["mode"] == "performance"]
1051    df = df[df["prec"] == dtype]
1052    df = df[::-1]
1053    last2 = []
1054    for path in df["path"]:
1055        output_dir = os.path.join(dashboard_archive_path, path)
1056        fullpaths = [
1057            os.path.join(dashboard_archive_path, path, name) for name in filenames
1058        ]
1059        if all(os.path.exists(fullpath) for fullpath in fullpaths):
1060            last2.append(output_dir)
1061        if len(last2) >= 2:
1062            return last2
1063    return None
1064
1065
1066class SummaryStatDiffer:
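    """
    Diffs the summary statistics (passrate and geomean speedup) between the two
    most recent archived runs and writes the result to gh_summary_diff.txt.
    """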
1067    def __init__(self, args):
1068        self.args = args
1069        self.lookup_file = os.path.join(self.args.dashboard_archive_path, "lookup.csv")
1070        assert os.path.exists(self.lookup_file)
1071
1072    def generate_diff(self, last2, filename, caption):
1073        df_cur, df_prev = (pd.read_csv(os.path.join(path, filename)) for path in last2)
1074        df_merge = df_cur.merge(df_prev, on="Compiler", suffixes=("_cur", "_prev"))
1075        data = {col: [] for col in ("compiler", "suite", "prev_value", "cur_value")}
1076        for _, row in df_merge.iterrows():
1077            if row["Compiler"] in self.args.flag_compilers:
1078                for suite in self.args.suites:
1079                    if suite + "_prev" not in row or suite + "_cur" not in row:
1080                        continue
1081                    data["compiler"].append(row["Compiler"])
1082                    data["suite"].append(suite)
1083                    data["prev_value"].append(row[suite + "_prev"])
1084                    data["cur_value"].append(row[suite + "_cur"])
1085
1086        df = pd.DataFrame(data)
1087        tabform = tabulate(df, headers="keys", tablefmt="pretty", showindex="never")
1088        str_io = io.StringIO()
1089        str_io.write("\n")
1090        str_io.write(f"{caption}\n")
1091        str_io.write("~~~\n")
1092        str_io.write(f"{tabform}\n")
1093        str_io.write("~~~\n")
1094        return str_io.getvalue()
1095
1096    def generate_comment(self):
1097        title = "## Summary Statistics Diff ##\n"
1098        body = (
1099            "For each relevant compiler, we compare the summary statistics "
1100            "for the 2 most recent reports that actually run the compiler.\n\n"
1101        )
1102        dtype = self.args.dtypes[0]
1103        last2 = find_last_2_with_filenames(
1104            self.lookup_file,
1105            self.args.dashboard_archive_path,
1106            dtype,
1107            ["geomean.csv", "passrate.csv"],
1108        )
1109
1110        if last2 is None:
1111            body += "Could not find the 2 most recent reports.\n\n"
1112        else:
1113            for state, path in zip(("Current", "Previous"), last2):
1114                body += f"{state} report name: {path}\n\n"
1115            body += self.generate_diff(last2, "passrate.csv", "Passrate diff")
1116            body += self.generate_diff(
1117                last2, "geomean.csv", "Geometric mean speedup diff"
1118            )
1119
1120        comment = generate_dropdown_comment(title, body)
1121
1122        with open(f"{self.args.output_dir}/gh_summary_diff.txt", "w") as gh_fh:
1123            gh_fh.write(comment)
1124
1125
1126class RegressionDetector:
1127    """
1128    Compares the most recent 2 benchmarks to find previously unflagged models
1129    that are now flagged.
1130    """
1131
1132    def __init__(self, args):
1133        self.args = args
1134        self.lookup_file = os.path.join(self.args.dashboard_archive_path, "lookup.csv")
1135        assert os.path.exists(self.lookup_file)
1136
1137    def generate_comment(self):
1138        title = "## Recent Regressions ##\n"
1139        body = (
1140            "For each relevant compiler, we compare the most recent 2 reports "
1141            "(that actually run the compiler) to find previously unflagged "
1142            "models that are now flagged as problematic (according to the "
1143            "'Warnings' section).\n\n"
1144        )
1145        dtype = self.args.dtypes[0]
1146        device = self.args.devices[0]
1147        for suite in self.args.suites:
1148            body += f"### Regressions for {suite} ###\n"
1149            last2 = {}
1150
1151            for compiler in self.args.flag_compilers:
1152                filenames = [
1153                    generate_csv_name(
1154                        self.args, dtype, suite, device, compiler, testing
1155                    )
1156                    for testing in ["performance", "accuracy"]
1157                ]
1158                compiler_last2 = find_last_2_with_filenames(
1159                    self.lookup_file, self.args.dashboard_archive_path, dtype, filenames
1160                )
1161                if compiler_last2 is not None:
1162                    last2[compiler] = [
1163                        ParsePerformanceLogs(
1164                            [suite],
1165                            [device],
1166                            [dtype],
1167                            [compiler],
1168                            [compiler],
1169                            get_mode(self.args),
1170                            output_dir,
1171                        )
1172                        for output_dir in compiler_last2
1173                    ]
1174                    for state, path in zip(("Current", "Previous"), compiler_last2):
1175                        body += (
1176                            f"{state} report name (compiler: {compiler}, "
1177                            f"suite: {suite}): {path}\n\n"
1178                        )
1179
1180            regressions_present = False
1181            for metric in [
1182                "accuracy",
1183                "speedup",
1184                "compilation_latency",
1185                "compression_ratio",
1186            ]:
1187                dfs = []
1188                for compiler in self.args.flag_compilers:
1189                    if last2.get(compiler) is None:
1190                        continue
1191
1192                    df_cur, df_prev = (
1193                        last2[compiler][i].untouched_parsed_frames[suite][metric]
1194                        for i in (0, 1)
1195                    )
1196                    df_merge = df_cur.merge(
1197                        df_prev, on="name", suffixes=("_cur", "_prev")
1198                    )
1199                    flag_fn = FLAG_FNS[metric]
1200                    flag = np.logical_and(
1201                        df_merge[compiler + "_prev"].apply(
1202                            lambda x: not pd.isna(x) and not flag_fn(x)
1203                        ),
1204                        df_merge[compiler + "_cur"].apply(
1205                            lambda x: not pd.isna(x) and flag_fn(x)
1206                        ),
1207                    )
1208                    df_bad = df_merge[flag]
1209                    dfs.append(
1210                        pd.DataFrame(
1211                            data={
1212                                "compiler": compiler,
1213                                "name": df_bad["name"],
1214                                "prev_status": df_bad[compiler + "_prev"],
1215                                "cur_status": df_bad[compiler + "_cur"],
1216                            }
1217                        )
1218                    )
1219
1220                if not dfs:
1221                    continue
1222                df = pd.concat(dfs, axis=0)
1223                if df.empty:
1224                    continue
1225                regressions_present = True
1226                tabform = tabulate(
1227                    df, headers="keys", tablefmt="pretty", showindex="never"
1228                )
1229                str_io = io.StringIO()
1230                str_io.write("\n")
1231                str_io.write(f"{get_metric_title(metric)} regressions\n")
1232                str_io.write("~~~\n")
1233                str_io.write(f"{tabform}\n")
1234                str_io.write("~~~\n")
1235                body += str_io.getvalue()
1236
1237            if not regressions_present:
1238                body += "No regressions found.\n"
1239
1240        comment = generate_dropdown_comment(title, body)
1241
1242        with open(f"{self.args.output_dir}/gh_metric_regression.txt", "w") as gh_fh:
1243            gh_fh.write(comment)
1244
1245
1246class RegressionTracker:
1247    """
1248    Plots progress of different metrics over time to detect regressions.
1249    """
1250
1251    def __init__(self, args):
1252        self.args = args
1253        self.suites = self.args.suites
1254        self.lookup_file = os.path.join(self.args.dashboard_archive_path, "lookup.csv")
1255        assert os.path.exists(self.lookup_file)
1256        self.k = 10
1257
1258    def find_last_k(self):
1259        """
1260        Find the last k pairs of (day number, log_path)
1261        """
1262        dtype = self.args.dtypes[0]
1263        df = pd.read_csv(self.lookup_file, names=("day", "mode", "prec", "path"))
1264        df = df[df["mode"] == "performance"]
1265        df = df[df["prec"] == dtype]
1266        log_infos = []
1267        for day, path in zip(df["day"], df["path"]):
1268            log_infos.append(LogInfo(day, path))
1269
1270        assert len(log_infos) >= self.k
1271        log_infos = log_infos[len(log_infos) - self.k :]
1272        return log_infos
1273
1274    def generate_comment(self):
1275        title = "## Metrics over time ##\n"
1276        str_io = io.StringIO()
1277        if not self.args.update_dashboard_test and not self.args.no_graphs:
1278            for name in glob.glob(self.args.output_dir + "/*over_time.png"):
1279                output = (
1280                    subprocess.check_output([self.args.dashboard_image_uploader, name])
1281                    .decode("ascii")
1282                    .rstrip()
1283                )
1284                str_io.write(f"\n{name} : ![]({output})\n")
1285        comment = generate_dropdown_comment(title, str_io.getvalue())
1286
1287        with open(f"{self.args.output_dir}/gh_regression.txt", "w") as gh_fh:
1288            gh_fh.write(comment)
1289
1290    def diff(self):
1291        log_infos = self.find_last_k()
1292
1293        for metric in ["geomean", "passrate", "comp_time", "memory"]:
1294            fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))
1295            for idx, suite in enumerate(self.suites):
1296                dfs = []
1297                for log_info in log_infos:
1298                    dir_path = os.path.join(
1299                        self.args.dashboard_archive_path, log_info.dir_path
1300                    )
1301                    assert os.path.exists(dir_path)
1302                    gmean_filename = os.path.join(dir_path, f"{metric}.csv")
1303                    if not os.path.exists(gmean_filename):
1304                        continue
1305                    df = pd.read_csv(gmean_filename)
1306                    if suite not in df:
1307                        continue
1308                    if metric == "geomean" or metric == "memory":
1309                        df[suite] = df[suite].str.replace("x", "").astype(float)
1310                    elif metric == "passrate":
1311                        df[suite] = df[suite].str.split("%").str[0].astype(float)
1312                    df.insert(0, "day", get_date(log_info))
1313                    df = df.pivot(index="day", columns="Compiler", values=suite)
1314
1315                    # Interim stage when both inductor_cudagraphs and inductor exist
1316                    df = df.rename(columns={"inductor_cudagraphs": "inductor"})
1317                    for col_name in df.columns:
1318                        if col_name not in self.args.compilers:
1319                            df = df.drop(columns=[col_name])
1320                    dfs.append(df)
1321
1322                df = pd.concat(dfs)
1323                df = df.interpolate(method="linear")
1324                ax = df.plot(
1325                    ax=axes[idx],
1326                    kind="line",
1327                    ylabel=metric,
1328                    xlabel="Date",
1329                    grid=True,
1330                    ylim=0 if metric == "passrate" else 0.8,
1331                    title=suite,
1332                    style=".-",
1333                    legend=False,
1334                )
1335                ax.legend(loc="lower right", ncol=2)
1336
1337            plt.tight_layout()
1338            plt.savefig(os.path.join(output_dir, f"{metric}_over_time.png"))
1339
1340        self.generate_comment()
1341
1342
1343class DashboardUpdater:
1344    """
1345    Aggregates the information and posts a comment to the Performance Dashboard.
1346    https://github.com/pytorch/torchdynamo/issues/681
1347    """
1348
1349    def __init__(self, args):
1350        self.args = args
1351        self.output_dir = args.output_dir
1352        self.lookup_file = os.path.join(self.args.dashboard_archive_path, "lookup.csv")
1353        assert os.path.exists(self.lookup_file)
1354        try:
1355            if not self.args.update_dashboard_test and not self.args.no_update_archive:
1356                self.update_lookup_file()
1357        except subprocess.CalledProcessError:
1358            sys.stderr.write("failed to update lookup file\n")
1359
1360    def update_lookup_file(self):
1361        dtype = self.args.dtypes[0]
1362        day, _ = archive_data(self.args.archive_name)
1363        target_dir = get_archive_name(self.args, dtype)
1364        # Append a row (day, performance, dtype, archived folder) for this run to lookup.csv
1365        subprocess.check_call(
1366            f'echo "{day},performance,{dtype},{target_dir}" >> {self.lookup_file}',
1367            shell=True,
1368        )
1369
1370    def archive(self):
1371        dtype = self.args.dtypes[0]
1372        # Copy the folder to archived location
1373        archive(
1374            self.output_dir,
1375            self.args.dashboard_archive_path,
1376            self.args.archive_name,
1377            dtype,
1378        )
1379
1380    def upload_graphs(self):
1381        title = "## Performance graphs ##\n"
1382        str_io = io.StringIO()
1383        if not self.args.update_dashboard_test and not self.args.no_graphs:
1384            for name in glob.glob(self.output_dir + "/*png"):
1385                if "over_time" not in name:
1386                    output = (
1387                        subprocess.check_output(
1388                            [self.args.dashboard_image_uploader, name]
1389                        )
1390                        .decode("ascii")
1391                        .rstrip()
1392                    )
1393                    str_io.write(f"\n{name} : ![]({output})\n")
1394        comment = generate_dropdown_comment(title, str_io.getvalue())
1395
1396        with open(f"{self.output_dir}/gh_graphs.txt", "w") as gh_fh:
1397            gh_fh.write(comment)
1398
1399    def gen_comment(self):
1400        files = [
1401            "gh_title.txt",
1402            "gh_executive_summary.txt",
1403            "gh_summary_diff.txt",
1404            "gh_warnings.txt",
1405            "gh_regression.txt",
1406            "gh_metric_regression.txt",
1407            "gh_training.txt" if self.args.training else "gh_inference.txt",
1408            "gh_graphs.txt",
1409            "gh_build_summary.txt",
1410        ]
1411        all_lines = []
1412        for f in files:
1413            try:
1414                with open(os.path.join(self.output_dir, f)) as fh:
1415                    all_lines.extend(fh.readlines())
1416            except FileNotFoundError:
1417                pass
1418
1419        return "\n".join([x.rstrip() for x in all_lines])
1420
1421    def comment_on_gh(self, comment):
1422        """
1423        Send a comment to the dashboard issue
1424        """
1425        with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
1426            f.write(comment)
1427            filename = f.name
1428
1429        issue_number = "93794"
1430        if self.args.dtypes[0] == "float32":
1431            issue_number = "93518"
1432
1433        subprocess.check_call(
1434            [
1435                self.args.dashboard_gh_cli_path,
1436                "issue",
1437                "comment",
1438                "--repo=https://github.com/pytorch/pytorch.git",
1439                issue_number,
1440                "-F",
1441                filename,
1442            ]
1443        )
1444
1445        os.remove(filename)
1446
1447    def update(self):
1448        self.upload_graphs()
1449        if not self.args.no_detect_regressions:
1450            SummaryStatDiffer(self.args).generate_comment()
1451            RegressionDetector(self.args).generate_comment()
1452            try:
1453                RegressionTracker(self.args).diff()
1454            except Exception as e:
1455                logging.exception("")
1456                with open(f"{self.args.output_dir}/gh_regression.txt", "w") as gh_fh:
1457                    gh_fh.write("")
1458
1459        comment = self.gen_comment()
1460        print(comment)
1461
1462        if not self.args.update_dashboard_test:
1463            if not self.args.no_gh_comment:
1464                self.comment_on_gh(comment)
1465            if not self.args.no_update_archive:
1466                self.archive()
1467
1468
1469if __name__ == "__main__":
1470    args = parse_args()
1471
1472    def extract(key):
1473        return DEFAULTS[key] if getattr(args, key, None) is None else getattr(args, key)
1474
1475    dtypes = extract("dtypes")
1476    suites = extract("suites")
1477    devices = extract("devices")
1478
1479    if args.inference:
1480        compilers = DEFAULTS["inference"] if args.compilers is None else args.compilers
1481        flag_compilers = (
1482            DEFAULTS["flag_compilers"]["inference"]
1483            if args.flag_compilers is None
1484            else args.flag_compilers
1485        )
1486    else:
1487        assert args.training
1488        compilers = DEFAULTS["training"] if args.compilers is None else args.compilers
1489        flag_compilers = (
1490            DEFAULTS["flag_compilers"]["training"]
1491            if args.flag_compilers is None
1492            else args.flag_compilers
1493        )
1494
1495    output_dir = args.output_dir
1496    args.compilers = compilers
1497    args.devices = devices
1498    args.dtypes = dtypes
1499    flag_compilers = list(set(flag_compilers) & set(compilers))
1500    args.flag_compilers = flag_compilers
1501    args.suites = suites
1502
1503    if args.print_run_commands:
1504        generated_file = generate_commands(
1505            args, dtypes, suites, devices, compilers, output_dir
1506        )
1507        print(
1508            f"Run commands have been generated in {generated_file}. Please run them with (bash {generated_file})."
1509        )
1510    elif args.visualize_logs:
1511        parse_logs(args, dtypes, suites, devices, compilers, flag_compilers, output_dir)
1512    elif args.run:
1513        generated_file = generate_commands(
1514            args, dtypes, suites, devices, compilers, output_dir
1515        )
1516        # generate memoized archive name now so that the date is reflective
1517        # of when the run started
1518        get_archive_name(args, dtypes[0])
1519        # TODO - Do we need to worry about segfaults
1520        try:
1521            os.system(f"bash {generated_file}")
1522        except Exception as e:
1523            print(
1524                f"Running commands failed. Please run manually (bash {generated_file}) and inspect the errors."
1525            )
1526            raise e
1527        if not args.log_operator_inputs:
1528            if not args.no_update_archive:
1529                archive(
1530                    output_dir,
1531                    args.dashboard_archive_path,
1532                    args.archive_name,
1533                    dtypes[0],
1534                )
1535            parse_logs(
1536                args, dtypes, suites, devices, compilers, flag_compilers, output_dir
1537            )
1538            if not args.no_update_archive:
1539                archive(
1540                    output_dir,
1541                    args.dashboard_archive_path,
1542                    args.archive_name,
1543                    dtypes[0],
1544                )
1545
1546    if args.update_dashboard:
1547        DashboardUpdater(args).update()
1548