#!/usr/bin/env python3

"""
A wrapper over the benchmark infrastructure to generate commonly used commands,
parse results, and generate CSVs/graphs.

The script works off the manually written TABLE (see below). We can add more
commands in the future.

One example usage is
-> python benchmarks/runner.py --suites=torchbench --inference
This command will generate the commands for the default compilers (see DEFAULTS
below) for inference, run them and visualize the logs.

If you just want to print the commands, use the following
-> python benchmarks/runner.py --print-run-commands --suites=torchbench --inference

Similarly, if you just want to visualize already finished logs
-> python benchmarks/runner.py --visualize-logs --suites=torchbench --inference

If you want to test float16
-> python benchmarks/runner.py --suites=torchbench --inference --dtypes=float16

"""

import argparse
import dataclasses
import functools
import glob
import importlib
import io
import itertools
import logging
import os
import re
import shutil
import subprocess
import sys
import tempfile
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from os.path import abspath, exists
from random import randint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import rcParams
from scipy.stats import gmean
from tabulate import tabulate

import torch
import torch._dynamo


rcParams.update({"figure.autolayout": True})
plt.rc("axes", axisbelow=True)

DEFAULT_OUTPUT_DIR = "benchmark_logs"


log = logging.getLogger(__name__)

TABLE = {
    "training": {
        "ts_nnc": "--training --speedup-ts ",
        "ts_nvfuser": "--training --nvfuser --speedup-dynamo-ts ",
        "eager": "--training --backend=eager ",
        "aot_eager": "--training --backend=aot_eager ",
        "cudagraphs": "--training --backend=cudagraphs ",
        "aot_nvfuser": "--training --nvfuser --backend=aot_ts_nvfuser ",
        "nvprims_nvfuser": "--training --backend=nvprims_nvfuser ",
        "inductor": "--training --inductor ",
        "inductor_no_cudagraphs": "--training --inductor --disable-cudagraphs ",
        "inductor_max_autotune": "--training --inductor --inductor-compile-mode max-autotune ",
        "inductor_max_autotune_no_cudagraphs": (
            "--training --inductor --inductor-compile-mode max-autotune-no-cudagraphs --disable-cudagraphs "
        ),
    },
    "inference": {
        "aot_eager": "--inference --backend=aot_eager ",
        "eager": "--inference --backend=eager ",
        "ts_nnc": "--inference --speedup-ts ",
        "ts_nvfuser": "--inference -n100 --speedup-ts --nvfuser ",
        "trt": "--inference -n100 --speedup-trt ",
        "ts_nvfuser_cudagraphs": "--inference --backend=cudagraphs_ts ",
        "inductor": "--inference -n50 --inductor ",
        "inductor_no_cudagraphs": "--inference -n50 --inductor --disable-cudagraphs ",
        "inductor_max_autotune": "--inference -n50 --inductor --inductor-compile-mode max-autotune ",
        "inductor_max_autotune_no_cudagraphs": (
            "--inference -n50 --inductor --inductor-compile-mode max-autotune-no-cudagraphs --disable-cudagraphs "
        ),
        "torchscript-onnx": "--inference -n5 --torchscript-onnx",
        "dynamo-onnx": "--inference -n5 --dynamo-onnx",
    },
}

INFERENCE_COMPILERS = tuple(TABLE["inference"].keys())
TRAINING_COMPILERS = tuple(TABLE["training"].keys())
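
# Defaults used when the corresponding command-line options are not passed
# explicitly; "quick" holds per-suite -k filters applied with --quick.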
DEFAULTS = {
    "training": [
        "eager",
        "aot_eager",
        "inductor",
        "inductor_no_cudagraphs",
    ],
    "inference": [
        "eager",
        "aot_eager",
        "inductor",
        "inductor_no_cudagraphs",
    ],
    "flag_compilers": {
        "training": ["inductor", "inductor_no_cudagraphs"],
        "inference": ["inductor", "inductor_no_cudagraphs"],
    },
    "dtypes": [
        "float32",
    ],
    "suites": ["torchbench", "huggingface", "timm_models"],
    "devices": [
        "cuda",
    ],
    "quick": {
        "torchbench": '-k "resnet..$"',
        "huggingface": "-k Albert",
        "timm_models": ' -k "^resnet" -k "^inception"',
    },
}


DASHBOARD_DEFAULTS = {
    "dashboard_image_uploader": "/fsx/users/anijain/bin/imgur.sh",
    "dashboard_archive_path": "/data/home/anijain/cluster/cron_logs",
    "dashboard_gh_cli_path": "/data/home/anijain/miniconda/bin/gh",
}


def flag_speedup(x):
    return x < 0.95


def flag_compilation_latency(x):
    return x > 120


def flag_compression_ratio(x):
    return x < 0.9


def flag_accuracy(x):
    return "pass" not in x


FLAG_FNS = {
    "speedup": flag_speedup,
    "compilation_latency": flag_compilation_latency,
    "compression_ratio": flag_compression_ratio,
    "accuracy": flag_accuracy,
}


def percentage(part, whole, decimals=2):
    if whole == 0:
        return 0
    return round(100 * float(part) / float(whole), decimals)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--devices", action="append", help="cpu or cuda")
    parser.add_argument("--dtypes", action="append", help="float16/float32/amp")
    parser.add_argument("--suites", action="append", help="huggingface/torchbench/timm")
    parser.add_argument(
        "--compilers",
        action="append",
        help=f"For --inference, options are {INFERENCE_COMPILERS}. For --training, options are {TRAINING_COMPILERS}",
    )

    parser.add_argument(
        "--flag-compilers",
        action="append",
        help="List of compilers to flag issues. Same format as --compilers.",
    )
    parser.add_argument(
        "--quick", action="store_true", help="Just run one model. Helpful for debugging."
    )
    parser.add_argument(
        "--output-dir",
        help="Choose the output directory to save the logs",
        default=DEFAULT_OUTPUT_DIR,
    )
    parser.add_argument(
        "--keep-output-dir",
        action="store_true",
        help="Do not clean up the output directory before running",
    )

    # Choose either generation of commands, pretty parsing or e2e runs
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument(
        "--print-run-commands",
        "--print_run_commands",
        action="store_true",
        help="Generate the commands and save them to a run_*.sh script",
    )
    group.add_argument(
        "--visualize-logs",
        "--visualize_logs",
        action="store_true",
        help="Pretty print the log files and draw graphs",
    )
    group.add_argument(
        "--run",
        action="store_true",
        default=True,
        help="Generate the commands, run them, and parse the resulting logs",
    )

    parser.add_argument(
        "--log-operator-inputs",
        action="store_true",
        default=False,
        help="Log operator inputs",
    )
    parser.add_argument(
        "--include-slowdowns",
        "--include_slowdowns",
        action="store_true",
        default=False,
        help="Include slowdowns in geomean performance speedup report. By default, slowdowns are ignored. "
        "This is because one can always use eager if compile is not speeding things up",
    )

    parser.add_argument(
        "--extra-args", default="", help="Append these args to the benchmark command line"
    )

    # Choose either inference or training
    group_mode = parser.add_mutually_exclusive_group(required=True)
    group_mode.add_argument(
        "--inference", action="store_true", help="Only run inference related tasks"
    )
    group_mode.add_argument(
        "--training", action="store_true", help="Only run training related tasks"
    )

    parser.add_argument(
        "--base-sha",
        help="commit id for the tested pytorch",
    )
    parser.add_argument(
        "--total-partitions",
        type=int,
        help="Total number of partitions, to be passed to the actual benchmark script",
    )
    parser.add_argument(
        "--partition-id",
        type=int,
        help="ID of partition, to be passed to the actual benchmark script",
    )

    parser.add_argument(
        "--update-dashboard",
        action="store_true",
        default=False,
        help="Update the dashboard",
    )
    parser.add_argument(
        "--no-graphs",
        action="store_true",
        default=False,
        help="Do not generate and upload metric graphs",
    )
    parser.add_argument(
        "--no-update-archive",
        action="store_true",
        default=False,
        help="Do not update lookup.csv or the log archive",
    )
    parser.add_argument(
        "--no-gh-comment",
        action="store_true",
        default=False,
        help="Do not write a comment to github",
    )
    parser.add_argument(
        "--no-detect-regressions",
        action="store_true",
        default=False,
        help="Do not compare to previous runs for regressions or metric graphs.",
    )
    parser.add_argument(
        "--update-dashboard-test",
        action="store_true",
        default=False,
        help="Implies --no-graphs, --no-update-archive, and --no-gh-comment",
    )
    parser.add_argument(
        "--dashboard-image-uploader",
        default=DASHBOARD_DEFAULTS["dashboard_image_uploader"],
        help="Image uploader command",
    )
    parser.add_argument(
        "--dashboard-archive-path",
        default=DASHBOARD_DEFAULTS["dashboard_archive_path"],
        help="Archived directory path",
    )
    parser.add_argument(
        "--archive-name",
        help="Directory name under dashboard-archive-path to copy output-dir to. "
        "If not provided, a generated name is used.",
    )
    parser.add_argument(
        "--dashboard-gh-cli-path",
        default=DASHBOARD_DEFAULTS["dashboard_gh_cli_path"],
        help="Github CLI path",
    )
    parser.add_argument(
        "--batch-size",
        "--batch_size",
        type=int,
        default=None,
        help="batch size for benchmarking",
    )
    parser.add_argument(
        "--threads",
        "-t",
        type=int,
        default=None,
        help="number of threads to use for eager and inductor.",
    )
    launcher_group = parser.add_argument_group("CPU Launcher Parameters")
    launcher_group.add_argument(
        "--enable-cpu-launcher",
        "--enable_cpu_launcher",
        action="store_true",
        default=False,
        help="Use torch.backends.xeon.run_cpu to get the peak performance on Intel(R) Xeon(R) Scalable Processors.",
    )
    launcher_group.add_argument(
        "--cpu-launcher-args",
        "--cpu_launcher_args",
        type=str,
        default="",
        help="Provide the args of torch.backends.xeon.run_cpu. "
        "To look up what optional arguments this launcher offers: python -m torch.backends.xeon.run_cpu --help",
    )
    parser.add_argument(
        "--no-cold-start-latency",
        action="store_true",
        default=False,
        help="Do not include --cold-start-latency on inductor benchmarks",
    )
    parser.add_argument(
        "--inductor-compile-mode",
        default=None,
        help="torch.compile mode argument for inductor runs.",
    )
    args = parser.parse_args()
    return args


def get_mode(args):
    if args.inference:
        return "inference"
    return "training"


def get_skip_tests(suite, device, is_training: bool):
    """
    Generate a string of -x flags to skip models that need an unusual setup
    or are not suitable for this device/mode.
    """
    skip_tests = set()
    original_dir = abspath(os.getcwd())
    module = importlib.import_module(suite)
    os.chdir(original_dir)

    if suite == "torchbench":
        skip_tests.update(module.TorchBenchmarkRunner().skip_models)
        if is_training:
            skip_tests.update(
                module.TorchBenchmarkRunner().skip_not_suitable_for_training_models
            )
        if device == "cpu":
            skip_tests.update(module.TorchBenchmarkRunner().skip_models_for_cpu)
        elif device == "cuda":
            skip_tests.update(module.TorchBenchmarkRunner().skip_models_for_cuda)

    skip_tests = (f"-x {name}" for name in skip_tests)
    skip_str = " ".join(skip_tests)
    return skip_str


def generate_csv_name(args, dtype, suite, device, compiler, testing):
    mode = get_mode(args)
    return f"{compiler}_{suite}_{dtype}_{mode}_{device}_{testing}.csv"

def generate_commands(args, dtypes, suites, devices, compilers, output_dir):
    mode = get_mode(args)
    suites_str = "_".join(suites)
    devices_str = "_".join(devices)
    dtypes_str = "_".join(dtypes)
    compilers_str = "_".join(compilers)
    generated_file = (
        f"run_{mode}_{devices_str}_{dtypes_str}_{suites_str}_{compilers_str}.sh"
    )
    with open(generated_file, "w") as runfile:
        lines = []

        lines.append("#!/bin/bash")
        lines.append("set -x")
        lines.append("# Set up the output directory")
        if not args.keep_output_dir:
            lines.append(f"rm -rf {output_dir}")
        # It's ok if the output directory already exists
        lines.append(f"mkdir -p {output_dir}")
        lines.append("")

        for testing in ["performance", "accuracy"]:
            for suite, device, dtype in itertools.product(suites, devices, dtypes):
                lines.append(
                    f"# Commands for {suite} for device={device}, dtype={dtype} for {mode} and for {testing} testing"
                )
                info = TABLE[mode]
                for compiler in compilers:
                    base_cmd = info[compiler]
                    output_filename = f"{output_dir}/{generate_csv_name(args, dtype, suite, device, compiler, testing)}"
                    launcher_cmd = "python"
                    if args.enable_cpu_launcher:
                        launcher_cmd = f"python -m torch.backends.xeon.run_cpu {args.cpu_launcher_args}"
                    cmd = f"{launcher_cmd} benchmarks/dynamo/{suite}.py --{testing} --{dtype} -d{device} --output={output_filename}"
                    cmd = f"{cmd} {base_cmd} {args.extra_args} --dashboard"
                    skip_tests_str = get_skip_tests(suite, device, args.training)
                    cmd = f"{cmd} {skip_tests_str}"

                    if args.log_operator_inputs:
                        cmd = f"{cmd} --log-operator-inputs"

                    if args.quick:
                        filters = DEFAULTS["quick"][suite]
                        cmd = f"{cmd} {filters}"

                    if (
                        compiler
                        in (
                            "inductor",
                            "inductor_no_cudagraphs",
                        )
                        and not args.no_cold_start_latency
                    ):
                        cmd = f"{cmd} --cold-start-latency"

                    if args.batch_size is not None:
                        cmd = f"{cmd} --batch-size {args.batch_size}"

                    if args.threads is not None:
                        cmd = f"{cmd} --threads {args.threads}"

                    if args.total_partitions is not None:
                        cmd = f"{cmd} --total-partitions {args.total_partitions}"

                    if args.partition_id is not None:
                        cmd = f"{cmd} --partition-id {args.partition_id}"

                    if args.inductor_compile_mode is not None:
                        cmd = f"{cmd} --inductor-compile-mode {args.inductor_compile_mode}"
                    lines.append(cmd)
                lines.append("")
        runfile.writelines([line + "\n" for line in lines])
    return generated_file


def generate_dropdown_comment(title, body):
    str_io = io.StringIO()
    str_io.write(f"{title}\n")
    str_io.write("<details>\n")
    str_io.write("<summary>see more</summary>\n")
    str_io.write(f"{body}")
    str_io.write("\n")
    str_io.write("</details>\n\n")
    return str_io.getvalue()


def build_summary(args):
    out_io = io.StringIO()

    def print_commit_hash(path, name):
        if args.base_sha is not None:
            if name == "pytorch":
                out_io.write(f"{name} commit: {args.base_sha}\n")
        elif exists(path):
            import git

            repo = git.Repo(path, search_parent_directories=True)
            sha = repo.head.object.hexsha
            date = repo.head.object.committed_datetime
            out_io.write(f"{name} commit: {sha}\n")
            out_io.write(f"{name} commit date: {date}\n")
        else:
            out_io.write(f"{name} Absent\n")

    def env_var(name):
        if name in os.environ:
            out_io.write(f"{name} = {os.environ[name]}\n")
        else:
            out_io.write(f"{name} = {None}\n")

    out_io.write("\n")
    out_io.write("### Run name ###\n")
    out_io.write(get_archive_name(args, args.dtypes[0]))
    out_io.write("\n")

    out_io.write("\n")
    out_io.write("### Commit hashes ###\n")
    print_commit_hash("../pytorch", "pytorch")
    print_commit_hash("../torchbenchmark", "torchbench")

    out_io.write("\n")
    out_io.write("### TorchDynamo config flags ###\n")
    for key in dir(torch._dynamo.config):
        val = getattr(torch._dynamo.config, key)
        if not key.startswith("__") and isinstance(val, bool):
            out_io.write(f"torch._dynamo.config.{key} = {val}\n")

    out_io.write("\n")
    out_io.write("### Torch version ###\n")
    out_io.write(f"torch: {torch.__version__}\n")

    out_io.write("\n")
    out_io.write("### Environment variables ###\n")
    env_var("TORCH_CUDA_ARCH_LIST")
    env_var("CUDA_HOME")
    env_var("USE_LLVM")

    if "cuda" in args.devices:
        out_io.write("\n")
        out_io.write("### GPU details ###\n")
        out_io.write(f"CUDNN VERSION: {torch.backends.cudnn.version()}\n")
        out_io.write(f"Number CUDA Devices: {torch.cuda.device_count()}\n")
        out_io.write(f"Device Name: {torch.cuda.get_device_name(0)}\n")
        out_io.write(
            f"Device Memory [GB]: {torch.cuda.get_device_properties(0).total_memory/1e9}\n"
        )

    title = "## Build Summary"
    comment = generate_dropdown_comment(title, out_io.getvalue())
    with open(f"{output_dir}/gh_build_summary.txt", "w") as gh_fh:
        gh_fh.write(comment)

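# Archive directory names default to
# "day_<day of year>_<dd>_<mm>_<yy>_performance_<dtype>_<random suffix>"
# (see default_archive_name below). archive_data() recovers the day number and
# prefix from such a name, or derives fresh ones from today's date (UTC-8)
# when no name is given.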
@functools.lru_cache(None)
def archive_data(archive_name):
    if archive_name is not None:
        prefix_match = re.search(r"\w+(?=_performance)", archive_name)
        if prefix_match is not None:
            prefix = prefix_match.group(0)
        else:
            prefix = ""
        day_match = re.search(r"day_(\d+)_", archive_name)
        if day_match is not None:
            day = day_match.group(1)
        else:
            day = "000"
    else:
        now = datetime.now(tz=timezone(timedelta(hours=-8)))
        day = now.strftime("%j")
        prefix = now.strftime(f"day_{day}_%d_%m_%y")
    return day, prefix


@functools.lru_cache(None)
def default_archive_name(dtype):
    _, prefix = archive_data(None)
    return f"{prefix}_performance_{dtype}_{randint(100, 999)}"


def get_archive_name(args, dtype):
    return (
        default_archive_name(dtype) if args.archive_name is None else args.archive_name
    )


def archive(src_dir, dest_dir_prefix, archive_name, dtype):
    if archive_name is None:
        archive_name = default_archive_name(dtype)
    # Copy the folder to the archived location
    dest = os.path.join(dest_dir_prefix, archive_name)
    shutil.copytree(src_dir, dest, dirs_exist_ok=True)
    print(f"copied contents of {src_dir} to {dest}")


def get_metric_title(metric):
    if metric == "speedup":
        return "Performance speedup"
    elif metric == "accuracy":
        return "Accuracy"
    elif metric == "compilation_latency":
        return "Compilation latency (sec)"
    elif metric == "compression_ratio":
        return "Peak Memory Compression Ratio"
    elif metric == "abs_latency":
        return "Absolute latency (ms)"
    raise RuntimeError("unknown metric")


class Parser:
    def __init__(
        self, suites, devices, dtypes, compilers, flag_compilers, mode, output_dir
    ):
        self.suites = suites
        self.devices = devices
        self.dtypes = dtypes
        self.compilers = compilers
        self.flag_compilers = flag_compilers
        self.output_dir = output_dir
        self.mode = mode

    def has_header(self, output_filename):
        header_present = False
        with open(output_filename) as f:
            line = f.readline()
            if "dev" in line:
                header_present = True
        return header_present


class ParsePerformanceLogs(Parser):
    def __init__(
        self,
        suites,
        devices,
        dtypes,
        compilers,
        flag_compilers,
        mode,
        output_dir,
        include_slowdowns=False,
    ):
        super().__init__(
            suites,
            devices,
            dtypes,
            compilers,
            flag_compilers,
            mode,
            output_dir,
        )
        self.parsed_frames = defaultdict(lambda: defaultdict(None))
        self.untouched_parsed_frames = defaultdict(lambda: defaultdict(None))
        self.metrics = [
            "speedup",
            "abs_latency",
            "compilation_latency",
            "compression_ratio",
        ]
        self.bottom_k = 50
        self.include_slowdowns = include_slowdowns
        self.parse()

    def plot_graph(self, df, title):
        labels = df.columns.values.tolist()
        labels = labels[3:]
        df.plot(
            x="name",
            y=labels,
            kind="bar",
            width=0.65,
            title=title,
            ylabel="Speedup over eager",
            xlabel="",
            grid=True,
            figsize=(max(len(df.index) / 4, 5), 10),
            edgecolor="black",
        )
        plt.tight_layout()
        plt.savefig(f"{self.output_dir}/{title}.png")

    def read_csv(self, output_filename):
        if self.has_header(output_filename):
            return pd.read_csv(output_filename)
        else:
            return pd.read_csv(
                output_filename,
                names=[
                    "dev",
                    "name",
                    "batch_size",
                    "speedup",
                    "abs_latency",
                    "compilation_latency",
                    "compression_ratio",
                ],
                header=None,
                engine="python",
            )

    def parse(self):
        self.extract_df("accuracy", "accuracy")
        for metric in self.metrics:
            self.extract_df(metric, "performance")

    def clean_batch_sizes(self, frames):
        # Clean up batch sizes when they are 0
        if len(frames) == 1:
            return frames
        batch_sizes = frames[0]["batch_size"].to_list()
        for frame in frames[1:]:
            frame_batch_sizes = frame["batch_size"].to_list()
            for idx, (batch_a, batch_b) in enumerate(
                zip(batch_sizes, frame_batch_sizes)
            ):
                assert (
                    batch_a == batch_b or batch_a == 0 or batch_b == 0
                ), f"a={batch_a}, b={batch_b}"
                batch_sizes[idx] = max(batch_a, batch_b)
        for frame in frames:
            frame["batch_size"] = batch_sizes
        return frames

    def extract_df(self, metric, testing):
        for suite, device, dtype in itertools.product(
            self.suites, self.devices, self.dtypes
        ):
            frames = []
            for compiler in self.compilers:
                output_filename = f"{self.output_dir}/{compiler}_{suite}_{dtype}_{self.mode}_{device}_{testing}.csv"
                df = self.read_csv(output_filename)
                if metric not in df:
                    df.insert(len(df.columns), metric, np.nan)
                df = df[["dev", "name", "batch_size", metric]]
                df.rename(columns={metric: compiler}, inplace=True)
                df["batch_size"] = df["batch_size"].astype(int)
                frames.append(df)

            # Merge the results
            frames = self.clean_batch_sizes(frames)
            if len(self.compilers) == 1:
                df = frames[0]
            else:
                # Merge data frames
                df = pd.merge(frames[0], frames[1], on=["dev", "name", "batch_size"])
                for idx in range(2, len(frames)):
                    df = pd.merge(df, frames[idx], on=["dev", "name", "batch_size"])

            if testing == "performance":
                for compiler in self.compilers:
                    df[compiler] = pd.to_numeric(df[compiler], errors="coerce").fillna(
                        0
                    )

            df_copy = df.copy()
            df_copy = df_copy.sort_values(
                by=list(reversed(self.compilers)), ascending=False
            )
            if "inductor" in self.compilers:
                df_copy = df_copy.sort_values(by="inductor", ascending=False)
            self.untouched_parsed_frames[suite][metric] = df_copy

            if testing == "performance":
                df_accuracy = self.parsed_frames[suite]["accuracy"]
                perf_rows = []
                for model_name in df["name"]:
                    perf_row = df[df["name"] == model_name].copy()
                    acc_row = df_accuracy[df_accuracy["name"] == model_name]
                    for compiler in self.compilers:
                        if not perf_row.empty:
                            if acc_row.empty:
                                perf_row[compiler] = 0.0
                            elif acc_row[compiler].iloc[0] in (
                                "model_fail_to_load",
                                "eager_fail_to_run",
                            ):
                                perf_row = pd.DataFrame()
                            elif acc_row[compiler].iloc[0] not in (
                                "pass",
                                "pass_due_to_skip",
                            ):
                                perf_row[compiler] = 0.0
                    if not perf_row.empty:
                        perf_rows.append(perf_row)
                df = pd.concat(perf_rows)
            df = df.sort_values(by=list(reversed(self.compilers)), ascending=False)

            if "inductor" in self.compilers:
                df = df.sort_values(by="inductor", ascending=False)
            self.parsed_frames[suite][metric] = df

    def get_passing_entries(self, compiler, df):
        return df[compiler][df[compiler] > 0]

    def comp_time(self, compiler, df):
        df = self.get_passing_entries(compiler, df)
        # df = df.sort_values(by=compiler, ascending=False)[compiler][: self.bottom_k]
        if df.empty:
            return "0.0"

        return f"{df.mean():.2f}"

    def geomean(self, compiler, df):
        cleaned_df = self.get_passing_entries(compiler, df)
        if not self.include_slowdowns:
            cleaned_df = cleaned_df.clip(1)
        if cleaned_df.empty:
            return "0.0x"
        return f"{gmean(cleaned_df):.2f}x"

    def passrate(self, compiler, df):
        total = len(df.index)
        passing = df[df[compiler] > 0.0][compiler].count()
        perc = int(percentage(passing, total, decimals=0))
        return f"{perc}%, {passing}/{total}"
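
    # Note: "memory" reports the mean peak-memory compression ratio over models
    # with a positive, non-NaN value; failed runs are excluded from the average.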
    def memory(self, compiler, df):
        df = self.get_passing_entries(compiler, df)
        df = df.fillna(0)
        df = df[df > 0]
        if df.empty:
            return "0.0x"
        return f"{df.mean():.2f}x"

    def exec_summary_df(self, fn, metric):
        """
        Generate a table with passrate and geomean perf
        """
        cols = {}
        cols["Compiler"] = self.compilers
        for suite in self.suites:
            df = self.parsed_frames[suite][metric]
            # speedups = [self.geomean(compiler, df) for compiler in self.compilers]
            speedups = [fn(compiler, df) for compiler in self.compilers]
            col = pd.Series(data=speedups, index=self.compilers)
            cols[suite] = col
        df = pd.DataFrame(cols)
        df = df.fillna(0)
        df.to_csv(os.path.join(self.output_dir, f"{fn.__name__}.csv"))
        return df

    def exec_summary_text(self, caption, fn, metric):
        df = self.exec_summary_df(fn, metric)
        tabform = tabulate(df, headers="keys", tablefmt="pretty", showindex="never")

        str_io = io.StringIO()
        str_io.write(f"{caption}")
        str_io.write("~~~\n")
        str_io.write(f"{tabform}\n")
        str_io.write("~~~\n")
        return str_io.getvalue()

    def generate_executive_summary(self):
        machine = "A100 GPUs"
        if "cpu" in self.devices:
            get_machine_cmd = "lscpu | grep 'Model name' | awk -F':' '{print $2}'"
            machine = subprocess.getstatusoutput(get_machine_cmd)[1].strip()
        description = (
            "We evaluate different backends "
            "across three benchmark suites - torchbench, huggingface and timm. We run "
            "these experiments on "
            + machine
            + ". Each experiment runs one iteration of forward pass "
            "and backward pass for training and forward pass only for inference. "
            "For accuracy, we check the numerical correctness of forward pass outputs and gradients "
            "by comparing with native pytorch. We measure speedup "
            "by normalizing against the performance of native pytorch. We report mean "
            "compilation latency numbers and peak memory footprint reduction ratio.\n\n"
            "Caveats\n"
            "1) Batch size has been reduced to work around OOM errors. Work is in progress to "
            "reduce peak memory footprint.\n"
            "2) Experiments do not cover dynamic shapes.\n"
            "3) Experimental setup does not include an optimizer.\n\n"
        )
        comment = generate_dropdown_comment("", description)
        str_io = io.StringIO()
        str_io.write("\n")
        str_io.write("## Executive Summary ##\n")
        str_io.write(comment)

        speedup_caption = "Geometric mean speedup \n"
        speedup_summary = self.exec_summary_text(
            speedup_caption, self.geomean, "speedup"
        )

        passrate_caption = "Passrate\n"
        passrate_summary = self.exec_summary_text(
            passrate_caption, self.passrate, "speedup"
        )

        comp_time_caption = "Mean compilation time (seconds)\n"
        comp_time_summary = self.exec_summary_text(
            comp_time_caption, self.comp_time, "compilation_latency"
        )

        peak_memory_caption = (
            "Peak memory footprint compression ratio (higher is better)\n"
        )
        peak_memory_summary = self.exec_summary_text(
            peak_memory_caption, self.memory, "compression_ratio"
        )

        str_io.write(
            "To measure performance, compilation latency and memory footprint reduction, "
            "we remove the models that fail accuracy checks.\n\n"
        )
        str_io.write(passrate_summary)
        str_io.write(speedup_summary)
        str_io.write(comp_time_summary)
        str_io.write(peak_memory_summary)
        self.executive_summary = str_io.getvalue()

    def flag_bad_entries(self, suite, metric, flag_fn):
        df = self.untouched_parsed_frames[suite][metric]
        df = df.drop("dev", axis=1)
        df = df.rename(columns={"batch_size": "bs"})
        # Apply flag_fn elementwise to the flag_compilers columns;
        # if any element in a row is flagged, the entire row is flagged.
        flag = np.logical_or.reduce(
            df[self.flag_compilers].applymap(flag_fn),
            axis=1,
        )
        df = df[flag]
        df = df.assign(suite=suite)
        return df.reindex(columns=["suite", "name"] + self.flag_compilers)

    def generate_warnings(self):
        title = "## Warnings ##"
        body = (
            "We flag models where:\n\n"
            " - accuracy fails\n"
            " - speedup < 0.95x (NOTE: 0.0 speedup typically signifies a failure in the performance test)\n"
            " - compilation latency > 120 sec.\n"
            " - compression ratio < 0.9\n"
            "\n"
        )
        for metric in [
            "accuracy",
            "speedup",
            "compilation_latency",
            "compression_ratio",
        ]:
            dfs = []
            for suite in self.suites:
                dfs.append(self.flag_bad_entries(suite, metric, FLAG_FNS[metric]))
            df = pd.concat(dfs, axis=0)
            if df.empty:
                continue
            tabform = tabulate(df, headers="keys", tablefmt="pretty", showindex="never")
            str_io = io.StringIO()
            str_io.write("\n")
            str_io.write(get_metric_title(metric) + " warnings\n")
            str_io.write("~~~\n")
            str_io.write(f"{tabform}\n")
            str_io.write("~~~\n")
            body += str_io.getvalue()

        comment = generate_dropdown_comment(title, body)
        return comment

    def prepare_message(self, suite):
        title = f"## {suite} suite with {self.dtypes[0]} precision ##"
        body = ""
        for metric in [
            "speedup",
            "accuracy",
            "compilation_latency",
            "compression_ratio",
            "abs_latency",
        ]:
            df = self.untouched_parsed_frames[suite][metric]
            df = df.drop("dev", axis=1)
            df = df.rename(columns={"batch_size": "bs"})
            tabform = tabulate(df, headers="keys", tablefmt="pretty", showindex="never")
            str_io = io.StringIO()
            str_io.write("\n")
            str_io.write(get_metric_title(metric) + "\n")
            str_io.write("~~~\n")
            str_io.write(f"{tabform}\n")
            str_io.write("~~~\n")
            body += str_io.getvalue()

        comment = generate_dropdown_comment(title, body)
        return comment

    def gen_summary_files(self):
        self.generate_executive_summary()
        for suite in self.suites:
            self.plot_graph(
                self.untouched_parsed_frames[suite]["speedup"],
                f"{suite}_{self.dtypes[0]}",
            )

        with open(f"{self.output_dir}/gh_title.txt", "w") as gh_fh:
            str_io = io.StringIO()
            str_io.write("\n")
            str_io.write(f"# Performance Dashboard for {self.dtypes[0]} precision ##\n")
            str_io.write("\n")
            gh_fh.write(str_io.getvalue())

        with open(f"{self.output_dir}/gh_executive_summary.txt", "w") as gh_fh:
            gh_fh.write(self.executive_summary)

        with open(f"{self.output_dir}/gh_warnings.txt", "w") as gh_fh:
            warnings_body = self.generate_warnings()
            gh_fh.write(warnings_body)

        str_io = io.StringIO()
        for suite in self.suites:
            str_io.write(self.prepare_message(suite))
        str_io.write("\n")
        with open(f"{self.output_dir}/gh_{self.mode}.txt", "w") as gh_fh:
            gh_fh.write(str_io.getvalue())


def parse_logs(args, dtypes, suites, devices, compilers, flag_compilers, output_dir):
    mode = get_mode(args)
    build_summary(args)
    include_slowdowns = args.include_slowdowns

    parser_class = ParsePerformanceLogs
    parser = parser_class(
        suites,
        devices,
        dtypes,
        compilers,
        flag_compilers,
        mode,
        output_dir,
        include_slowdowns,
    )
    parser.gen_summary_files()
    return


@dataclasses.dataclass
class LogInfo:
    # Day of the year this log was generated
    day: str

    # Directory path where all logs are present
    dir_path: str


def get_date(log_info):
    return datetime.strptime(f"{log_info.day}", "%j").strftime("%m-%d")

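# lookup.csv lives under --dashboard-archive-path and is appended to by
# DashboardUpdater.update_lookup_file; each row has the form
# "<day of year>,performance,<dtype>,<archive dir name>". The helper below
# scans it from the most recent entry backwards.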
def find_last_2_with_filenames(lookup_file, dashboard_archive_path, dtype, filenames):
    df = pd.read_csv(lookup_file, names=("day", "mode", "prec", "path"))
    df = df[df["mode"] == "performance"]
    df = df[df["prec"] == dtype]
    df = df[::-1]
    last2 = []
    for path in df["path"]:
        output_dir = os.path.join(dashboard_archive_path, path)
        fullpaths = [
            os.path.join(dashboard_archive_path, path, name) for name in filenames
        ]
        if all(os.path.exists(fullpath) for fullpath in fullpaths):
            last2.append(output_dir)
        if len(last2) >= 2:
            return last2
    return None


class SummaryStatDiffer:
    def __init__(self, args):
        self.args = args
        self.lookup_file = os.path.join(self.args.dashboard_archive_path, "lookup.csv")
        assert os.path.exists(self.lookup_file)

    def generate_diff(self, last2, filename, caption):
        df_cur, df_prev = (pd.read_csv(os.path.join(path, filename)) for path in last2)
        df_merge = df_cur.merge(df_prev, on="Compiler", suffixes=("_cur", "_prev"))
        data = {col: [] for col in ("compiler", "suite", "prev_value", "cur_value")}
        for _, row in df_merge.iterrows():
            if row["Compiler"] in self.args.flag_compilers:
                for suite in self.args.suites:
                    if suite + "_prev" not in row or suite + "_cur" not in row:
                        continue
                    data["compiler"].append(row["Compiler"])
                    data["suite"].append(suite)
                    data["prev_value"].append(row[suite + "_prev"])
                    data["cur_value"].append(row[suite + "_cur"])

        df = pd.DataFrame(data)
        tabform = tabulate(df, headers="keys", tablefmt="pretty", showindex="never")
        str_io = io.StringIO()
        str_io.write("\n")
        str_io.write(f"{caption}\n")
        str_io.write("~~~\n")
        str_io.write(f"{tabform}\n")
        str_io.write("~~~\n")
        return str_io.getvalue()

    def generate_comment(self):
        title = "## Summary Statistics Diff ##\n"
        body = (
            "For each relevant compiler, we compare the summary statistics "
            "for the 2 most recent reports that actually run the compiler.\n\n"
        )
        dtype = self.args.dtypes[0]
        last2 = find_last_2_with_filenames(
            self.lookup_file,
            self.args.dashboard_archive_path,
            dtype,
            ["geomean.csv", "passrate.csv"],
        )

        if last2 is None:
            body += "Could not find the 2 most recent reports.\n\n"
        else:
            for state, path in zip(("Current", "Previous"), last2):
                body += f"{state} report name: {path}\n\n"
            body += self.generate_diff(last2, "passrate.csv", "Passrate diff")
            body += self.generate_diff(
                last2, "geomean.csv", "Geometric mean speedup diff"
            )

        comment = generate_dropdown_comment(title, body)

        with open(f"{self.args.output_dir}/gh_summary_diff.txt", "w") as gh_fh:
            gh_fh.write(comment)

class RegressionDetector:
    """
    Compares the two most recent benchmark reports to find previously unflagged
    models that are now flagged.
    """

    def __init__(self, args):
        self.args = args
        self.lookup_file = os.path.join(self.args.dashboard_archive_path, "lookup.csv")
        assert os.path.exists(self.lookup_file)

    def generate_comment(self):
        title = "## Recent Regressions ##\n"
        body = (
            "For each relevant compiler, we compare the 2 most recent reports "
            "(that actually run the compiler) to find previously unflagged "
            "models that are now flagged as problematic (according to the "
            "'Warnings' section).\n\n"
        )
        dtype = self.args.dtypes[0]
        device = self.args.devices[0]
        for suite in self.args.suites:
            body += f"### Regressions for {suite} ###\n"
            last2 = {}

            for compiler in self.args.flag_compilers:
                filenames = [
                    generate_csv_name(
                        self.args, dtype, suite, device, compiler, testing
                    )
                    for testing in ["performance", "accuracy"]
                ]
                compiler_last2 = find_last_2_with_filenames(
                    self.lookup_file, self.args.dashboard_archive_path, dtype, filenames
                )
                if compiler_last2 is not None:
                    last2[compiler] = [
                        ParsePerformanceLogs(
                            [suite],
                            [device],
                            [dtype],
                            [compiler],
                            [compiler],
                            get_mode(self.args),
                            output_dir,
                        )
                        for output_dir in compiler_last2
                    ]
                    for state, path in zip(("Current", "Previous"), compiler_last2):
                        body += (
                            f"{state} report name (compiler: {compiler}, "
                            f"suite: {suite}): {path}\n\n"
                        )

            regressions_present = False
            for metric in [
                "accuracy",
                "speedup",
                "compilation_latency",
                "compression_ratio",
            ]:
                dfs = []
                for compiler in self.args.flag_compilers:
                    if last2.get(compiler) is None:
                        continue

                    df_cur, df_prev = (
                        last2[compiler][i].untouched_parsed_frames[suite][metric]
                        for i in (0, 1)
                    )
                    df_merge = df_cur.merge(
                        df_prev, on="name", suffixes=("_cur", "_prev")
                    )
                    flag_fn = FLAG_FNS[metric]
                    flag = np.logical_and(
                        df_merge[compiler + "_prev"].apply(
                            lambda x: not pd.isna(x) and not flag_fn(x)
                        ),
                        df_merge[compiler + "_cur"].apply(
                            lambda x: not pd.isna(x) and flag_fn(x)
                        ),
                    )
                    df_bad = df_merge[flag]
                    dfs.append(
                        pd.DataFrame(
                            data={
                                "compiler": compiler,
                                "name": df_bad["name"],
                                "prev_status": df_bad[compiler + "_prev"],
                                "cur_status": df_bad[compiler + "_cur"],
                            }
                        )
                    )

                if not dfs:
                    continue
                df = pd.concat(dfs, axis=0)
                if df.empty:
                    continue
                regressions_present = True
                tabform = tabulate(
                    df, headers="keys", tablefmt="pretty", showindex="never"
                )
                str_io = io.StringIO()
                str_io.write("\n")
                str_io.write(f"{get_metric_title(metric)} regressions\n")
                str_io.write("~~~\n")
                str_io.write(f"{tabform}\n")
                str_io.write("~~~\n")
                body += str_io.getvalue()

            if not regressions_present:
                body += "No regressions found.\n"

        comment = generate_dropdown_comment(title, body)

        with open(f"{self.args.output_dir}/gh_metric_regression.txt", "w") as gh_fh:
            gh_fh.write(comment)

class RegressionTracker:
    """
    Plots progress of different metrics over time to detect regressions.
    """

    def __init__(self, args):
        self.args = args
        self.suites = self.args.suites
        self.lookup_file = os.path.join(self.args.dashboard_archive_path, "lookup.csv")
        assert os.path.exists(self.lookup_file)
        self.k = 10

    def find_last_k(self):
        """
        Find the last k pairs of (day number, log_path).
        """
        dtype = self.args.dtypes[0]
        df = pd.read_csv(self.lookup_file, names=("day", "mode", "prec", "path"))
        df = df[df["mode"] == "performance"]
        df = df[df["prec"] == dtype]
        log_infos = []
        for day, path in zip(df["day"], df["path"]):
            log_infos.append(LogInfo(day, path))

        assert len(log_infos) >= self.k
        log_infos = log_infos[len(log_infos) - self.k :]
        return log_infos

    def generate_comment(self):
        title = "## Metrics over time ##\n"
        str_io = io.StringIO()
        if not self.args.update_dashboard_test and not self.args.no_graphs:
            for name in glob.glob(self.args.output_dir + "/*over_time.png"):
                output = (
                    subprocess.check_output([self.args.dashboard_image_uploader, name])
                    .decode("ascii")
                    .rstrip()
                )
                str_io.write(f"\n{name} : ![]({output})\n")
        comment = generate_dropdown_comment(title, str_io.getvalue())

        with open(f"{self.args.output_dir}/gh_regression.txt", "w") as gh_fh:
            gh_fh.write(comment)

    def diff(self):
        log_infos = self.find_last_k()

        for metric in ["geomean", "passrate", "comp_time", "memory"]:
            fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))
            for idx, suite in enumerate(self.suites):
                dfs = []
                for log_info in log_infos:
                    dir_path = os.path.join(
                        self.args.dashboard_archive_path, log_info.dir_path
                    )
                    assert os.path.exists(dir_path)
                    gmean_filename = os.path.join(dir_path, f"{metric}.csv")
                    if not os.path.exists(gmean_filename):
                        continue
                    df = pd.read_csv(gmean_filename)
                    if suite not in df:
                        continue
                    if metric == "geomean" or metric == "memory":
                        df[suite] = df[suite].str.replace("x", "").astype(float)
                    elif metric == "passrate":
                        df[suite] = df[suite].str.split("%").str[0].astype(float)
                    df.insert(0, "day", get_date(log_info))
                    df = df.pivot(index="day", columns="Compiler", values=suite)

                    # Interim stage when both inductor_cudagraphs and inductor exist
                    df = df.rename(columns={"inductor_cudagraphs": "inductor"})
                    for col_name in df.columns:
                        if col_name not in self.args.compilers:
                            df = df.drop(columns=[col_name])
                    dfs.append(df)

                df = pd.concat(dfs)
                df = df.interpolate(method="linear")
                ax = df.plot(
                    ax=axes[idx],
                    kind="line",
                    ylabel=metric,
                    xlabel="Date",
                    grid=True,
                    ylim=0 if metric == "passrate" else 0.8,
                    title=suite,
                    style=".-",
                    legend=False,
                )
                ax.legend(loc="lower right", ncol=2)

            plt.tight_layout()
            plt.savefig(os.path.join(output_dir, f"{metric}_over_time.png"))

        self.generate_comment()


class DashboardUpdater:
    """
    Aggregates the information and makes a comment to Performance Dashboard.
    https://github.com/pytorch/torchdynamo/issues/681
    """

    def __init__(self, args):
        self.args = args
        self.output_dir = args.output_dir
        self.lookup_file = os.path.join(self.args.dashboard_archive_path, "lookup.csv")
        assert os.path.exists(self.lookup_file)
        try:
            if not self.args.update_dashboard_test and not self.args.no_update_archive:
                self.update_lookup_file()
        except subprocess.CalledProcessError:
            sys.stderr.write("failed to update lookup file\n")

    def update_lookup_file(self):
        dtype = self.args.dtypes[0]
        day, _ = archive_data(self.args.archive_name)
        target_dir = get_archive_name(self.args, dtype)
        # Update lookup.csv to point to the archived logs
        subprocess.check_call(
            f'echo "{day},performance,{dtype},{target_dir}" >> {self.lookup_file}',
            shell=True,
        )

    def archive(self):
        dtype = self.args.dtypes[0]
        # Copy the folder to the archived location
        archive(
            self.output_dir,
            self.args.dashboard_archive_path,
            self.args.archive_name,
            dtype,
        )

    def upload_graphs(self):
        title = "## Performance graphs ##\n"
        str_io = io.StringIO()
        if not self.args.update_dashboard_test and not self.args.no_graphs:
            for name in glob.glob(self.output_dir + "/*png"):
                if "over_time" not in name:
                    output = (
                        subprocess.check_output(
                            [self.args.dashboard_image_uploader, name]
                        )
                        .decode("ascii")
                        .rstrip()
                    )
                    str_io.write(f"\n{name} : ![]({output})\n")
        comment = generate_dropdown_comment(title, str_io.getvalue())

        with open(f"{self.output_dir}/gh_graphs.txt", "w") as gh_fh:
            gh_fh.write(comment)

    def gen_comment(self):
        files = [
            "gh_title.txt",
            "gh_executive_summary.txt",
            "gh_summary_diff.txt",
            "gh_warnings.txt",
            "gh_regression.txt",
            "gh_metric_regression.txt",
            "gh_training.txt" if self.args.training else "gh_inference.txt",
            "gh_graphs.txt",
            "gh_build_summary.txt",
        ]
        all_lines = []
        for f in files:
            try:
                with open(os.path.join(self.output_dir, f)) as fh:
                    all_lines.extend(fh.readlines())
            except FileNotFoundError:
                pass

        return "\n".join([x.rstrip() for x in all_lines])

    def comment_on_gh(self, comment):
        """
        Send a comment to the dashboard issue.
        """
        with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
            f.write(comment)
            filename = f.name

        issue_number = "93794"
        if self.args.dtypes[0] == "float32":
            issue_number = "93518"

        subprocess.check_call(
            [
                self.args.dashboard_gh_cli_path,
                "issue",
                "comment",
                "--repo=https://github.com/pytorch/pytorch.git",
                issue_number,
                "-F",
                filename,
            ]
        )

        os.remove(filename)

    def update(self):
        self.upload_graphs()
        if not self.args.no_detect_regressions:
            SummaryStatDiffer(self.args).generate_comment()
            RegressionDetector(self.args).generate_comment()
            try:
                RegressionTracker(self.args).diff()
            except Exception:
                logging.exception("")
                with open(f"{self.args.output_dir}/gh_regression.txt", "w") as gh_fh:
                    gh_fh.write("")

        comment = self.gen_comment()
        print(comment)

        if not self.args.update_dashboard_test:
            if not self.args.no_gh_comment:
                self.comment_on_gh(comment)
            if not self.args.no_update_archive:
                self.archive()


if __name__ == "__main__":
    args = parse_args()

    def extract(key):
        return DEFAULTS[key] if getattr(args, key, None) is None else getattr(args, key)

    dtypes = extract("dtypes")
    suites = extract("suites")
    devices = extract("devices")

    if args.inference:
        compilers = DEFAULTS["inference"] if args.compilers is None else args.compilers
        flag_compilers = (
            DEFAULTS["flag_compilers"]["inference"]
            if args.flag_compilers is None
            else args.flag_compilers
        )
    else:
        assert args.training
        compilers = DEFAULTS["training"] if args.compilers is None else args.compilers
        flag_compilers = (
            DEFAULTS["flag_compilers"]["training"]
            if args.flag_compilers is None
            else args.flag_compilers
        )

    output_dir = args.output_dir
    args.compilers = compilers
    args.devices = devices
    args.dtypes = dtypes
    flag_compilers = list(set(flag_compilers) & set(compilers))
    args.flag_compilers = flag_compilers
    args.suites = suites

    if args.print_run_commands:
        generated_file = generate_commands(
            args, dtypes, suites, devices, compilers, output_dir
        )
        print(
            f"The run commands have been generated in {generated_file}. Please run them with (bash {generated_file})."
        )
    elif args.visualize_logs:
        parse_logs(args, dtypes, suites, devices, compilers, flag_compilers, output_dir)
    elif args.run:
        generated_file = generate_commands(
            args, dtypes, suites, devices, compilers, output_dir
        )
        # Generate the memoized archive name now so that the date reflects
        # when the run started
        get_archive_name(args, dtypes[0])
        # TODO - Do we need to worry about segfaults
        try:
            os.system(f"bash {generated_file}")
        except Exception as e:
            print(
                f"Running commands failed. Please run manually (bash {generated_file}) and inspect the errors."
            )
            raise e
        if not args.log_operator_inputs:
            if not args.no_update_archive:
                archive(
                    output_dir,
                    args.dashboard_archive_path,
                    args.archive_name,
                    dtypes[0],
                )
            parse_logs(
                args, dtypes, suites, devices, compilers, flag_compilers, output_dir
            )
            if not args.no_update_archive:
                archive(
                    output_dir,
                    args.dashboard_archive_path,
                    args.archive_name,
                    dtypes[0],
                )

    if args.update_dashboard:
        DashboardUpdater(args).update()