#!/usr/bin/env python3
# benchmarks/dynamo/torchbench.py

import gc
import importlib
import logging
import os
import re
import sys
import warnings
from collections import namedtuple
from os.path import abspath, exists

import torch


try:
    from .common import BenchmarkRunner, load_yaml_file, main
except ImportError:
    from common import BenchmarkRunner, load_yaml_file, main

from torch._dynamo.testing import collect_results, reduce_to_scalar_loss
from torch._dynamo.utils import clone_inputs


# We are primarily interested in the tf32 datatype
torch.backends.cuda.matmul.allow_tf32 = True

# Enable FX graph caching
if "TORCHINDUCTOR_FX_GRAPH_CACHE" not in os.environ:
    torch._inductor.config.fx_graph_cache = True
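    # Note: an explicit environment setting always wins here. This default is
    # applied only when TORCHINDUCTOR_FX_GRAPH_CACHE is absent from os.environ,
    # so e.g. TORCHINDUCTOR_FX_GRAPH_CACHE=0 still disables the cache.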


def _reassign_parameters(model):
    # torch_geometric models register parameters as plain tensors due to
    # https://github.com/pyg-team/pytorch_geometric/blob/master/torch_geometric/nn/dense/linear.py#L158-L168
    # Since this is an unusual thing to do, we just reassign them to parameters
    def state_dict_hook(module, destination, prefix, local_metadata):
        for name, param in module.named_parameters():
            if isinstance(destination[name], torch.Tensor) and not isinstance(
                destination[name], torch.nn.Parameter
            ):
                destination[name] = torch.nn.Parameter(destination[name])

    model._register_state_dict_hook(state_dict_hook)
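    # Effect, in brief: after _reassign_parameters(model), entries in
    # model.state_dict() that correspond to named parameters but would have
    # been plain tensors come back wrapped as torch.nn.Parameter.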


def setup_torchbench_cwd():
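    """Find a nearby torchbench checkout, chdir into it, and add it to
    sys.path; return the original working directory so the caller can
    restore it later."""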
    original_dir = abspath(os.getcwd())

    os.environ["KALDI_ROOT"] = "/tmp"  # avoids some spam
    for torchbench_dir in (
        "./torchbenchmark",
        "../torchbenchmark",
        "../torchbench",
        "../benchmark",
        "../../torchbenchmark",
        "../../torchbench",
        "../../benchmark",
        "../../../torchbenchmark",
        "../../../torchbench",
        "../../../benchmark",
    ):
        if exists(torchbench_dir):
            break

    if exists(torchbench_dir):
        torchbench_dir = abspath(torchbench_dir)
        os.chdir(torchbench_dir)
        sys.path.append(torchbench_dir)

    return original_dir


def process_hf_reformer_output(out):
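    """Drop the second element of the output list, whose value is unstable
    and would produce spurious mismatches in accuracy comparison."""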
    assert isinstance(out, list)
    # second output is unstable
    return [elem for i, elem in enumerate(out) if i != 1]


def process_hf_whisper_output(out):
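    """Drop the unstable pieces of the whisper output: the "logits" entry of
    the first element and the entire second element."""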
    out_ret = []
    for i, elem in enumerate(out):
        if i == 0:
            assert isinstance(elem, dict)
            out_ret.append({k: v for k, v in elem.items() if k != "logits"})
        elif i != 1:
            out_ret.append(elem)

    return out_ret


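# Per-model post-processing applied to training outputs (under amp) before
# accuracy comparison; exposed below via get_output_amp_train_process_func.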
process_train_model_output = {
    "hf_Reformer": process_hf_reformer_output,
    "hf_Whisper": process_hf_whisper_output,
}


class TorchBenchmarkRunner(BenchmarkRunner):
    def __init__(self):
        super().__init__()
        self.suite_name = "torchbench"
        self.optimizer = None

    @property
    def _config(self):
        return load_yaml_file("torchbench.yaml")

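    # Rough shape of torchbench.yaml, as implied by the accessors below
    # (a sketch, not the complete file):
    #
    #   skip:
    #     all: [...]
    #     device: {cpu: [...], cuda: [...]}
    #     freezing: {cpu: [...], cuda: [...]}
    #     test: {training: [...]}
    #     multiprocess: [...]
    #     control_flow: [...]
    #   batch_size: {training: {...}, inference: {...}}
    #   tolerance: {higher_fp16: [...], higher_bf16: [...], higher: [...],
    #               even_higher: [...], cosine: [...]}
    #   accuracy:
    #     skip: {large_models: [...], eager_not_deterministic: [...]}
    #     max_batch_size: {...}
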
    @property
    def _skip(self):
        return self._config["skip"]

    @property
    def _batch_size(self):
        return self._config["batch_size"]

    @property
    def _tolerance(self):
        return self._config["tolerance"]

    @property
    def _require_larger_multiplier_for_smaller_tensor(self):
        return self._config["require_larger_multiplier_for_smaller_tensor"]

    @property
    def _accuracy(self):
        return self._config["accuracy"]

    @property
    def skip_models(self):
        return self._skip["all"]

    @property
    def skip_models_for_cpu(self):
        return self._skip["device"]["cpu"]

    @property
    def skip_models_for_cuda(self):
        return self._skip["device"]["cuda"]

    @property
    def skip_models_for_freezing_cuda(self):
        return self._skip["freezing"]["cuda"]

    @property
    def skip_models_for_freezing_cpu(self):
        return self._skip["freezing"]["cpu"]

    @property
    def slow_models(self):
        return self._config["slow"]

    @property
    def very_slow_models(self):
        return self._config["very_slow"]

    @property
    def non_deterministic_models(self):
        return self._config["non_deterministic"]

    @property
    def get_output_amp_train_process_func(self):
        return process_train_model_output

    @property
    def skip_not_suitable_for_training_models(self):
        return self._skip["test"]["training"]

    @property
    def failing_fx2trt_models(self):
        return self._config["trt_not_yet_working"]

    @property
    def force_amp_for_fp16_bf16_models(self):
        return self._config["dtype"]["force_amp_for_fp16_bf16_models"]

    @property
    def force_fp16_for_bf16_models(self):
        return self._config["dtype"]["force_fp16_for_bf16_models"]

    @property
    def skip_accuracy_checks_large_models_dashboard(self):
        if self.args.dashboard or self.args.accuracy:
            return self._accuracy["skip"]["large_models"]
        return set()

    @property
    def skip_accuracy_check_as_eager_non_deterministic(self):
        if self.args.accuracy and self.args.training:
            return self._accuracy["skip"]["eager_not_deterministic"]
        return set()

    @property
    def skip_multiprocess_models(self):
        return self._skip["multiprocess"]

    @property
    def skip_models_due_to_control_flow(self):
        return self._skip["control_flow"]

    @property
    def guard_on_nn_module_models(self):
        return {
            "vision_maskrcnn",
        }

    @property
    def inline_inbuilt_nn_modules_models(self):
        return {
            "basic_gnn_edgecnn",
            "drq",
            "hf_Reformer",
            "DALLE2_pytorch",
            "hf_BigBird",
            "detectron2_maskrcnn_r_50_fpn",
            "detectron2_maskrcnn_r_101_fpn",
            "vision_maskrcnn",
            "doctr_reco_predictor",
            "hf_T5_generate",
        }

    def load_model(
        self,
        device,
        model_name,
        batch_size=None,
        part=None,
        extra_args=None,
    ):
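        """Import the named torchbench model, resolve its batch size, build
        the benchmark in train or eval mode, and return the tuple
        (device, name, model, example_inputs, batch_size)."""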
        if self.args.enable_activation_checkpointing:
            raise NotImplementedError(
                "Activation checkpointing not implemented for Torchbench models"
            )
        is_training = self.args.training
        use_eval_mode = self.args.use_eval_mode
        dynamic_shapes = self.args.dynamic_shapes
        candidates = [
            f"torchbenchmark.models.{model_name}",
            f"torchbenchmark.canary_models.{model_name}",
            f"torchbenchmark.models.fb.{model_name}",
        ]
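        # Try each candidate namespace in order; the for/else below raises
        # only if no candidate imported (i.e. the loop never hit `break`).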
        for c in candidates:
            try:
                module = importlib.import_module(c)
                break
            except ModuleNotFoundError as e:
                if e.name != c:
                    raise
        else:
            raise ImportError(f"could not import any of {candidates}")
        benchmark_cls = getattr(module, "Model", None)
        if benchmark_cls is None:
            raise NotImplementedError(f"{model_name}.Model is None")

        if not hasattr(benchmark_cls, "name"):
            benchmark_cls.name = model_name

        cant_change_batch_size = (
            not getattr(benchmark_cls, "ALLOW_CUSTOMIZE_BSIZE", True)
            or model_name in self._config["dont_change_batch_size"]
        )
        if cant_change_batch_size:
            batch_size = None
        if (
            batch_size is None
            and is_training
            and model_name in self._batch_size["training"]
        ):
            batch_size = self._batch_size["training"][model_name]
        elif (
            batch_size is None
            and not is_training
            and model_name in self._batch_size["inference"]
        ):
            batch_size = self._batch_size["inference"][model_name]

        # Control the memory footprint for a few models
        if self.args.accuracy and model_name in self._accuracy["max_batch_size"]:
            batch_size = min(batch_size, self._accuracy["max_batch_size"][model_name])

        # Workaround for "RuntimeError: not allowed to set torch.backends.cudnn flags"
        torch.backends.__allow_nonbracketed_mutation_flag = True
        if extra_args is None:
            extra_args = []
        if part:
            extra_args += ["--part", part]

        # sam_fast only runs with amp
        if model_name == "sam_fast":
            self.args.amp = True
            self.setup_amp()

        if model_name == "vision_maskrcnn" and is_training:
            # The output of vision_maskrcnn is a list of bounding boxes sorted
            # by score, which makes accuracy comparison hard with torch.compile:
            # compilation can cause minor divergences in the output because of
            # how fusion works for amp in TorchInductor compared to eager.
            # Therefore, instead of looking at all the bounding boxes, we
            # compare only the top 4.
            model_kwargs = {"box_detections_per_img": 4}
            benchmark = benchmark_cls(
                test="train",
                device=device,
                batch_size=batch_size,
                extra_args=extra_args,
                model_kwargs=model_kwargs,
            )
            use_eval_mode = True
        elif is_training:
            benchmark = benchmark_cls(
                test="train",
                device=device,
                batch_size=batch_size,
                extra_args=extra_args,
            )
        else:
            benchmark = benchmark_cls(
                test="eval",
                device=device,
                batch_size=batch_size,
                extra_args=extra_args,
            )
        model, example_inputs = benchmark.get_module()
        if model_name in [
            "basic_gnn_edgecnn",
            "basic_gnn_gcn",
            "basic_gnn_sage",
            "basic_gnn_gin",
        ]:
            _reassign_parameters(model)

        # Models that must be in train mode while training
        if is_training and (
            not use_eval_mode or model_name in self._config["only_training"]
        ):
            model.train()
        else:
            model.eval()
        gc.collect()
        batch_size = benchmark.batch_size
        if model_name == "torchrec_dlrm":
            batch_namedtuple = namedtuple(
                "Batch", "dense_features sparse_features labels"
            )
            example_inputs = tuple(
                batch_namedtuple(
                    dense_features=batch.dense_features,
                    sparse_features=batch.sparse_features,
                    labels=batch.labels,
                )
                for batch in example_inputs
            )
        # Torchbench has a quite different setup for yolov3, so we directly
        # pass the right example_inputs
        if model_name == "yolov3":
            example_inputs = (torch.rand(batch_size, 3, 384, 512).to(device),)
        # See https://github.com/pytorch/benchmark/issues/1561
        if model_name == "maml_omniglot":
            batch_size = 5
            assert example_inputs[0].shape[0] == batch_size
        if model_name == "vision_maskrcnn":
            batch_size = 1
        # global current_name, current_device
        # current_device = device
        # current_name = benchmark.name

        if self.args.trace_on_xla:
            # workaround for: https://github.com/pytorch/xla/issues/4174
            import torch_xla  # noqa: F401
        self.validate_model(model, example_inputs)
        return device, benchmark.name, model, example_inputs, batch_size

    def iter_model_names(self, args):
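        """Yield the model names for this shard of the torchbench suite (plus
        any configured canary models), after applying the CLI filter/exclude
        options and the skip list."""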
        from torchbenchmark import _list_canary_model_paths, _list_model_paths

        models = _list_model_paths()
        models += [
            f
            for f in _list_canary_model_paths()
            if os.path.basename(f) in self._config["canary_models"]
        ]
        models.sort()

        start, end = self.get_benchmark_indices(len(models))
        for index, model_path in enumerate(models):
            if index < start or index >= end:
                continue

            model_name = os.path.basename(model_path)
            if (
                not re.search("|".join(args.filter), model_name, re.IGNORECASE)
                or re.search("|".join(args.exclude), model_name, re.IGNORECASE)
                or model_name in args.exclude_exact
                or model_name in self.skip_models
            ):
                continue

            yield model_name

    def pick_grad(self, name, is_training):
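        # maml adapts its parameters in an inner loop even at inference time,
        # so it needs gradients regardless of is_training.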
        if is_training or name in ("maml",):
            return torch.enable_grad()
        else:
            return torch.no_grad()

    def use_larger_multiplier_for_smaller_tensor(self, name):
        return name in self._require_larger_multiplier_for_smaller_tensor

    def get_tolerance_and_cosine_flag(self, is_training, current_device, name):
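        """Return the (tolerance, cosine) pair used for accuracy comparison,
        widened per dtype and per model via the tolerance section of the
        YAML config."""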
        tolerance = 1e-4
        cosine = self.args.cosine
        # Increase the tolerance for torch.allclose
        if self.args.float16 or self.args.amp:
            if name in self._tolerance["higher_fp16"]:
                return 1e-2, cosine
            elif name in self._tolerance["even_higher"]:
                return 8 * 1e-2, cosine
            return 1e-3, cosine

        if self.args.bfloat16:
            if name in self._tolerance["higher_bf16"]:
                return 1e-2, cosine

        if is_training and (current_device == "cuda" or current_device == "xpu"):
            tolerance = 1e-3
            if name in self._tolerance["cosine"]:
                cosine = True
            elif name in self._tolerance["higher"]:
                tolerance = 1e-3
            elif name in self._tolerance["even_higher"]:
                tolerance = 8 * 1e-2
        return tolerance, cosine

    def compute_loss(self, pred):
        return reduce_to_scalar_loss(pred)

    def forward_pass(self, mod, inputs, collect_outputs=True):
        with self.autocast(**self.autocast_arg):
            if isinstance(inputs, dict):
                return mod(**inputs)
            else:
                return mod(*inputs)

    def forward_and_backward_pass(self, mod, inputs, collect_outputs=True):
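        # Clone the inputs first: some models mutate their inputs in place,
        # and the originals are needed unchanged for comparison runs.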
        cloned_inputs = clone_inputs(inputs)
        self.optimizer_zero_grad(mod)
        with self.autocast(**self.autocast_arg):
            if isinstance(cloned_inputs, dict):
                pred = mod(**cloned_inputs)
            else:
                pred = mod(*cloned_inputs)
            loss = self.compute_loss(pred)
        self.grad_scaler.scale(loss).backward()
        self.optimizer_step()
        if collect_outputs:
            return collect_results(mod, pred, loss, cloned_inputs)
        return None


def torchbench_main():
    original_dir = setup_torchbench_cwd()
    logging.basicConfig(level=logging.WARNING)
    warnings.filterwarnings("ignore")
    main(TorchBenchmarkRunner(), original_dir)


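# Typical invocation, run from benchmarks/dynamo with a torchbench checkout in
# a sibling directory (the flags are parsed in common.py; the exact spelling
# here is an assumption):
#
#   python torchbench.py --performance --inductor --only hf_Bert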
if __name__ == "__main__":
    torchbench_main()