"""Default set of benchmarks. Parser notes: `parse_stmts`: - Width for the left (Python) column MUST be 40 characters. - The column separator is " | ", not "|". Whitespace matters. `GroupedVariants`: - `Setup` and `Global_Setup` (case insensitive) are reserved keywords to populate `setup` and `global_setup` for every generated benchmark. - To set a label for the succeeding block, add `# @YOUR_LABEL` (Python) or `// @YOUR_LABEL` (C++). """ # mypy: ignore-errors from core.api import GroupedModules, GroupedStmts, GroupedVariants from core.types import FlatIntermediateDefinition from core.utils import flatten, parse_stmts from definitions.setup import Setup BENCHMARKS: FlatIntermediateDefinition = flatten( { "Empty": { "no allocation": GroupedStmts( r"torch.empty(())", r"torch::empty({0});", ), "with allocation": GroupedStmts( r"torch.empty((1,))", r"torch::empty({1});", ), "overloads": GroupedVariants( cpp_block=r""" // @Setup auto options_empty = c10::TensorOptions(); auto options_full = c10::TensorOptions().dtype(at::kFloat).device(at::kCPU); auto optional_float = std::make_optional(at::kFloat); // @TensorOptions overload at::empty({0}, options_empty); at::empty({0}, options_full); at::empty({0}, at::kFloat); // implicit conversion // @Faithful overload at::empty({0}, std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt); at::empty({0}, at::kFloat, std::nullopt, std::nullopt, std::nullopt, std::nullopt); at::empty({0}, optional_float, std::nullopt, std::nullopt, std::nullopt, std::nullopt); """ ), }, "Pointwise": { "Math": GroupedVariants( *parse_stmts( r""" Python | C++ ---------------------------------------- | ---------------------------------------- # @setup | // @setup torch.manual_seed(138_10_23) | torch::manual_seed(1381023); x = torch.rand((4, 4)) | auto x = torch::rand({4, 4}); y_float = torch.ones((4, 4)) | auto y_float = torch::ones({4, 4}); y_vector = torch.ones((4, 1)) | auto y_vector = torch::ones({4, 1}); y_int = torch.ones( | auto y_int = torch::ones({4, 4}, at::kInt); (4, 4), dtype=torch.int32) | | # @add | // @add x += 1.0 | x += 1; x += y_float | x += y_float; x += y_vector | x += y_vector; x += y_int | x += y_int; x + y_float | x + y_float; torch.add(x, y_float) | torch::add(x, y_float); torch.add(x, y_float, out=x) | torch::add_out(/*out=*/x, x, y_float); | # @multiply | // @multiply x *= 1.0 | x *= 1; x *= y_float | x *= y_float; x *= y_vector | x *= y_vector; x *= y_int | x *= y_int; x * y_float | x * y_float; torch.mul(x, y_float) | torch::mul(x, y_float); torch.mul(x, y_float, out=x) | torch::mul_out(/*out=*/x, x, y_float); | # @equality | // @equality x == y_float | x == y_float; x == 1.0 | x == 1.0; """ ) ), "Data movement": GroupedVariants( *parse_stmts( r""" Python | C++ ---------------------------------------- | ---------------------------------------- # @setup | // @setup x = torch.ones((4, 4)) | auto x = torch::ones({4, 4}); y = torch.ones((4, 4)) | auto y = torch::ones({4, 4}); x_t = x.t() | auto x_t = x.t(); | # @contiguous (trivial) | // @contiguous (trivial) x.contiguous() | x.contiguous(); | # @contiguous (non-trivial) | // @contiguous (non-trivial) x_t.contiguous() | x_t.contiguous(); | # @clone | // @clone x.clone() | x.clone(); | # @copy_ | // @copy_ x.copy_(y) | x.copy_(y); | # @zero_ | // @zero_ x.zero_() | x.zero_(); | # @RNG | // @RNG x.uniform_() | x.uniform_(); """ ) ), }, "Reduction": GroupedVariants( *parse_stmts( r""" Python | C++ ---------------------------------------- | ---------------------------------------- # @setup | // @setup x = torch.ones((4, 4)) | auto x = torch::ones({4, 4}); | # @max | // @max x.max() | x.max(); | # @sum | // @sum x.sum() | x.sum(); | # @variance | // @variance x.var(0) | x.var(0); """ ) ), "Indexing": GroupedVariants( *parse_stmts( r""" Python | C++ ---------------------------------------- | ---------------------------------------- # @setup | // @setup | using namespace torch::indexing; torch.manual_seed(6626_10_34) | torch::manual_seed(66261034); | x = torch.randn(1, 1, 1) | auto x = torch::randn({1, 1, 1}); y = torch.randn(1, 1, 1) | auto y = torch::randn({1, 1, 1}); | # @Tensor-Scalar | // @Tensor-Scalar x[0] = 1 | x.index_put_({0}, 1); x[0, 0] = 1 | x.index_put_({0, 0}, 1); x[0, 0, 0] = 1 | x.index_put_({0, 0, 0}, 1); | # @Tensor-Scalar (Advanced) | // @Tensor-Scalar (Advanced) x[...] = 1 | x.index_put_({"..."}, 1); x[:] = 1 | x.index_put_({Slice(None, None, None)}, 1); x[None] = 1 | x.index_put_({None}, 1); x[False] = 1 | x.index_put_({false}, 1); x[True] = 1 | x.index_put_({true}, 1); | # @Tensor-Tensor | // @Tensor-Tensor x[0] = y[0] | x.index_put_({0}, y.index({0})); x[0, 0] = y[0, 0] | x.index_put_({0, 0}, y.index({0, 0})); x[0, 0, 0] = y[0, 0, 0] | x.index_put_({0, 0, 0}, y.index({0, 0, 0})); | # @Tensor-Tensor (Advanced) | // @Tensor-Tensor (Advanced) x[...] = y[...] | x.index_put_({"..."}, y.index({"..."})); x[:] = y[:] | x.index_put_({Slice(None, None, None)}, y.index({Slice(None, None, None)})); x[None] = y[None] | x.index_put_({None}, y.index({None})); x[False] = y[False] | x.index_put_({false}, y.index({false})); x[True] = y[True] | x.index_put_({true}, y.index({true})); """ ) ), "Metadata and views": GroupedVariants( *parse_stmts( r""" Python | C++ ---------------------------------------- | ---------------------------------------- # @setup | // @setup x = torch.ones((4, 4)) | auto x = torch::ones({4, 4}); | # @size | // @size x.size()[0] | x.sizes()[0]; | # @stride | // @stride x.stride(0) | x.stride(0); | # @as_strided | // @as_strided torch.as_strided(x, (2, 3), (4, 1), 2) | torch::as_strided(x, {2, 3}, {4, 1}, 2); | # @select | // @select x.select(1, 1) | x.select(1, 1); | # @unsqueeze | // @unsqueeze x.unsqueeze(0) | x.unsqueeze(0); | # @view | // @view x.view(-1, 1) | x.view({-1, 1}); | # @transpose | // @transpose x.t() | x.t(); | # @reshape | // @reshape x.reshape((16, 1)) | x.reshape({16, 1}); """ ) ), "nn Modules": { py_constructor.split("(")[0]: GroupedModules( f"model = torch.nn.{py_constructor}", f"auto model = torch::nn::{cpp_constructor};", setup=setup.value, signature="f(x) -> y", torchscript=torchscript, ) for setup, torchscript, (py_constructor, cpp_constructor) in ( (Setup.TRIVIAL_4D, True, ("BatchNorm2d(4)",) * 2), (Setup.TRIVIAL_4D, True, ("GroupNorm(2, 4)",) * 2), ( Setup.TRIVIAL_4D, True, ("LayerNorm(4)", "LayerNorm(torch::nn::LayerNormOptions({4}))"), ), (Setup.TRIVIAL_3D, True, ("Conv1d(4, 4, 1)",) * 2), (Setup.TRIVIAL_4D, True, ("Conv2d(4, 4, 1)",) * 2), (Setup.TRIVIAL_4D, True, ("MaxPool2d(2)",) * 2), (Setup.TRIVIAL_2D, True, ("ReLU()",) * 2), (Setup.TRIVIAL_2D, True, ("Sigmoid()",) * 2), (Setup.TRIVIAL_4D, True, ("Linear(4, 2)",) * 2), # TODO: LSTM can't be TorchScript'd (Setup.TRIVIAL_3D, False, ("LSTM(4, 2)",) * 2), ) }, "training": { "simple": GroupedStmts( *parse_stmts( r""" Python | C++ ---------------------------------------- | ---------------------------------------- a0 = torch.nn.functional.relu(x * w0) | auto a0 = torch::nn::functional::relu(x * w0); y = a0 * w1 | auto y = a0 * w1; """ ), Setup.TRAINING.value, num_threads=(1, 2), signature=r"f(x, w0, w1) -> y", torchscript=True, autograd=True, ), "ensemble": GroupedStmts( *parse_stmts( r""" Python | C++ ---------------------------------------- | ---------------------------------------- a0 = torch.nn.functional.gelu(x * w0) | auto a0 = torch::nn::functional::gelu(x * w0); a1 = torch.nn.functional.prelu(y, w1) | auto a1 = torch::nn::functional::prelu(y, w1); z = torch.nn.functional.normalize( | auto z = torch::nn::functional::normalize( torch.cat([a0, a1]), | torch::cat({a0, a1}), p=2.0, dim=0, | torch::nn::functional::NormalizeFuncOptions().p(2).dim(0) ).dot(w2) | ).dot(w2); """ ), Setup.TRAINING.value, num_threads=(1, 2), signature=r"f(x, y, w0, w1, w2) -> z", torchscript=True, autograd=True, ), }, "InferenceMode": GroupedVariants( # In general, the mixed input scenario is less common so its # perf can be less important than pure inference tensor inputs. cpp_block=r""" // @Setup auto s = torch::ones({3, 3}); // Normal Tensor c10::InferenceMode guard; auto x = torch::ones({3, 3}); // Inference Tensor // @View torch::Tensor y = x.view({9}); // @Inplace torch::Tensor y = x.mul_(x); // @Mixed torch::Tensor y = x + s; """ ), } )