1{ 2 "test_AllenaiLongformerBase_repro_cpu (__main__.CpuHalideTests)": 211.949, 3 "test_adaptive_max_pool2d1_cpu (__main__.CpuHalideTests)": 111.929, 4 "test_alexnet_prefix_cpu (__main__.CpuHalideTests)": 185.141, 5 "test_aot_autograd_symbolic_exhaustive_linalg_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 67.44693333333333, 6 "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool1d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 103.4952, 7 "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 215.06906666666666, 8 "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool3d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 126.95360000000001, 9 "test_aot_autograd_symbolic_exhaustive_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 69.75275, 10 "test_aot_autograd_symbolic_module_exhaustive_nn_TransformerDecoderLayer_cpu_float32 (__main__.TestEagerFusionModuleInfoCPU)": 110.57966666666667, 11 "test_aot_export_joint_simple_repro_dynamic_shapes (__main__.DynamicShapesAotAutogradFallbackTests)": 345.20975000000004, 12 "test_aoti_eager_override_registration_cpu (__main__.CpuTests)": 81.80724000000001, 13 "test_aoti_eager_override_registration_cuda (__main__.GPUTests)": 81.5502857142857, 14 "test_aoti_eager_override_registration_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 80.72995238095237, 15 "test_aoti_eager_override_registration_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 79.88047619047619, 16 "test_aoti_eager_override_registration_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 75.02325, 17 "test_aoti_eager_override_registration_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 74.38550000000001, 18 "test_aoti_eager_with_scalar_cpu (__main__.CpuTests)": 87.54754166666667, 19 "test_aoti_eager_with_scalar_cuda (__main__.GPUTests)": 85.06014285714285, 20 "test_aoti_eager_with_scalar_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 87.73019047619047, 21 "test_aoti_eager_with_scalar_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 87.14119047619049, 22 "test_aoti_eager_with_scalar_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 83.3455, 23 "test_aoti_eager_with_scalar_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 82.4865, 24 "test_avg_pool3d_backward2_cuda (__main__.GPUTests)": 94.67914285714285, 25 "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 70.80028571428572, 26 "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 66.188125, 27 "test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 66.239, 28 "test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 135.30899999999997, 29 "test_avg_pool3d_backward_cpu (__main__.CpuHalideTests)": 61.719, 30 "test_backward_nn_functional_multi_head_attention_forward_cpu_float32 (__main__.TestCompositeComplianceCPU)": 78.70693333333334, 31 "test_backward_nn_functional_multi_head_attention_forward_cuda_float32 (__main__.TestCompositeComplianceCUDA)": 96.261, 32 "test_basic_cpu (__main__.EfficientConvBNEvalCpuTests)": 237.76485714285712, 33 "test_basic_cuda (__main__.EfficientConvBNEvalCudaTests)": 92.503, 34 "test_captured_score_mod_aot_eager_gradcheck_score_mod_name__head_offset_mode_eager (__main__.TestFlexAttention)": 139.8705, 35 "test_checkpoint_cast (__main__.TestFxToOnnx)": 136.983, 36 "test_comprehensive_constant_pad_nd_cpu_float16 (__main__.TestInductorOpInfoCPU)": 60.31935294117646, 37 "test_comprehensive_diff_cpu_bool (__main__.TestInductorOpInfoCPU)": 92.7407, 38 "test_comprehensive_diff_cpu_float32 (__main__.TestInductorOpInfoCPU)": 92.67049999999999, 39 "test_comprehensive_diff_cpu_float64 (__main__.TestInductorOpInfoCPU)": 91.261, 40 "test_comprehensive_diff_cpu_int32 (__main__.TestInductorOpInfoCPU)": 92.00640000000001, 41 "test_comprehensive_diff_cpu_int64 (__main__.TestInductorOpInfoCPU)": 88.7649, 42 "test_comprehensive_diff_cuda_complex128 (__main__.TestDecompCUDA)": 120.986, 43 "test_comprehensive_diff_cuda_complex64 (__main__.TestDecompCUDA)": 105.15944444444446, 44 "test_comprehensive_diff_cuda_float32 (__main__.TestDecompCUDA)": 87.45349999999999, 45 "test_comprehensive_diff_cuda_float64 (__main__.TestDecompCUDA)": 88.94550000000001, 46 "test_comprehensive_dist_cpu_float16 (__main__.TestInductorOpInfoCPU)": 72.8767, 47 "test_comprehensive_dist_cpu_float32 (__main__.TestInductorOpInfoCPU)": 71.64869999999999, 48 "test_comprehensive_dist_cpu_float64 (__main__.TestInductorOpInfoCPU)": 70.62299999999999, 49 "test_comprehensive_eye_cpu_bool (__main__.TestInductorOpInfoCPU)": 112.79639999999999, 50 "test_comprehensive_eye_cpu_float16 (__main__.TestInductorOpInfoCPU)": 110.69359999999999, 51 "test_comprehensive_eye_cpu_float32 (__main__.TestInductorOpInfoCPU)": 111.8332, 52 "test_comprehensive_eye_cpu_float64 (__main__.TestInductorOpInfoCPU)": 113.01580000000001, 53 "test_comprehensive_eye_cpu_int32 (__main__.TestInductorOpInfoCPU)": 110.6647, 54 "test_comprehensive_eye_cpu_int64 (__main__.TestInductorOpInfoCPU)": 113.61270000000002, 55 "test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestDecompCPU)": 337.5013, 56 "test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 112.65060000000001, 57 "test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestDecompCPU)": 352.82779999999997, 58 "test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 67.2527, 59 "test_comprehensive_grid_sampler_2d_cuda_bfloat16 (__main__.TestDecompCUDA)": 277.8468888888889, 60 "test_comprehensive_grid_sampler_2d_cuda_float16 (__main__.TestDecompCUDA)": 261.31533333333334, 61 "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestDecompCUDA)": 1217.510111111111, 62 "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 73.16566666666667, 63 "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestDecompCUDA)": 1187.5324999999998, 64 "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 81.23666666666666, 65 "test_comprehensive_linalg_svd_cuda_complex128 (__main__.TestDecompCUDA)": 83.36449999999999, 66 "test_comprehensive_linalg_svd_cuda_complex64 (__main__.TestDecompCUDA)": 85.197, 67 "test_comprehensive_linalg_vector_norm_cpu_float16 (__main__.TestInductorOpInfoCPU)": 176.8523, 68 "test_comprehensive_linalg_vector_norm_cpu_float32 (__main__.TestInductorOpInfoCPU)": 176.5644, 69 "test_comprehensive_linalg_vector_norm_cpu_float64 (__main__.TestInductorOpInfoCPU)": 176.33440000000002, 70 "test_comprehensive_logspace_cpu_float32 (__main__.TestInductorOpInfoCPU)": 379.27200000000005, 71 "test_comprehensive_logspace_cpu_float64 (__main__.TestInductorOpInfoCPU)": 382.0692, 72 "test_comprehensive_logspace_cpu_int32 (__main__.TestInductorOpInfoCPU)": 365.48, 73 "test_comprehensive_logspace_cpu_int64 (__main__.TestInductorOpInfoCPU)": 366.99120000000005, 74 "test_comprehensive_masked_amax_cpu_float16 (__main__.TestInductorOpInfoCPU)": 85.73589999999999, 75 "test_comprehensive_masked_amax_cpu_float32 (__main__.TestInductorOpInfoCPU)": 84.76559999999999, 76 "test_comprehensive_masked_amax_cpu_float64 (__main__.TestInductorOpInfoCPU)": 83.74539999999999, 77 "test_comprehensive_masked_amax_cpu_int32 (__main__.TestInductorOpInfoCPU)": 81.6752, 78 "test_comprehensive_masked_amax_cpu_int64 (__main__.TestInductorOpInfoCPU)": 80.1269, 79 "test_comprehensive_masked_amin_cpu_float16 (__main__.TestInductorOpInfoCPU)": 85.1681, 80 "test_comprehensive_masked_amin_cpu_float32 (__main__.TestInductorOpInfoCPU)": 87.01599999999999, 81 "test_comprehensive_masked_amin_cpu_float64 (__main__.TestInductorOpInfoCPU)": 85.30009999999999, 82 "test_comprehensive_masked_amin_cpu_int32 (__main__.TestInductorOpInfoCPU)": 81.06280000000001, 83 "test_comprehensive_masked_amin_cpu_int64 (__main__.TestInductorOpInfoCPU)": 84.49640000000001, 84 "test_comprehensive_masked_mean_cpu_bool (__main__.TestInductorOpInfoCPU)": 82.6498, 85 "test_comprehensive_masked_mean_cpu_float16 (__main__.TestInductorOpInfoCPU)": 85.0721, 86 "test_comprehensive_masked_mean_cpu_float32 (__main__.TestInductorOpInfoCPU)": 86.45490000000002, 87 "test_comprehensive_masked_mean_cpu_float64 (__main__.TestInductorOpInfoCPU)": 84.9486, 88 "test_comprehensive_masked_mean_cpu_int32 (__main__.TestInductorOpInfoCPU)": 85.1464, 89 "test_comprehensive_masked_mean_cpu_int64 (__main__.TestInductorOpInfoCPU)": 83.1313, 90 "test_comprehensive_masked_norm_cpu_float16 (__main__.TestInductorOpInfoCPU)": 422.03270000000003, 91 "test_comprehensive_masked_norm_cpu_float32 (__main__.TestInductorOpInfoCPU)": 419.49539999999996, 92 "test_comprehensive_masked_norm_cpu_float64 (__main__.TestInductorOpInfoCPU)": 409.55060000000003, 93 "test_comprehensive_masked_norm_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 93.67716666666666, 94 "test_comprehensive_masked_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 88.622, 95 "test_comprehensive_masked_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 91.79166666666667, 96 "test_comprehensive_masked_prod_cpu_bool (__main__.TestInductorOpInfoCPU)": 81.3001, 97 "test_comprehensive_masked_prod_cpu_float16 (__main__.TestInductorOpInfoCPU)": 86.5596, 98 "test_comprehensive_masked_prod_cpu_float32 (__main__.TestInductorOpInfoCPU)": 85.2926, 99 "test_comprehensive_masked_prod_cpu_float64 (__main__.TestInductorOpInfoCPU)": 84.71660000000001, 100 "test_comprehensive_masked_prod_cpu_int32 (__main__.TestInductorOpInfoCPU)": 84.0162, 101 "test_comprehensive_masked_prod_cpu_int64 (__main__.TestInductorOpInfoCPU)": 81.37209999999999, 102 "test_comprehensive_masked_sum_cpu_bool (__main__.TestInductorOpInfoCPU)": 81.57050000000001, 103 "test_comprehensive_masked_sum_cpu_float16 (__main__.TestInductorOpInfoCPU)": 82.18870000000001, 104 "test_comprehensive_masked_sum_cpu_float32 (__main__.TestInductorOpInfoCPU)": 82.77929999999999, 105 "test_comprehensive_masked_sum_cpu_float64 (__main__.TestInductorOpInfoCPU)": 81.9615, 106 "test_comprehensive_masked_sum_cpu_int32 (__main__.TestInductorOpInfoCPU)": 82.8871, 107 "test_comprehensive_masked_sum_cpu_int64 (__main__.TestInductorOpInfoCPU)": 83.2116, 108 "test_comprehensive_nn_functional_conv_transpose3d_cuda_complex128 (__main__.TestDecompCUDA)": 62.840444444444444, 109 "test_comprehensive_nn_functional_conv_transpose3d_cuda_complex64 (__main__.TestDecompCUDA)": 63.12155555555556, 110 "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestDecompCUDA)": 115.99399999999999, 111 "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float64 (__main__.TestDecompCUDA)": 112.42922222222222, 112 "test_comprehensive_nn_functional_glu_cpu_float16 (__main__.TestInductorOpInfoCPU)": 66.2836, 113 "test_comprehensive_nn_functional_glu_cpu_float32 (__main__.TestInductorOpInfoCPU)": 63.87760000000001, 114 "test_comprehensive_nn_functional_glu_cpu_float64 (__main__.TestInductorOpInfoCPU)": 61.07164705882354, 115 "test_comprehensive_nn_functional_grid_sample_cpu_float32 (__main__.TestDecompCPU)": 93.46609090909091, 116 "test_comprehensive_nn_functional_grid_sample_cpu_float64 (__main__.TestDecompCPU)": 92.66881818181818, 117 "test_comprehensive_nn_functional_grid_sample_cuda_bfloat16 (__main__.TestDecompCUDA)": 72.35, 118 "test_comprehensive_nn_functional_grid_sample_cuda_float16 (__main__.TestDecompCUDA)": 64.90466666666666, 119 "test_comprehensive_nn_functional_grid_sample_cuda_float32 (__main__.TestDecompCUDA)": 265.2443333333333, 120 "test_comprehensive_nn_functional_grid_sample_cuda_float64 (__main__.TestDecompCUDA)": 250.08033333333333, 121 "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float32 (__main__.TestDecompCUDA)": 61.85044444444444, 122 "test_comprehensive_nn_functional_interpolate_bicubic_cuda_float64 (__main__.TestDecompCUDA)": 63.002444444444436, 123 "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float32 (__main__.TestDecompCUDA)": 102.0025, 124 "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float64 (__main__.TestDecompCUDA)": 104.59100000000001, 125 "test_comprehensive_nn_functional_max_pool1d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 152.2596, 126 "test_comprehensive_nn_functional_max_pool1d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 140.01214285714286, 127 "test_comprehensive_nn_functional_max_pool1d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 140.58085714285716, 128 "test_comprehensive_nn_functional_max_pool2d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 710.7855714285714, 129 "test_comprehensive_nn_functional_max_pool2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 697.3474285714285, 130 "test_comprehensive_nn_functional_max_pool2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 678.1218571428572, 131 "test_comprehensive_nn_functional_max_pool2d_cpu_int32 (__main__.TestInductorOpInfoCPU)": 641.9231428571428, 132 "test_comprehensive_nn_functional_max_pool2d_cpu_int64 (__main__.TestInductorOpInfoCPU)": 655.1732857142857, 133 "test_comprehensive_nn_functional_max_pool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 775.9625, 134 "test_comprehensive_nn_functional_max_pool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 767.9121666666666, 135 "test_comprehensive_nn_functional_max_pool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 778.5028333333333, 136 "test_comprehensive_nn_functional_pad_constant_cpu_float16 (__main__.TestInductorOpInfoCPU)": 60.259235294117644, 137 "test_comprehensive_nn_functional_pad_constant_cpu_float32 (__main__.TestInductorOpInfoCPU)": 60.22264705882352, 138 "test_comprehensive_nn_functional_pad_constant_cpu_float64 (__main__.TestInductorOpInfoCPU)": 60.483411764705885, 139 "test_comprehensive_nn_functional_poisson_nll_loss_cpu_float16 (__main__.TestInductorOpInfoCPU)": 94.4827142857143, 140 "test_comprehensive_nn_functional_poisson_nll_loss_cpu_float32 (__main__.TestInductorOpInfoCPU)": 96.45214285714285, 141 "test_comprehensive_nn_functional_poisson_nll_loss_cpu_float64 (__main__.TestInductorOpInfoCPU)": 91.70985714285715, 142 "test_comprehensive_nn_functional_poisson_nll_loss_cpu_int32 (__main__.TestInductorOpInfoCPU)": 95.28557142857143, 143 "test_comprehensive_nn_functional_poisson_nll_loss_cpu_int64 (__main__.TestInductorOpInfoCPU)": 92.5167142857143, 144 "test_comprehensive_nn_functional_unfold_cpu_float16 (__main__.TestInductorOpInfoCPU)": 189.38628571428572, 145 "test_comprehensive_nn_functional_unfold_cpu_float32 (__main__.TestInductorOpInfoCPU)": 183.38171428571428, 146 "test_comprehensive_nn_functional_unfold_cpu_float64 (__main__.TestInductorOpInfoCPU)": 184.58571428571423, 147 "test_comprehensive_ormqr_cuda_complex128 (__main__.TestDecompCUDA)": 137.61211111111112, 148 "test_comprehensive_ormqr_cuda_complex64 (__main__.TestDecompCUDA)": 139.59522222222222, 149 "test_comprehensive_ormqr_cuda_float32 (__main__.TestDecompCUDA)": 88.364, 150 "test_comprehensive_ormqr_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 83.6305, 151 "test_comprehensive_ormqr_cuda_float64 (__main__.TestDecompCUDA)": 87.9585, 152 "test_comprehensive_pca_lowrank_cuda_complex128 (__main__.TestDecompCUDA)": 68.5215, 153 "test_comprehensive_pca_lowrank_cuda_complex64 (__main__.TestDecompCUDA)": 62.06933333333333, 154 "test_comprehensive_svd_cuda_complex128 (__main__.TestDecompCUDA)": 94.2525, 155 "test_comprehensive_svd_cuda_complex64 (__main__.TestDecompCUDA)": 94.307, 156 "test_comprehensive_svd_lowrank_cuda_complex128 (__main__.TestDecompCUDA)": 68.14222222222222, 157 "test_comprehensive_svd_lowrank_cuda_complex64 (__main__.TestDecompCUDA)": 72.507, 158 "test_cond_autograd_nested (__main__.TestControlFlow)": 162.97220000000002, 159 "test_constructor_autograd_SparseBSC_cuda (__main__.TestSparseAnyCUDA)": 110.65333333333334, 160 "test_constructor_autograd_SparseBSR_cuda (__main__.TestSparseAnyCUDA)": 119.97566666666665, 161 "test_constructor_autograd_SparseCSC_cuda (__main__.TestSparseAnyCUDA)": 86.57166666666667, 162 "test_constructor_autograd_SparseCSR_cuda (__main__.TestSparseAnyCUDA)": 88.60611111111112, 163 "test_conv1d_basic (__main__.TestXNNPACKConv1dTransformPass)": 93.19026470588237, 164 "test_conv2d_binary_inplace_fusion_failed_cpu_cpp_wrapper (__main__.TestCppWrapper)": 75.7442, 165 "test_conv3d_binary_dynamic_shapes (__main__.TestDynamicPatternMatcher)": 113.2255238095238, 166 "test_conv3d_unary_dynamic_shapes (__main__.TestDynamicPatternMatcher)": 71.51290476190476, 167 "test_conv_freezing_non_abi_compatible_cuda (__main__.AOTInductorTestNonABICompatibleCuda)": 73.27433333333333, 168 "test_conv_transpose2d_packed_cpu_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 62.88960000000001, 169 "test_correctness_NAdam_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 66.9665, 170 "test_cusparse_multiple_threads_same_device (__main__.TestCuda)": 93.28726315789474, 171 "test_custom_module_lstm (__main__.TestQuantizedOps)": 81.82516, 172 "test_ddp_model_diff_shape_across_ranks (__main__.TestDistBackendWithSpawn)": 91.468, 173 "test_ddp_uneven_inputs (__main__.TestDistBackendWithSpawn)": 538.063, 174 "test_diff_hyperparams_sharding_strategy_str_full_shard (__main__.TestFSDPUseOrigParamsMultipleParamGroups)": 70.84433333333332, 175 "test_diff_hyperparams_sharding_strategy_str_no_shard (__main__.TestFSDPUseOrigParamsMultipleParamGroups)": 69.47433333333333, 176 "test_diff_hyperparams_sharding_strategy_str_shard_grad_op (__main__.TestFSDPUseOrigParamsMultipleParamGroups)": 67.11266666666667, 177 "test_dispatch_symbolic_meta_outplace_all_strides_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestMetaCUDA)": 113.79488888888889, 178 "test_dtypeview_cpu (__main__.CpuTests)": 84.04108000000001, 179 "test_dtypeview_cuda_cuda_wrapper (__main__.TestCudaWrapper)": 279.51483333333334, 180 "test_dtypeview_cuda_dynamic_shapes_cuda_wrapper (__main__.DynamicShapesCudaWrapperCudaTests)": 283.54316666666665, 181 "test_dtypeview_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 96.16909523809525, 182 "test_dtypeview_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 97.33604761904763, 183 "test_fail_creation_ops.py (__main__.TestTyping)": 65.19961538461537, 184 "test_fail_random.py (__main__.TestTyping)": 63.64657575757575, 185 "test_fake_tensor_mode_huggingface_databricks_dolly_v2_3b (__main__.TORCH_EXPORT_EXPORTEDPROGRAM)": 96.72200000000001, 186 "test_fake_tensor_mode_huggingface_google_t5 (__main__.TORCH_EXPORT_EXPORTEDPROGRAM)": 108.259, 187 "test_fake_tensor_mode_huggingface_google_t5 (__main__.TORCH_NN_MODULE)": 209.738, 188 "test_fn_fwgrad_bwgrad_cumprod_cuda_complex128 (__main__.TestFwdGradientsCUDA)": 100.12588888888891, 189 "test_fn_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 108.84245454545454, 190 "test_fn_gradgrad_map_nested_cpu_float64 (__main__.TestBwdGradientsCPU)": 81.5349411764706, 191 "test_fn_gradgrad_map_nested_cuda_float64 (__main__.TestBwdGradientsCUDA)": 78.576, 192 "test_fn_gradgrad_map_triple_nested_cpu_float64 (__main__.TestBwdGradientsCPU)": 514.4126666666667, 193 "test_fn_gradgrad_map_triple_nested_cuda_float64 (__main__.TestBwdGradientsCUDA)": 398.1390909090909, 194 "test_fn_gradgrad_ormqr_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 70.3155, 195 "test_fuse_large_params_cpu (__main__.CpuTests)": 101.38141666666665, 196 "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 142.97099999999998, 197 "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 144.0665238095238, 198 "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 73.9485, 199 "test_grad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 97.20400000000001, 200 "test_gradgrad_nn_TransformerDecoderLayer_cuda_float64 (__main__.TestModuleCUDA)": 230.74563636363638, 201 "test_gradgrad_nn_TransformerEncoder_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 130.09063636363638, 202 "test_gradgrad_nn_TransformerEncoder_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 152.54545454545453, 203 "test_grid_sampler_2d_cpu (__main__.CpuHalideTests)": 191.693, 204 "test_inplace_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 105.473, 205 "test_jit_cuda_archflags (__main__.TestCppExtensionJIT)": 136.84644444444444, 206 "test_linear_binary_cpp_wrapper (__main__.TestCppWrapper)": 382.9056, 207 "test_linear_binary_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 403.69230000000005, 208 "test_linear_dynamic_shapes_batch_size_1000_in_features_1000_out_features_1024_bias_False_input_3d_False_cpu_float16 (__main__.TestSelectAlgorithmDynamicShapesCPU)": 66.2571, 209 "test_linear_dynamic_shapes_batch_size_1000_in_features_1000_out_features_1024_bias_False_input_3d_True_cpu_float16 (__main__.TestSelectAlgorithmDynamicShapesCPU)": 131.267, 210 "test_linear_dynamic_shapes_batch_size_1000_in_features_1000_out_features_1024_bias_True_input_3d_False_cpu_float16 (__main__.TestSelectAlgorithmDynamicShapesCPU)": 69.4666, 211 "test_linear_dynamic_shapes_batch_size_1000_in_features_1000_out_features_1024_bias_True_input_3d_True_cpu_float16 (__main__.TestSelectAlgorithmDynamicShapesCPU)": 134.5207, 212 "test_linear_packed_cpp_wrapper (__main__.TestCppWrapper)": 198.969, 213 "test_linear_packed_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 206.356, 214 "test_linear_static_shapes_batch_size_1000_in_features_1000_out_features_1024_bias_False_input_3d_False_cpu_float16 (__main__.TestSelectAlgorithmCPU)": 67.953, 215 "test_linear_static_shapes_batch_size_1000_in_features_1000_out_features_1024_bias_False_input_3d_True_cpu_float16 (__main__.TestSelectAlgorithmCPU)": 128.8496, 216 "test_linear_static_shapes_batch_size_1000_in_features_1000_out_features_1024_bias_True_input_3d_False_cpu_float16 (__main__.TestSelectAlgorithmCPU)": 71.0599, 217 "test_linear_static_shapes_batch_size_1000_in_features_1000_out_features_1024_bias_True_input_3d_True_cpu_float16 (__main__.TestSelectAlgorithmCPU)": 132.49540000000005, 218 "test_lstm_cpu (__main__.TestMkldnnCPU)": 94.47895454545456, 219 "test_lstm_packed_change_input_sizes_cpu_cpp_wrapper (__main__.TestCppWrapper)": 62.3839, 220 "test_lstm_packed_change_input_sizes_cpu_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 60.9987, 221 "test_max_autotune_cutlass_backend_addmm_dynamic_False_max_autotune_gemm_backends_ATen,Triton,CUTLASS (__main__.TestCutlassBackend)": 81.91425, 222 "test_missing_cubin_non_abi_compatible_cuda (__main__.AOTInductorTestNonABICompatibleCuda)": 76.22216666666667, 223 "test_pipeline_order_flex_and_zero_bubble_ScheduleClass0 (__main__.TestSchedulePlan)": 76.18842857142859, 224 "test_python_ref__refs_special_zeta_cuda_float64 (__main__.TestCommonCUDA)": 64.07560000000001, 225 "test_python_ref_executor__refs_special_zeta_executor_aten_cuda_float64 (__main__.TestCommonCUDA)": 111.9, 226 "test_python_ref_torch_fallback__refs_special_zeta_cuda_float64 (__main__.TestCommonCUDA)": 62.56008333333333, 227 "test_qat_conv2d_unary (__main__.TestQuantizePT2EX86Inductor)": 165.02692, 228 "test_qat_conv_bn_fusion_cuda (__main__.TestQuantizePT2EQAT_ConvBn1d)": 64.07754545454546, 229 "test_qat_conv_bn_fusion_cuda (__main__.TestQuantizePT2EQAT_ConvBn2d)": 63.76154545454545, 230 "test_qat_conv_bn_fusion_no_conv_bias (__main__.TestQuantizePT2EQAT_ConvBn1d)": 75.56493617021276, 231 "test_qat_conv_bn_fusion_no_conv_bias (__main__.TestQuantizePT2EQAT_ConvBn2d)": 74.55095744680851, 232 "test_qat_conv_bn_relu_fusion_cuda (__main__.TestQuantizePT2EQAT_ConvBn1d)": 62.93418181818183, 233 "test_qat_conv_bn_relu_fusion_cuda (__main__.TestQuantizePT2EQAT_ConvBn2d)": 64.67954545454546, 234 "test_qat_mobilenet_v2 (__main__.TestQuantizePT2EQATModels)": 206.24339393939394, 235 "test_qat_resnet18 (__main__.TestQuantizePT2EQATModels)": 68.29046153846154, 236 "test_qlinear_add_cpu (__main__.TestPatternMatcher)": 70.38814285714287, 237 "test_qlinear_add_cpu_cpp_wrapper (__main__.TestCppWrapper)": 504.9821999999999, 238 "test_qlinear_add_cpu_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 528.0255, 239 "test_qlinear_add_int8_mixed_bf16 (__main__.TestPatternMatcher)": 153.60283333333334, 240 "test_qlinear_add_relu_cpu (__main__.TestPatternMatcher)": 71.7899523809524, 241 "test_qlinear_add_relu_cpu_cpp_wrapper (__main__.TestCppWrapper)": 512.7648999999999, 242 "test_qlinear_add_relu_cpu_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 530.9839, 243 "test_qlinear_add_relu_int8_mixed_bf16 (__main__.TestPatternMatcher)": 157.79833333333332, 244 "test_qlinear_gelu_cpu_cpp_wrapper (__main__.TestCppWrapper)": 61.369600000000005, 245 "test_qlinear_gelu_cpu_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 61.51380000000001, 246 "test_quick_core_backward__unsafe_masked_index_cpu_float64 (__main__.TestDecompCPU)": 389.8819, 247 "test_quick_core_backward__unsafe_masked_index_cuda_float64 (__main__.TestDecompCUDA)": 888.4723333333333, 248 "test_quick_core_backward__unsafe_masked_index_put_accumulate_cpu_float64 (__main__.TestDecompCPU)": 578.2715000000001, 249 "test_quick_core_backward__unsafe_masked_index_put_accumulate_cuda_float64 (__main__.TestDecompCUDA)": 1291.462111111111, 250 "test_quick_core_backward_expand_copy_cuda_float64 (__main__.TestDecompCUDA)": 92.35, 251 "test_quick_core_backward_roll_cpu_float64 (__main__.TestDecompCPU)": 97.31772727272727, 252 "test_quick_core_backward_roll_cuda_float64 (__main__.TestDecompCUDA)": 212.4562222222222, 253 "test_quick_core_backward_select_scatter_cpu_float64 (__main__.TestDecompCPU)": 65.85936363636364, 254 "test_quick_core_backward_select_scatter_cuda_float64 (__main__.TestDecompCUDA)": 151.2542222222222, 255 "test_quick_core_backward_split_cuda_float64 (__main__.TestDecompCUDA)": 79.787, 256 "test_quick_core_backward_split_with_sizes_copy_cpu_float64 (__main__.TestDecompCPU)": 76.91172727272728, 257 "test_quick_core_backward_split_with_sizes_copy_cuda_float64 (__main__.TestDecompCUDA)": 173.51111111111112, 258 "test_quick_core_backward_std_cuda_float64 (__main__.TestDecompCUDA)": 117.01122222222222, 259 "test_rnn_decomp_module_nn_LSTM_train_mode_cuda_float32 (__main__.TestDecompCUDA)": 72.3970909090909, 260 "test_save_load_large_string_attribute (__main__.TestSaveLoad)": 128.59333333333333, 261 "test_shuffler_iterdatapipe (__main__.IntegrationTestDataLoaderDataPipe)": 111.14317647058824, 262 "test_sum_all_cpu_float64 (__main__.TestReductionsCPU)": 161.41700000000003, 263 "test_svd_lowrank_cuda_complex128 (__main__.TestLinalgCUDA)": 306.7471111111111, 264 "test_svd_lowrank_cuda_float64 (__main__.TestLinalgCUDA)": 70.03777777777779, 265 "test_terminate_handler_on_crash (__main__.TestTorch)": 68.968, 266 "test_terminate_signal (__main__.ForkTest)": 144.6801515151515, 267 "test_terminate_signal (__main__.SpawnTest)": 135.16911764705878, 268 "test_train_parity_multi_group (__main__.TestFullyShard1DTrainingCore)": 88.26866666666666, 269 "test_transpose_copy (__main__.CPUReproTests)": 62.54614285714285, 270 "test_triton_bsr_scatter_mm_blocksize_32_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 119.48249999999999, 271 "test_triton_bsr_scatter_mm_blocksize_64_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 71.85900000000001, 272 "test_triton_bsr_scatter_mm_blocksize_64_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 87.0915, 273 "test_unary_ops (__main__.TestTEFuserDynamic)": 119.30365384615385, 274 "test_unary_ops (__main__.TestTEFuserStatic)": 90.27661538461538, 275 "test_unspec_inputs_cuda_cuda_wrapper (__main__.TestCudaWrapper)": 84.24216666666666, 276 "test_unspec_inputs_cuda_dynamic_shapes_cuda_wrapper (__main__.DynamicShapesCudaWrapperCudaTests)": 83.43050000000001, 277 "test_upsample_bicubic2d_cpu (__main__.CpuHalideTests)": 96.144, 278 "test_variant_consistency_jit_nn_functional_max_pool2d_cpu_float32 (__main__.TestJitCPU)": 106.12053333333334, 279 "test_variant_consistency_jit_nn_functional_max_pool2d_cuda_float32 (__main__.TestJitCUDA)": 105.8185, 280 "test_vec_compare_op_cpu_only (__main__.CPUReproTests)": 71.327, 281 "test_verify_model_across_rank_with_logger (__main__.TestDistBackendWithSpawn)": 61.44333333333333, 282 "test_verify_model_across_rank_without_logger (__main__.TestDistBackendWithSpawn)": 61.16233333333334, 283 "test_vmapjvpvjp_diff_cuda_float32 (__main__.TestOperatorsCUDA)": 81.9345, 284 "test_vmapjvpvjp_linalg_lu_solve_cpu_float32 (__main__.TestOperatorsCPU)": 62.15947826086957, 285 "test_vmapjvpvjp_linalg_lu_solve_cuda_float32 (__main__.TestOperatorsCUDA)": 86.87155555555556, 286 "test_vmapjvpvjp_linalg_multi_dot_cuda_float32 (__main__.TestOperatorsCUDA)": 84.2345, 287 "test_vmapjvpvjp_linalg_solve_triangular_cuda_float32 (__main__.TestOperatorsCUDA)": 83.042, 288 "test_vmapjvpvjp_linalg_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 91.31800000000001, 289 "test_vmapjvpvjp_max_pool2d_with_indices_backward_cpu_float32 (__main__.TestOperatorsCPU)": 84.47900000000003, 290 "test_vmapjvpvjp_max_pool2d_with_indices_backward_cuda_float32 (__main__.TestOperatorsCUDA)": 111.041, 291 "test_vmapjvpvjp_nn_functional_conv2d_cpu_float32 (__main__.TestOperatorsCPU)": 70.18206666666667, 292 "test_vmapjvpvjp_nn_functional_conv2d_cuda_float32 (__main__.TestOperatorsCUDA)": 71.4435, 293 "test_vmapjvpvjp_nn_functional_max_pool1d_cuda_float32 (__main__.TestOperatorsCUDA)": 65.864, 294 "test_vmapjvpvjp_nn_functional_max_pool2d_cpu_float32 (__main__.TestOperatorsCPU)": 78.47160000000001, 295 "test_vmapjvpvjp_nn_functional_max_pool2d_cuda_float32 (__main__.TestOperatorsCUDA)": 108.5055, 296 "test_vmapjvpvjp_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 78.44033333333334, 297 "test_vmapjvpvjp_unbind_cpu_float32 (__main__.TestOperatorsCPU)": 61.437625000000004, 298 "test_vmapjvpvjp_unbind_cuda_float32 (__main__.TestOperatorsCUDA)": 113.4555, 299 "test_vmapvjpvjp_meshgrid_list_of_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 111.5335, 300 "test_vmapvjpvjp_meshgrid_variadic_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 117.1695, 301 "test_vmapvjpvjp_nn_functional_bilinear_cuda_float32 (__main__.TestOperatorsCUDA)": 146.6571111111111 302}