// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <cstdint>
#include <limits>
#include <vector>

#include "runtime-tester.h"
#include <gtest/gtest.h>

namespace xnnpack {

// These tests build small two-node subgraphs and check that runtime
// optimization fuses them into a single operator: a Clamp node folds into the
// preceding node's activation min/max, and a Constant Pad node folds into the
// input padding of a following (depthwise) convolution.

TEST(ADD_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(4);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input1_id = 0;
  uint32_t input2_id = 1;
  uint32_t intermediate_id = 2;
  uint32_t output_id = 3;
  tester
      .AddInputTensorF32({1, 2, 2, 3}, input1_id)
      .AddInputTensorF32({1, 2, 2, 3}, input2_id)
      .AddDynamicTensorF32({1, 2, 2, 3}, intermediate_id)
      .AddOutputTensorF32({1, 2, 2, 3}, output_id)
      .AddAddition(input1_id, input2_id, intermediate_id)
      .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(AVERAGE_POOLING_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(3);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t output_id = 2;
  tester
      .AddInputTensorF32({1, 10, 10, 3}, input_id)
      .AddDynamicTensorF32({1, 9, 9, 3}, intermediate_id)
      .AddOutputTensorF32({1, 9, 9, 3}, output_id)
      .AddAveragePooling2D(0, 0, 0, 0, 2, 2, 1, 1, input_id, intermediate_id)
      .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(CLAMP_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(3);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t output_id = 2;
  tester
      .AddInputTensorF32({1, 10, 10, 3}, input_id)
      .AddDynamicTensorF32({1, 10, 10, 3}, intermediate_id)
      .AddOutputTensorF32({1, 10, 10, 3}, output_id)
      .AddClamp(-std::numeric_limits<float>::infinity(),
                std::numeric_limits<float>::infinity(), input_id,
                intermediate_id)
      .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(unoptimized_output, optimized_output);
}
TEST(CONVOLUTION_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(5);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t filter_id = 1;
  uint32_t bias_id = 2;
  uint32_t intermediate_id = 3;
  uint32_t output_id = 4;
  tester
      .AddInputTensorF32({1, 256, 256, 3}, input_id)
      .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
      .AddDynamicTensorF32({1, 128, 128, 32}, intermediate_id)
      .AddOutputTensorF32({1, 128, 128, 32}, output_id)
      .AddConvolution2D(
          ConvolutionParams{
              Padding{1, 1, 1, 1},
              Kernel{3, 3},
              Subsampling{2, 2},
              Dilation{1, 1},
              /*groups=*/ 1,
              /*group_input_channels=*/ 3,
              /*group_output_channels=*/ 32,
          },
          input_id, filter_id, bias_id, intermediate_id)
      .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(DIVIDE_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(4);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input1_id = 0;
  uint32_t input2_id = 1;
  uint32_t intermediate_id = 2;
  uint32_t output_id = 3;
  tester
      .AddInputTensorF32({1, 2, 2, 3}, input1_id)
      .AddInputTensorF32({1, 2, 2, 3}, input2_id)
      .AddDynamicTensorF32({1, 2, 2, 3}, intermediate_id)
      .AddOutputTensorF32({1, 2, 2, 3}, output_id)
      .AddDivide(input1_id, input2_id, intermediate_id)
      .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(DECONVOLUTION_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(5);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t filter_id = 1;
  uint32_t bias_id = 2;
  uint32_t intermediate_id = 3;
  uint32_t output_id = 4;
  tester
      .AddInputTensorF32({1, 128, 128, 3}, input_id)
      .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
      .AddDynamicTensorF32({1, 255, 255, 32}, intermediate_id)
      .AddOutputTensorF32({1, 255, 255, 32}, output_id)
      .AddDeconvolution2D(
          DeconvolutionParams{
              Padding{1, 1, 1, 1},
              Adjustment{0, 0},
              Kernel{3, 3},
              Upsampling{2, 2},
              Dilation{1, 1},
              /*groups=*/ 1,
              /*group_input_channels=*/ 3,
              /*group_output_channels=*/ 32
          },
          input_id, filter_id, bias_id, intermediate_id)
      .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(unoptimized_output, optimized_output);
}
TEST(DEPTHWISE_CONVOLUTION_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(5);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t filter_id = 1;
  uint32_t bias_id = 2;
  uint32_t intermediate_id = 3;
  uint32_t output_id = 4;
  tester
      .AddInputTensorF32({1, 128, 128, 4}, input_id)
      .AddStaticTensorF32({1, 3, 3, 4}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({4}, TensorType::kDense, bias_id)
      .AddDynamicTensorF32({1, 128, 128, 4}, intermediate_id)
      .AddOutputTensorF32({1, 128, 128, 4}, output_id)
      .AddDepthwiseConvolution2D(
          DepthwiseConvolutionParams{
              Padding{1, 1, 1, 1},
              Kernel{3, 3},
              Subsampling{1, 1},
              Dilation{1, 1},
              /*depth_multiplier=*/ 1,
              /*input_channels=*/ 4
          },
          input_id, filter_id, bias_id, intermediate_id)
      .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(FULLY_CONNECTED_2D_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(5);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t filter_id = 1;
  uint32_t bias_id = 2;
  uint32_t intermediate_id = 3;
  uint32_t output_id = 4;
  tester
      .AddInputTensorF32({5, 3}, input_id)
      .AddStaticTensorF32({7, 3}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({7}, TensorType::kDense, bias_id)
      .AddDynamicTensorF32({5, 7}, intermediate_id)
      .AddOutputTensorF32({5, 7}, output_id)
      .AddFullyConnected(input_id, filter_id, bias_id, intermediate_id)
      .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(MULTIPLY_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(4);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input1_id = 0;
  uint32_t input2_id = 1;
  uint32_t intermediate_id = 2;
  uint32_t output_id = 3;
  tester
      .AddInputTensorF32({1, 2, 2, 3}, input1_id)
      .AddInputTensorF32({1, 2, 2, 3}, input2_id)
      .AddDynamicTensorF32({1, 2, 2, 3}, intermediate_id)
      .AddOutputTensorF32({1, 2, 2, 3}, output_id)
      .AddMultiply(input1_id, input2_id, intermediate_id)
      .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(unoptimized_output, optimized_output);
}
TEST(MAX_POOLING_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(3);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t output_id = 2;
  tester
      .AddInputTensorF32({1, 10, 10, 3}, input_id)
      .AddDynamicTensorF32({1, 9, 9, 3}, intermediate_id)
      .AddOutputTensorF32({1, 9, 9, 3}, output_id)
      .AddMaxPooling2D(0, 0, 0, 0, 2, 2, 1, 1, 1, 1, input_id, intermediate_id)
      .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(SUBTRACT_THEN_CLAMP, fusion) {
  auto tester = RuntimeTester(4);
  float output_min = -0.5f;
  float output_max = 0.5f;
  uint32_t input1_id = 0;
  uint32_t input2_id = 1;
  uint32_t intermediate_id = 2;
  uint32_t output_id = 3;
  tester
      .AddInputTensorF32({1, 2, 2, 3}, input1_id)
      .AddInputTensorF32({1, 2, 2, 3}, input2_id)
      .AddDynamicTensorF32({1, 2, 2, 3}, intermediate_id)
      .AddOutputTensorF32({1, 2, 2, 3}, output_id)
      .AddSubtract(input1_id, input2_id, intermediate_id)
      .AddClamp(output_min, output_max, intermediate_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->activation.output_min, output_min);
  ASSERT_EQ(tester.Node(0)->activation.output_max, output_max);
  ASSERT_EQ(tester.Node(0)->outputs[0], output_id);
  ASSERT_EQ(tester.Node(1)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(CONSTANT_PAD_THEN_CONVOLUTION, fusion) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  size_t pre_paddings[4] = {0, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  float padding_value = 0.0f;
  tester
      .AddInputTensorF32({1, 254, 254, 3}, input_id)
      .AddDynamicTensorF32({1, 262, 266, 3}, intermediate_id)
      .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
      .AddOutputTensorF32({1, 131, 133, 32}, output_id)
      .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id,
                      intermediate_id)
      .AddConvolution2D(
          ConvolutionParams{
              Padding{0, 0, 0, 0},
              Kernel{3, 3},
              Subsampling{2, 2},
              Dilation{1, 1},
              /*groups=*/ 1,
              /*group_input_channels=*/ 3,
              /*group_output_channels=*/ 32,
          },
          intermediate_id, filter_id, bias_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(tester.Node(1)->params.convolution_2d.input_padding_top, 2);
  ASSERT_EQ(tester.Node(1)->params.convolution_2d.input_padding_left, 4);
  ASSERT_EQ(tester.Node(1)->params.convolution_2d.input_padding_right, 8);
  ASSERT_EQ(tester.Node(1)->params.convolution_2d.input_padding_bottom, 6);
  ASSERT_EQ(tester.Node(1)->outputs[0], output_id);
  ASSERT_EQ(unoptimized_output, optimized_output);
}
TEST(CONSTANT_PAD_THEN_CONVOLUTION,
     not_fused_due_to_non_zero_padding_in_n_dimension) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  // Non-zero pre-padding in the N or C dimension.
  size_t pre_paddings[4] = {1, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  float padding_value = 0.0f;
  tester
      .AddInputTensorF32({1, 254, 254, 3}, input_id)
      .AddDynamicTensorF32({2, 262, 266, 3}, intermediate_id)
      .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
      .AddOutputTensorF32({2, 131, 133, 32}, output_id)
      .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id,
                      intermediate_id)
      .AddConvolution2D(
          ConvolutionParams{
              Padding{0, 0, 0, 0},
              Kernel{3, 3},
              Subsampling{2, 2},
              Dilation{1, 1},
              /*groups=*/ 1,
              /*group_input_channels=*/ 3,
              /*group_output_channels=*/ 32,
          },
          intermediate_id, filter_id, bias_id, output_id)
      .Optimize();

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
}

TEST(CONSTANT_PAD_THEN_CONVOLUTION, not_fused_due_to_padding_value_not_zero) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  size_t pre_paddings[4] = {0, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  float padding_value = 1.0f;
  tester
      .AddInputTensorF32({1, 254, 254, 3}, input_id)
      .AddDynamicTensorF32({2, 262, 266, 3}, intermediate_id)
      .AddStaticTensorF32({32, 3, 3, 3}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({32}, TensorType::kDense, bias_id)
      .AddOutputTensorF32({2, 131, 133, 32}, output_id)
      .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id,
                      intermediate_id)
      .AddConvolution2D(
          ConvolutionParams{
              Padding{0, 0, 0, 0},
              Kernel{3, 3},
              Subsampling{2, 2},
              Dilation{1, 1},
              /*groups=*/ 1,
              /*group_input_channels=*/ 3,
              /*group_output_channels=*/ 32,
          },
          intermediate_id, filter_id, bias_id, output_id)
      .Optimize();

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
}

TEST(CONSTANT_PAD_THEN_DEPTHWISE_CONVOLUTION, fusion) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  size_t pre_paddings[4] = {0, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  float padding_value = 0.0f;
  tester
      .AddInputTensorF32({1, 128, 128, 4}, input_id)
      .AddDynamicTensorF32({1, 136, 140, 4}, intermediate_id)
      .AddStaticTensorF32({1, 3, 3, 4}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({4}, TensorType::kDense, bias_id)
      .AddOutputTensorF32({1, 134, 140, 4}, output_id)
      .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id,
                      intermediate_id)
      .AddDepthwiseConvolution2D(
          DepthwiseConvolutionParams{
              Padding{0, 0, 0, 0},
              Kernel{3, 3},
              Subsampling{1, 1},
              Dilation{1, 1},
              /*depth_multiplier=*/ 1,
              /*input_channels=*/ 4
          },
          intermediate_id, filter_id, bias_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 1);
  ASSERT_EQ(tester.Node(0)->compute_type, xnn_compute_type_invalid);
  ASSERT_EQ(tester.Node(1)->params.depthwise_convolution_2d.input_padding_top, 2);
  ASSERT_EQ(tester.Node(1)->params.depthwise_convolution_2d.input_padding_left, 4);
  ASSERT_EQ(tester.Node(1)->params.depthwise_convolution_2d.input_padding_right, 8);
  ASSERT_EQ(tester.Node(1)->params.depthwise_convolution_2d.input_padding_bottom, 6);
  ASSERT_EQ(tester.Node(1)->outputs[0], output_id);
  ASSERT_EQ(unoptimized_output, optimized_output);
}
TEST(CONSTANT_PAD_THEN_DEPTHWISE_CONVOLUTION,
     not_fused_due_to_non_zero_padding_in_n_dimension) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  // Non-zero pre-padding in the N or C dimension.
  size_t pre_paddings[4] = {1, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  float padding_value = 0.0f;
  tester
      .AddInputTensorF32({1, 128, 128, 4}, input_id)
      .AddDynamicTensorF32({2, 136, 140, 4}, intermediate_id)
      .AddStaticTensorF32({1, 3, 3, 4}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({4}, TensorType::kDense, bias_id)
      .AddOutputTensorF32({2, 134, 140, 4}, output_id)
      .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id,
                      intermediate_id)
      .AddDepthwiseConvolution2D(
          DepthwiseConvolutionParams{
              Padding{0, 0, 0, 0},
              Kernel{3, 3},
              Subsampling{1, 1},
              Dilation{1, 1},
              /*depth_multiplier=*/ 1,
              /*input_channels=*/ 4
          },
          intermediate_id, filter_id, bias_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  ASSERT_EQ(unoptimized_output, optimized_output);
}

TEST(CONSTANT_PAD_THEN_DEPTHWISE_CONVOLUTION,
     not_fused_due_to_padding_value_not_zero) {
  auto tester = RuntimeTester(5);
  uint32_t input_id = 0;
  uint32_t intermediate_id = 1;
  uint32_t filter_id = 2;
  uint32_t bias_id = 3;
  uint32_t output_id = 4;
  size_t pre_paddings[4] = {0, 2, 4, 0};
  size_t post_paddings[4] = {0, 6, 8, 0};
  float padding_value = 1.0f;
  tester
      .AddInputTensorF32({1, 128, 128, 4}, input_id)
      .AddDynamicTensorF32({1, 136, 140, 4}, intermediate_id)
      .AddStaticTensorF32({1, 3, 3, 4}, TensorType::kDense, filter_id)
      .AddStaticTensorF32({4}, TensorType::kDense, bias_id)
      .AddOutputTensorF32({1, 134, 140, 4}, output_id)
      .AddConstantPad(pre_paddings, post_paddings, padding_value, input_id,
                      intermediate_id)
      .AddDepthwiseConvolution2D(
          DepthwiseConvolutionParams{
              Padding{0, 0, 0, 0},
              Kernel{3, 3},
              Subsampling{1, 1},
              Dilation{1, 1},
              /*depth_multiplier=*/ 1,
              /*input_channels=*/ 4
          },
          intermediate_id, filter_id, bias_id, output_id);

  std::vector<float> unoptimized_output = tester.RunWithoutFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);

  std::vector<float> optimized_output = tester.RunWithFusion<float>();
  ASSERT_EQ(tester.NumOperators(), 2);
  ASSERT_EQ(unoptimized_output, optimized_output);
}

}  // namespace xnnpack
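
// -----------------------------------------------------------------------------
// Illustrative sketch, not part of the original test suite: roughly what
// RuntimeTester assembles for the ADD_THEN_CLAMP case above, written directly
// against the public XNNPACK subgraph API. Assumptions: <xnnpack.h> is already
// visible through runtime-tester.h, the helper name is hypothetical, and the
// Clamp-into-Add fusion is left to the optimization pass that runs during
// runtime creation. Error handling is omitted for brevity.
xnn_runtime_t BuildAddThenClampRuntimeSketch() {
  xnn_initialize(/*allocator=*/nullptr);

  // Four values in total: two external inputs, one internal intermediate, and
  // one external output, mirroring RuntimeTester(4) with value ids 0..3.
  xnn_subgraph_t subgraph = nullptr;
  xnn_create_subgraph(/*external_value_ids=*/4, /*flags=*/0, &subgraph);

  const size_t dims[4] = {1, 2, 2, 3};
  uint32_t input1_id = 0;
  uint32_t input2_id = 1;
  uint32_t intermediate_id = 2;
  uint32_t output_id = 3;
  xnn_define_tensor_value(subgraph, xnn_datatype_fp32, 4, dims, /*data=*/nullptr,
                          /*external_id=*/0, XNN_VALUE_FLAG_EXTERNAL_INPUT,
                          &input1_id);
  xnn_define_tensor_value(subgraph, xnn_datatype_fp32, 4, dims, /*data=*/nullptr,
                          /*external_id=*/1, XNN_VALUE_FLAG_EXTERNAL_INPUT,
                          &input2_id);
  xnn_define_tensor_value(subgraph, xnn_datatype_fp32, 4, dims, /*data=*/nullptr,
                          /*external_id=*/2, /*flags=*/0, &intermediate_id);
  xnn_define_tensor_value(subgraph, xnn_datatype_fp32, 4, dims, /*data=*/nullptr,
                          /*external_id=*/3, XNN_VALUE_FLAG_EXTERNAL_OUTPUT,
                          &output_id);

  // An unbounded Add2 followed by a separate Clamp node; the optimizer is
  // expected to fold the [-0.5f, 0.5f] range into the Add node's activation,
  // which is what the ADD_THEN_CLAMP test asserts via Node(0)->activation.
  xnn_define_add2(subgraph, -std::numeric_limits<float>::infinity(),
                  std::numeric_limits<float>::infinity(), input1_id, input2_id,
                  intermediate_id, /*flags=*/0);
  xnn_define_clamp(subgraph, -0.5f, 0.5f, intermediate_id, output_id,
                   /*flags=*/0);

  xnn_runtime_t runtime = nullptr;
  xnn_create_runtime_v2(subgraph, /*threadpool=*/nullptr, /*flags=*/0, &runtime);
  xnn_delete_subgraph(subgraph);
  return runtime;
}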