1 // Copyright 2019 Google LLC 2 // 3 // This source code is licensed under the BSD-style license found in the 4 // LICENSE file in the root directory of this source tree. 5 6 #include <gtest/gtest.h> 7 8 #include <xnnpack/common.h> 9 #include <xnnpack/isa-checks.h> 10 11 #include <xnnpack/conv.h> 12 #include "conv-hwc2chw-microkernel-tester.h" 13 14 15 #if XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2,input_width_eq_4)16 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2, input_width_eq_4) { 17 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 18 ConvHWC2CHWMicrokernelTester() 19 .kernel_size(3) 20 .subsampling(2) 21 .padding_width(1) 22 .input_channels(3) 23 .output_channels_tile(4) 24 .output_channels(4) 25 .input_width(4) 26 .input_height(3) 27 .Test(xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2, xnn_init_f16_minmax_neon_params); 28 } 29 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2,input_width_div_4)30 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2, input_width_div_4) { 31 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 32 for (size_t input_width = 8; input_width <= 32; input_width += 12) { 33 ConvHWC2CHWMicrokernelTester() 34 .kernel_size(3) 35 .subsampling(2) 36 .padding_width(1) 37 .input_channels(3) 38 .output_channels_tile(4) 39 .output_channels(4) 40 .input_width(input_width) 41 .input_height(3) 42 .Test(xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2, xnn_init_f16_minmax_neon_params); 43 } 44 } 45 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2,input_width_lt_4)46 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2, input_width_lt_4) { 47 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 48 for (size_t input_width = 1; input_width < 4; input_width++) { 49 ConvHWC2CHWMicrokernelTester() 50 .kernel_size(3) 51 .subsampling(2) 52 .padding_width(1) 53 .input_channels(3) 54 .output_channels_tile(4) 55 .output_channels(4) 56 .input_width(input_width) 57 .input_height(3) 58 .Test(xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2, xnn_init_f16_minmax_neon_params); 59 } 60 } 61 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2,input_width_gt_4)62 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2, input_width_gt_4) { 63 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 64 for (size_t input_width = 5; input_width < 8; input_width++) { 65 ConvHWC2CHWMicrokernelTester() 66 .kernel_size(3) 67 .subsampling(2) 68 .padding_width(1) 69 .input_channels(3) 70 .output_channels_tile(4) 71 .output_channels(4) 72 .input_width(input_width) 73 .input_height(3) 74 .Test(xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2, xnn_init_f16_minmax_neon_params); 75 } 76 } 77 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2,output_channels_lt_4)78 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2, output_channels_lt_4) { 79 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 80 for (size_t output_channels = 1; output_channels < 4; output_channels++) { 81 for (size_t input_width = 1; input_width < 32; input_width += 7) { 82 ConvHWC2CHWMicrokernelTester() 83 .kernel_size(3) 84 .subsampling(2) 85 .padding_width(1) 86 .input_channels(3) 87 .output_channels_tile(4) 88 .output_channels(output_channels) 89 .input_width(input_width) 90 .input_height(3) 91 .Test(xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2, xnn_init_f16_minmax_neon_params); 92 } 93 } 94 } 95 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2,output_channels_div_4)96 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2, output_channels_div_4) { 97 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 98 for (size_t output_channels = 8; output_channels <= 16; output_channels += 4) { 99 for (size_t input_width = 1; input_width < 32; input_width += 7) { 100 ConvHWC2CHWMicrokernelTester() 101 .kernel_size(3) 102 .subsampling(2) 103 .padding_width(1) 104 .input_channels(3) 105 .output_channels_tile(4) 106 .output_channels(output_channels) 107 .input_width(input_width) 108 .input_height(3) 109 .Test(xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2, xnn_init_f16_minmax_neon_params); 110 } 111 } 112 } 113 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2,output_channels_gt_4)114 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2, output_channels_gt_4) { 115 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 116 for (size_t output_channels = 5; output_channels < 8; output_channels++) { 117 for (size_t input_width = 1; input_width < 32; input_width += 7) { 118 ConvHWC2CHWMicrokernelTester() 119 .kernel_size(3) 120 .subsampling(2) 121 .padding_width(1) 122 .input_channels(3) 123 .output_channels_tile(4) 124 .output_channels(output_channels) 125 .input_width(input_width) 126 .input_height(3) 127 .Test(xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2, xnn_init_f16_minmax_neon_params); 128 } 129 } 130 } 131 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2,input_height_lt_3)132 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2, input_height_lt_3) { 133 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 134 for (size_t input_height = 1; input_height < 3; input_height++) { 135 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { 136 for (size_t input_width = 1; input_width < 32; input_width += 7) { 137 ConvHWC2CHWMicrokernelTester() 138 .kernel_size(3) 139 .subsampling(2) 140 .padding(1) 141 .input_channels(3) // padded input height of at least 3 required 142 .output_channels_tile(4) 143 .output_channels(output_channels) 144 .input_width(input_width) 145 .input_height(input_height) 146 .Test(xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2, xnn_init_f16_minmax_neon_params); 147 } 148 } 149 } 150 } 151 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2,input_height_gt_3)152 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2, input_height_gt_3) { 153 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 154 for (size_t input_height = 4; input_height <= 9; input_height++) { 155 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { 156 for (size_t input_width = 1; input_width < 32; input_width += 7) { 157 ConvHWC2CHWMicrokernelTester() 158 .kernel_size(3) 159 .subsampling(2) 160 .padding_width(1) 161 .input_channels(3) 162 .output_channels_tile(4) 163 .output_channels(output_channels) 164 .input_width(input_width) 165 .input_height(input_height) 166 .Test(xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2, xnn_init_f16_minmax_neon_params); 167 } 168 } 169 } 170 } 171 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2,padding_top)172 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2, padding_top) { 173 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 174 for (size_t padding_top = 0; padding_top <= 1; padding_top++) { 175 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { 176 for (size_t input_width = 1; input_width < 32; input_width += 7) { 177 ConvHWC2CHWMicrokernelTester() 178 .kernel_size(3) 179 .subsampling(2) 180 .padding_width(1) 181 .padding_top(padding_top) 182 .input_channels(3) 183 .output_channels_tile(4) 184 .output_channels(output_channels) 185 .input_width(input_width) 186 .input_height(9) 187 .Test(xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2, xnn_init_f16_minmax_neon_params); 188 } 189 } 190 } 191 } 192 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2,padding_bottom)193 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2, padding_bottom) { 194 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 195 for (size_t padding_bottom = 0; padding_bottom <= 1; padding_bottom++) { 196 for (size_t output_channels = 1; output_channels < 16; output_channels += 7) { 197 for (size_t input_width = 1; input_width < 32; input_width += 7) { 198 ConvHWC2CHWMicrokernelTester() 199 .kernel_size(3) 200 .subsampling(2) 201 .padding_width(1) 202 .padding_bottom(padding_bottom) 203 .input_channels(3) 204 .output_channels_tile(4) 205 .output_channels(output_channels) 206 .input_width(input_width) 207 .input_height(9) 208 .Test(xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2, xnn_init_f16_minmax_neon_params); 209 } 210 } 211 } 212 } 213 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2,output_y_start)214 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2, output_y_start) { 215 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 216 for (size_t output_y_start = 1; output_y_start <= 3; output_y_start++) { 217 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { 218 for (size_t input_width = 1; input_width < 32; input_width += 7) { 219 ConvHWC2CHWMicrokernelTester() 220 .kernel_size(3) 221 .subsampling(2) 222 .padding_width(1) 223 .input_channels(3) 224 .output_channels_tile(4) 225 .output_channels(output_channels) 226 .input_width(input_width) 227 .input_height(9) 228 .output_y_start(output_y_start) 229 .Test(xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2, xnn_init_f16_minmax_neon_params); 230 } 231 } 232 } 233 } 234 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2,output_y_end)235 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2, output_y_end) { 236 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 237 for (size_t output_y_end = 2; output_y_end < 5; output_y_end++) { 238 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { 239 for (size_t input_width = 1; input_width < 32; input_width += 7) { 240 ConvHWC2CHWMicrokernelTester() 241 .kernel_size(3) 242 .subsampling(2) 243 .padding_width(1) 244 .input_channels(3) 245 .output_channels_tile(4) 246 .output_channels(output_channels) 247 .input_width(input_width) 248 .input_height(9) 249 .output_y_end(output_y_end) 250 .Test(xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2, xnn_init_f16_minmax_neon_params); 251 } 252 } 253 } 254 } 255 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2,qmin)256 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2, qmin) { 257 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 258 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { 259 for (size_t input_width = 1; input_width < 32; input_width += 7) { 260 ConvHWC2CHWMicrokernelTester() 261 .kernel_size(3) 262 .subsampling(2) 263 .padding_width(1) 264 .input_channels(3) 265 .output_channels_tile(4) 266 .output_channels(output_channels) 267 .input_width(input_width) 268 .input_height(6) 269 .qmin(128) 270 .Test(xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2, xnn_init_f16_minmax_neon_params); 271 } 272 } 273 } 274 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2,qmax)275 TEST(F16_CONV_HWC2CHW_3X3S2P1C3X4__NEONFP16ARITH_2X2, qmax) { 276 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 277 for (size_t output_channels = 1; output_channels < 8; output_channels += 3) { 278 for (size_t input_width = 1; input_width < 32; input_width += 7) { 279 ConvHWC2CHWMicrokernelTester() 280 .kernel_size(3) 281 .subsampling(2) 282 .padding_width(1) 283 .input_channels(3) 284 .output_channels_tile(4) 285 .output_channels(output_channels) 286 .input_width(input_width) 287 .input_height(6) 288 .qmax(128) 289 .Test(xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2, xnn_init_f16_minmax_neon_params); 290 } 291 } 292 } 293 #endif // XNN_ENABLE_ARM_FP16 && XNN_ARCH_ARM64 294