1 /*
2 * Copyright (c) 2017-2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24 #include "arm_compute/core/Types.h"
25 #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
26 #include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h"
27 #include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
28 #include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"
29 #include "arm_compute/runtime/Tensor.h"
30 #include "arm_compute/runtime/TensorAllocator.h"
31 #include "src/core/helpers/MemoryHelpers.h"
32 #include "src/cpu/operators/CpuGemmConv2d.h"
33 #include "src/cpu/operators/CpuGemmDirectConv2d.h"
34 #include "src/cpu/operators/CpuWinogradConv2d.h"
35 #include "tests/NEON/Accessor.h"
36 #include "tests/PaddingCalculator.h"
37 #include "tests/datasets/LargeConvolutionLayerDataset.h"
38 #include "tests/datasets/SmallConvolutionLayerDataset.h"
39 #include "tests/datasets/TinyConvolutionLayerDataset.h"
40 #include "tests/framework/Asserts.h"
41 #include "tests/framework/Macros.h"
42 #include "tests/framework/datasets/Datasets.h"
43 #include "tests/validation/Validation.h"
44 #include "tests/validation/fixtures/ConvolutionLayerFixture.h"
45 #include "tests/validation/fixtures/WinogradConvolutionLayerFixture.h"
46
47 namespace arm_compute
48 {
49 namespace test
50 {
51 namespace validation
52 {
53 namespace detail
54 {
55 template <>
configure_conv_function(NEGEMMConv2d & func,Tensor * src,const Tensor * weights,const Tensor * bias,Tensor * dst,const PadStrideInfo & info,const WeightsInfo & weights_info,const Size2D & dilation,const ActivationLayerInfo & act_info,unsigned int num_groups)56 void configure_conv_function<NEGEMMConv2d, Tensor>(NEGEMMConv2d &func,
57 Tensor *src, const Tensor *weights, const Tensor *bias, Tensor *dst,
58 const PadStrideInfo &info, const WeightsInfo &weights_info,
59 const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
60 {
61 ARM_COMPUTE_UNUSED(weights_info);
62
63 Conv2dInfo conv_info(info, dilation, act_info, false, num_groups);
64 func.configure(src, weights, bias, dst, conv_info);
65 }
66 } // namespace detail
namespace
{
/* Validation tolerances used by the convolution tests below. */
const RelativeTolerance<float> rel_tolerance_f32(0.01f);              /**< Relative tolerance for FP32 types */
const RelativeTolerance<float> rel_tolerance_winograd_3x3_f32(0.05f); /**< Relative tolerance for FP32 types */
const AbsoluteTolerance<float> abs_tolerance_f32(0.002f);             /**< Absolute tolerance for FP32 types */
const AbsoluteTolerance<float> abs_tolerance_1xN_f32(0.0041f);        /**< Absolute tolerance for FP32 types */

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
/* Tolerances used by the FP16 Winograd fast-math tests (reference computed in FP32). */
const AbsoluteTolerance<half> tolerance_convolution_layer_f16(half(0.4f));
constexpr float               tolerance_num_f16 = 0.15f;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for FP16 types */
const AbsoluteTolerance<float>            abs_tolerance_f16(0.2f);                   /**< Absolute tolerance for FP16 types */
constexpr float                           tolerance_num = 0.07f;                     /**< Tolerance number for the FP16 implementation */
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */

/** CNN data types (F16 entry only present when built with FP16 support). */
const auto CNNDataTypes = framework::dataset::make("DataType",
{
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
    DataType::F16,
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
    DataType::F32,
    DataType::QASYMM8,
});
/** Activations fused on top of the convolution output in the fixtures. */
const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
{
    ActivationLayerInfo(),
    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f)
});

/** Quantization parameters exercised by the quantized variants. */
const auto QuantizationData = framework::dataset::make("QuantizationInfo",
{
    QuantizationInfo(0.5f, 10),
    QuantizationInfo(0.3f, 3),
    QuantizationInfo(1.f, 10),
    QuantizationInfo(1.1f, 10),
});
} // namespace
110
111 TEST_SUITE(NEON)
TEST_SUITE(ConvolutionLayer)112 TEST_SUITE(ConvolutionLayer)
113
// *INDENT-OFF*
// clang-format off
// Checks that NEConvolutionLayer::get_convolution_method() selects the expected
// backend (Winograd vs GEMM) for representative input/weight shapes, strides and
// fast-math settings. The five zipped datasets are consumed row-wise: the i-th
// entry of each dataset forms one test configuration.
DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
                                          framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F32),
                                                                                  TensorInfo(TensorShape(23U, 27U, 32U, 4U), 1, DataType::F32),
                                                                                  TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32),
                                                                                  TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32)
                                          }),
                                          framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F32),
                                                                                    TensorInfo(TensorShape(5U, 5U, 32U, 21U), 1, DataType::F32),
                                                                                    TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
                                                                                    TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
                                          })),
                                          framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
                                                                                   TensorInfo(TensorShape(19U, 23U, 21U, 4U), 1, DataType::F32),
                                                                                   TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
                                                                                   TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
                                          })),
                                          framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
                                                                                 PadStrideInfo(1, 1, 0, 0),
                                                                                 PadStrideInfo(2, 1, 0, 0),
                                                                                 PadStrideInfo(3, 2, 1, 0)
                                          })),
                                          framework::dataset::make("FastMath", { true,
                                                                                 true,
                                                                                 false,
                                                                                 false
                                          })),
                                          framework::dataset::make("Expected", { ConvolutionMethod::WINOGRAD, ConvolutionMethod::WINOGRAD, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })),
               input_info, weights_info, output_info, conv_info, fast_math, expected)
{
    // Clones are made resizable so the query does not mutate the dataset entries.
    ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true),
                                                                            &weights_info.clone()->set_is_resizable(true),
                                                                            &output_info.clone()->set_is_resizable(true), conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math);
    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
}
// clang-format on
// *INDENT-ON*
152 TEST_SUITE_END() // ConvolutionLayer
153
154 TEST_SUITE(WinogradLayer)
/** Standard Winograd validation fixture (fast math enabled by the fixture). */
template <typename T>
using NEWinogradConvolutionLayerFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T, T, true, true>;
/** Variant that mixes data layouts between configure and run. */
template <typename T>
using NEWinogradConvolutionLayerMixedDataLayoutFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T, T, true, true>;

/** Variant that runs the convolution without a bias tensor. */
template <typename T>
using NEWinogradConvolutionLayerNoBiasFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T, T, false>;
162
163 /** Test case for memory injection in @ref cpu::CpuWinogradConv2d.
164 *
165 * Configure the operator once and inject memory at run-time in multiple executions.
166 *
167 * Checks performed in order:
168 * - Both runs compute the same output
169 */
TEST_CASE(MemoryInjection,framework::DatasetMode::ALL)170 TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
171 {
172 auto winograd = std::make_unique<cpu::CpuWinogradConv2d>();
173 const auto src_info = TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32);
174 const auto w_info = TensorInfo(TensorShape(1U), 1, DataType::F32);
175 const auto b_info = TensorInfo(TensorShape(1U, 3U, 32U, 1U), 1, DataType::F32);
176 auto dst_info = TensorInfo(TensorShape(8U, 6U, 1U), 1, DataType::F32);
177 const PadStrideInfo pad_info{};
178
179 winograd->configure(&src_info, &b_info, &w_info, &dst_info, pad_info);
180
181 // telhs are newly created every call of this lambda function
182 auto a = create_tensor<Tensor>(src_info);
183 auto b = create_tensor<Tensor>(b_info);
184 auto c = create_tensor<Tensor>(w_info);
185 a.allocator()->allocate();
186 b.allocator()->allocate();
187 c.allocator()->allocate();
188
189 ITensorPack run_pack{ { TensorType::ACL_SRC_0, &a }, { TensorType::ACL_SRC_1, &b }, { TensorType::ACL_SRC_2, &c } };
190 ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &b }, { TensorType::ACL_SRC_2, &c } };
191
192 auto mg = MemoryGroup{};
193 auto ws = manage_workspace<Tensor>(winograd->workspace(), mg, run_pack, prep_pack);
194 auto run_conv = [&]() -> Tensor
195 {
196 auto dst = create_tensor<Tensor>(dst_info);
197 dst.allocator()->allocate();
198
199 run_pack.add_tensor(TensorType::ACL_DST, &dst);
200 library->fill_tensor_value(Accessor(a), 1.f);
201 library->fill_tensor_value(Accessor(b), 2.f);
202 library->fill_tensor_value(Accessor(c), 3.f);
203
204 // This operator is configured once and captured by this lambda.
205 winograd->prepare(prep_pack);
206 winograd->run(run_pack);
207 return dst;
208 };
209
210 auto result_0 = run_conv();
211 auto result_1 = run_conv();
212
213 for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
214 {
215 ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
216 }
217 }
218
219 /** Test case for memory injection in @ref NEWinogradConvolutionLayer.
220 *
221 * Make sure @ref NEWinogradConvolutionLayer still works through injecting the memory at configure time using the old API.
222 *
223 * Checks performed in order:
224 * - Both runs compute the same output
225 */
TEST_CASE(MultipleExecutionWithConfigure,framework::DatasetMode::ALL)226 TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
227 {
228 auto gemm = std::make_unique<NEWinogradConvolutionLayer>();
229 const auto src_info = TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32);
230 const auto w_info = TensorInfo(TensorShape(1U), 1, DataType::F32);
231 const auto b_info = TensorInfo(TensorShape(1U, 3U, 32U, 1U), 1, DataType::F32);
232 auto dst_info = TensorInfo(TensorShape(8U, 6U, 1U), 1, DataType::F32);
233 const PadStrideInfo pad_info{};
234
235 auto run_conv = [&]()
236 {
237 auto src = create_tensor<Tensor>(src_info);
238 auto w = create_tensor<Tensor>(w_info);
239 auto b = create_tensor<Tensor>(b_info);
240 auto dst = create_tensor<Tensor>(dst_info);
241
242 gemm->configure(&src, &b, &w, &dst, pad_info);
243
244 src.allocator()->allocate();
245 b.allocator()->allocate();
246 w.allocator()->allocate();
247 dst.allocator()->allocate();
248
249 library->fill_tensor_value(Accessor(src), 1.f);
250 library->fill_tensor_value(Accessor(b), 2.f);
251 library->fill_tensor_value(Accessor(w), 3.f);
252 gemm->run();
253 return dst;
254 };
255
256 auto result_0 = run_conv();
257 auto result_1 = run_conv();
258
259 for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
260 {
261 ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
262 }
263 }
264
265 TEST_SUITE(FP32)
266
TEST_SUITE(Conv1x3)
// Precommit: small 1x3 Winograd kernels over NCHW and NHWC with fused activations.
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}
// Single hand-picked 1x3 shape exercised with mixed data layouts.
FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEWinogradConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(combine(combine(combine(combine(combine(
                                                                                   framework::dataset::make("Input", TensorShape(8U, 8U, 32U)),
                                                                                   framework::dataset::make("Weight", TensorShape(1U, 3U, 32U, 1U))),
                                                                               framework::dataset::make("Bias", TensorShape(1U))),
                                                                       framework::dataset::make("Output", TensorShape(8U, 6U, 1U))),
                                                               framework::dataset::make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0))),
                                                       framework::dataset::make("Dilation", Size2D(1U, 1U))),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}
// Nightly: large 1x3 shapes use the looser 1xN absolute tolerance.
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
}

TEST_SUITE_END() // Conv1x3
303
TEST_SUITE(Conv3x1)
// Precommit/nightly coverage for 3x1 Winograd kernels; large shapes use the 1xN tolerance.
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
}

TEST_SUITE_END() // Conv3x1

TEST_SUITE(Conv1x5)
// Precommit/nightly coverage for 1x5 Winograd kernels.
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
}

TEST_SUITE_END() // Conv1x5

TEST_SUITE(Conv5x1)
// Precommit/nightly coverage for 5x1 Winograd kernels.
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
}

TEST_SUITE_END() // Conv5x1
369
TEST_SUITE(Conv7x1)
// Precommit/nightly coverage for 7x1 Winograd kernels.
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}

FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
}
TEST_SUITE_END() // Conv7x1

TEST_SUITE(Conv1x7)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}

// NOTE(review): this RunLarge case under the Conv1x7 suite uses the *7x1* Large
// dataset (RunSmall above uses the 1x7 one). This looks like a copy-paste from
// Conv7x1 — confirm whether a LargeWinogradConvolutionLayer1x7Dataset exists and
// was intended here.
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
}
TEST_SUITE_END() // Conv1x7
413
TEST_SUITE(Conv3x3)
// Precommit/nightly coverage for 3x3 Winograd kernels.
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))

{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))

{
    // Validate output
    // floating point arithmetic the Winograd results will not be exactly the same as direct convolution, especially for big shapes
    validate(Accessor(_target), _reference, rel_tolerance_winograd_3x3_f32, 0.f, float(abs_tolerance_f32));
}
TEST_SUITE_END() // Conv3x3

TEST_SUITE(Conv5x5)
// Precommit/nightly coverage for 5x5 Winograd kernels.
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))

{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))

{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}

TEST_SUITE_END() // Conv5x5

// Runs the 3x3 and 5x5 small datasets with the bias tensor disabled.
FIXTURE_DATA_TEST_CASE(RunSmallNoBias, NEWinogradConvolutionLayerNoBiasFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(framework::dataset::concat(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
                                                                          datasets::SmallWinogradConvolutionLayer5x5Dataset()),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),

                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}
473
474 TEST_SUITE_END() // FP32
475
476 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
477 TEST_SUITE(FP16)
478 using CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, half, float>;
479
480 DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
481 framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16),
482 TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16)
483 }),
484 framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16),
485 TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16)
486 })),
487 framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
488 TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F16)
489 })),
490 framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
491 PadStrideInfo(1, 1, 0, 0)
492 })),
493 framework::dataset::make("FastMath", { false, // case fp16 and fast_math False then disable Winograd
494 true // case fp16 and fast_math True then enable Winograd
495 })),
496 framework::dataset::make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::WINOGRAD })),
497 input_info, weights_info, output_info, conv_info, fast_math, expected)
498 {
499 ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true),
500 &weights_info.clone()->set_is_resizable(true),
501 &output_info.clone()->set_is_resizable(true), conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math);
502 ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
503 }
504
505 TEST_SUITE(Conv3x3)
506 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
507 combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
508 framework::dataset::make("DataType", { DataType::F16 })),
509 ActivationFunctionsDataset),
510 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
511
512 {
513 // Validate output
514 validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
515 }
516
517 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
518 combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
519 framework::dataset::make("DataType", { DataType::F16 })),
520 ActivationFunctionsDataset),
521 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
522
523 {
524 // Validate output
525 validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
526 }
527 TEST_SUITE_END() // Conv3x3
528 TEST_SUITE_END() // FP16
529 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
530 TEST_SUITE_END() // WinogradLayer
531
532 #ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
533 TEST_SUITE(FIXED_FORMAT_KERNELS)
534 TEST_SUITE(VariableWeightUtils)
535
536 // UC2_1_* tests: the user requests a specific fixed format, but there is no kernel that supports it.
537
/** Queries whether an optimized fixed-format kernel exists, with fast math disabled. */
template <typename ConvolutionClass>
using HasOptImplFixtureNoFastMath = HasOptImplFixture<ConvolutionClass, /*enable_fast_math*/ false>;

/** Queries whether an optimized fixed-format kernel exists, with fast math enabled. */
template <typename ConvolutionClass>
using HasOptImplFixtureFastMath = HasOptImplFixture<ConvolutionClass, /*enable_fast_math*/ true>;
543
544 // UC2_1
545
// UC2_1: requesting the OHWIo2 fixed format must fail — no kernel supports it,
// so no kernel may be reported as found (with or without fast math).
FIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 })))
{
    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
}
FIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 })))
{
    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
}

FIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 })))
{
    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
}

FIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 })))
{
    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
}
572
573 // UC2_2_* tests: the user requests a specific fixed format, and a
574 // kernel that support that fixed format is found.
575
// UC2_2: the requested OHWIo4 fixed format is supported, so a kernel must be
// found and the computed format must match the request exactly.
FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo4 })))
{
    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo4, framework::LogLevel::ERRORS);
}

FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo4 })))
{
    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo4, framework::LogLevel::ERRORS);
}
591
592 #if defined(ARM_COMPUTE_ENABLE_BF16)
593
594 FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
595 combine(framework::dataset::make("DataType", { DataType::F32 }),
596 framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 })))
597 {
598 ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
599 ARM_COMPUTE_EXPECT_EQUAL(_computed_weight_format, arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS);
600 }
601
602 FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
603 combine(framework::dataset::make("DataType", { DataType::F32 }),
604 framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 })))
605 {
606 ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
607 ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS);
608 }
609
610 #endif // ARM_COMPUTE_ENABLE_BF16
611
612 // UC3_1_* tests: the user queries for ANY fixed format, but there is
613 // no kernel that support the use case specified by the user (for
614 // example, there is no fixed format kernel for the datatype of the
615 // problem).
616
// UC3_1: querying ANY fixed format for S32 data must fail — no fixed-format
// kernel exists for this data type, with or without fast math.
FIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::S32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
{
    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
}

FIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::S32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
{
    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
}

FIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::S32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
{
    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
}

FIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::S32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
{
    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
}
644
645 // UC3_2_* tests: the user queries for ANY fixed format. The search
646 // succeeded and the fixed format found is prompted back for
647 // consumption by the user. Note that we just test the
648 // _computed_weight_format to be anything but not the formats that are
649 // not fixed formats (ANY and UNSPECIFIED). This is because the weight
650 // format that the runtime produces depends on the size of the vector
651 // units of the hardware where the tests is executed. For example, a
652 // format like OHWIo4 for FP32 data returned for 128-bit NEON hardware
653 // is replaced by OHWIo8 when running on 256-bit SVE.
654
655 FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
656 combine(framework::dataset::make("DataType", { DataType::F32 }),
657 framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
658 {
659 ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
660 ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS);
661 ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
662 }
663
664 FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
665 combine(framework::dataset::make("DataType", { DataType::F32 }),
666 framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
667 {
668 ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS);
669 ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
670 }
671
#if defined(ARM_COMPUTE_ENABLE_BF16)

// UC3_2 (fast math): the query must succeed and, on top of returning a
// concrete fixed format, that format must be a fast-math one.
FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
{
    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS);
}

FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
{
    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS);
}

#endif // ARM_COMPUTE_ENABLE_BF16
695
696 namespace
697 {
698 using TestCaseType = std::tuple<TensorShape, TensorShape, arm_compute::WeightFormat>;
699 auto prepare_weights_shapes = framework::dataset::make("TensorShape",
700 {
701 // OHWIo<interleave_by>i<block_by>
702 //
703 // OHWI --> O'HWI', where:
704 //
705 // O'= smallest multiple of <interleave_by> such that O<=O'
706 // I'= smallest multiple of <block_by> such that I<=I'
707 //
708
709 // Change N for OHWIo4
710 TestCaseType({ { 1U, 1U, 1U, 1U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
711 TestCaseType({ { 1U, 1U, 1U, 2U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
712 TestCaseType({ { 1U, 1U, 1U, 3U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
713 TestCaseType({ { 1U, 1U, 1U, 4U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
714 TestCaseType({ { 1U, 1U, 1U, 5U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
715 TestCaseType({ { 1U, 1U, 1U, 6U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
716 TestCaseType({ { 1U, 1U, 1U, 7U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
717 TestCaseType({ { 1U, 1U, 1U, 8U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
718 TestCaseType({ { 1U, 1U, 1U, 9U }, { 1U, 1U, 1U, 12U }, arm_compute::WeightFormat::OHWIo4 }),
719 // // Change N for OHWIo8
720 TestCaseType({ { 1U, 1U, 1U, 1U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
721 TestCaseType({ { 1U, 1U, 1U, 2U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
722 TestCaseType({ { 1U, 1U, 1U, 3U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
723 TestCaseType({ { 1U, 1U, 1U, 4U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
724 TestCaseType({ { 1U, 1U, 1U, 5U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
725 TestCaseType({ { 1U, 1U, 1U, 6U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
726 TestCaseType({ { 1U, 1U, 1U, 7U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
727 TestCaseType({ { 1U, 1U, 1U, 8U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
728 TestCaseType({ { 1U, 1U, 1U, 9U }, { 1U, 1U, 1U, 16U }, arm_compute::WeightFormat::OHWIo8 }),
729 // // Change N for OHWIo4 when H, W and C are not 1
730 TestCaseType({ { 3U, 4U, 2U, 1U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
731 TestCaseType({ { 3U, 4U, 2U, 2U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
732 TestCaseType({ { 3U, 4U, 2U, 3U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
733 TestCaseType({ { 3U, 4U, 2U, 4U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
734 TestCaseType({ { 3U, 4U, 2U, 5U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
735 TestCaseType({ { 3U, 4U, 2U, 6U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
736 TestCaseType({ { 3U, 4U, 2U, 7U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
737 TestCaseType({ { 3U, 4U, 2U, 8U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
738 TestCaseType({ { 3U, 4U, 2U, 9U }, { 3, 4, 2, 12 }, arm_compute::WeightFormat::OHWIo4 }),
739
740 // // Fix N and move HWI around, with different data layouts and formats
741 TestCaseType({ { 2U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4 }),
742 TestCaseType({ { 3U, 4U, 2U, 5U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
743 TestCaseType({ { 2U, 4U, 3U, 9U }, { 2, 4, 3, 16 }, arm_compute::WeightFormat::OHWIo8 }),
744 TestCaseType({ { 3U, 4U, 2U, 9U }, { 3, 4, 2, 16 }, arm_compute::WeightFormat::OHWIo8 }),
745 TestCaseType({ { 1024U, 1U, 1U, 1001U }, { 1024, 1, 1, 1008 }, arm_compute::WeightFormat::OHWIo8 }),
746
747 // // Adding <block_by> on I (=C)
748 TestCaseType({ { 1U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),
749 TestCaseType({ { 2U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),
750 TestCaseType({ { 3U, 4U, 3U, 5U }, { 4, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),
751
752 // ---------
753 TestCaseType({ { 2, 2, 1, 5 }, { 2, 2, 1, 8 }, arm_compute::WeightFormat::OHWIo4 }),
754 TestCaseType({ { 1, 2, 2, 5 }, { 1, 2, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
755
756 });
757 } // unnamed namespace
758
DATA_TEST_CASE(PrepareWeightShape,framework::DatasetMode::ALL,prepare_weights_shapes,shapes)759 DATA_TEST_CASE(PrepareWeightShape, framework::DatasetMode::ALL,
760 prepare_weights_shapes, shapes)
761 {
762 const TensorShape input_shape = std::get<0>(shapes);
763 const TensorShape expected_shape = std::get<1>(shapes);
764 const arm_compute::WeightFormat wf = std::get<2>(shapes);
765 const DataType DT = DataType::F32;
766 const DataLayout DL = DataLayout::NHWC;
767 const auto TI = TensorInfo(input_shape, 1 /*num_channels, deprecated*/, DT, DL);
768 const TensorInfo computed_info = ::arm_compute::test::validation::prepare_weights(TI, wf);
769 ARM_COMPUTE_EXPECT_EQUAL(computed_info.tensor_shape(), expected_shape, framework::LogLevel::ERRORS);
770 }
771
772 TEST_SUITE_END() // VariableWeightUtils
773
774 TEST_SUITE(ExperimentalCpuAPIVariableWeightWithFixtures)
775
776 template <typename ScalarType>
777 using VarWidth = VariableWeightsFixture<cpu::CpuGemmConv2d, Tensor, Accessor, ScalarType, /*enable_fast_math*/ false>;
778
779 FIXTURE_DATA_TEST_CASE(RunSmallFloat, VarWidth<float>, framework::DatasetMode::ALL,
780 combine(combine(datasets::SmallConvolutionLayerDataset(),
781 framework::dataset::make("DataLayout", { DataLayout::NHWC })),
782 framework::dataset::make("ACL Scalar type", { DataType::F32 })))
783 {
784 // Validate output
785 validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
786 }
787
788 FIXTURE_DATA_TEST_CASE(RunSmallHalf, VarWidth<half>, framework::DatasetMode::ALL,
789 combine(combine(datasets::SmallConvolutionLayerDataset(),
790 framework::dataset::make("DataLayout", { DataLayout::NHWC })),
791 framework::dataset::make("ACL Scalar type", { DataType::F16 })))
792 {
793 // Validate output
794 validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16));
795 }
796
797 #if defined(ARM_COMPUTE_ENABLE_BF16)
798 template <typename ScalarType>
799 using VarWidthFastMath = VariableWeightsFixture<cpu::CpuGemmConv2d, Tensor, Accessor, ScalarType, /*enable_fast_math*/ true>;
800
801 FIXTURE_DATA_TEST_CASE(RunSmallFloatFastMath, VarWidthFastMath<float>, framework::DatasetMode::ALL,
802 combine(combine(datasets::SmallConvolutionLayerDataset(),
803 framework::dataset::make("DataLayout", { DataLayout::NHWC })),
804 framework::dataset::make("ACL Scalar type", { DataType::F32 })))
805 {
806 // Validate output
807 validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
808 }
809 #endif // ARM_COMPUTE_ENABLE_BF16
810
811 TEST_SUITE_END() // ExperimentalCpuAPIVariableWeightWithFixtures
812
813 TEST_SUITE(ExperimentalNEAPIVariableWeightWithFixtures)
814
815 template <typename ScalarType>
816 using NEGEMMVarWidth = VariableWeightsFixtureNEInterface<NEGEMMConvolutionLayer, Tensor, Accessor, ScalarType, /*enable_fast_math*/ false>;
817
818 FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloat, NEGEMMVarWidth<float>, framework::DatasetMode::ALL,
819 combine(combine(datasets::SmallConvolutionLayerDataset(),
820 framework::dataset::make("DataLayout", { DataLayout::NHWC })),
821 framework::dataset::make("ACL Scalar type", { DataType::F32 })))
822 {
823 // Validate output
824 validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
825 }
826
827 FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallHalf, NEGEMMVarWidth<half>, framework::DatasetMode::ALL,
828 combine(combine(datasets::SmallConvolutionLayerDataset(),
829 framework::dataset::make("DataLayout", { DataLayout::NHWC })),
830 framework::dataset::make("ACL Scalar type", { DataType::F16 })))
831 {
832 // Validate output
833 validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16));
834 }
835
836 #if defined(ARM_COMPUTE_ENABLE_BF16)
837 template <typename ScalarType>
838 using NEGEMMVarWidthFastMath = VariableWeightsFixtureNEInterface<NEGEMMConvolutionLayer, Tensor, Accessor, ScalarType, /*enable_fast_math*/ true>;
839
840 FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloatFastMath, NEGEMMVarWidthFastMath<float>, framework::DatasetMode::ALL,
841 combine(combine(datasets::SmallConvolutionLayerDataset(),
842 framework::dataset::make("DataLayout", { DataLayout::NHWC })),
843 framework::dataset::make("ACL Scalar type", { DataType::F32 })))
844 {
845 // Validate output
846 validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
847 }
848 #endif // ARM_COMPUTE_ENABLE_BF16
849
850 TEST_SUITE_END() // ExperimentalNEAPIVariableWeightWithFixtures
851 TEST_SUITE_END() // FIXED_FORMAT_KERNELS
852
853 #endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
854
855 TEST_SUITE(GEMMConvolutionLayer)
856 template <typename T>
857 using NEGEMMConvolutionLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T>;
858 template <typename T>
859 using NEGEMMConvolutionLayerMixedDataLayoutFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T, true>;
860
861 /** Test case for memory injection in @ref cpu::CpuGemmConv2d.
862 *
863 * Configure the operator once and inject memory at run-time in multiple executions.
864 *
865 * Checks performed in order:
866 * - Both runs compute the same output
867 */
TEST_CASE(MemoryInjection,framework::DatasetMode::ALL)868 TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
869 {
870 auto conv = std::make_unique<cpu::CpuGemmConv2d>();
871 const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NCHW);
872 const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW);
873 const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW);
874 auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW);
875 const auto conv_info = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR);
876 WeightsInfo weights_info(false, 3U, 3U, 1U);
877 conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info, weights_info);
878
879 // tensors are newly created every call of this lambda function
880 auto src = create_tensor<Tensor>(src_info);
881 auto weight = create_tensor<Tensor>(weight_info);
882 auto bias = create_tensor<Tensor>(bias_info);
883 src.allocator()->allocate();
884 weight.allocator()->allocate();
885 bias.allocator()->allocate();
886
887 ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
888 ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
889
890 auto mg = MemoryGroup{};
891 auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack);
892
893 auto run_conv = [&]() -> Tensor
894 {
895 auto dst = create_tensor<Tensor>(dst_info);
896 dst.allocator()->allocate();
897 run_pack.add_tensor(TensorType::ACL_DST, &dst);
898
899 library->fill_tensor_value(Accessor(src), 1.f);
900 library->fill_tensor_value(Accessor(weight), 2.f);
901 library->fill_tensor_value(Accessor(bias), 3.f);
902 // This operator is configured once and captured by this lambda.
903 conv->prepare(prep_pack);
904 conv->run(run_pack);
905 return dst;
906 };
907 auto result_0 = run_conv();
908 auto result_1 = run_conv();
909 for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
910 {
911 ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
912 }
913 }
914
915 /** Test case for memory injection in @ref NEGEMMConvolutionLayer.
916 *
917 * Make sure @ref NEGEMMConvolutionLayer still works through injecting the memory at configure time using the old API.
918 *
919 * Checks performed in order:
920 * - Both runs compute the same output
921 */
TEST_CASE(MultipleExecutionWithConfigure,framework::DatasetMode::ALL)922 TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
923 {
924 auto conv = std::make_unique<NEGEMMConvolutionLayer>();
925 const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NCHW);
926 const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW);
927 const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW);
928 auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW);
929 const auto conv_info = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR);
930 WeightsInfo weights_info(false, 3U, 3U, 1U);
931 auto run_conv = [&]()
932 {
933 auto src = create_tensor<Tensor>(src_info);
934 auto weight = create_tensor<Tensor>(weight_info);
935 auto bias = create_tensor<Tensor>(bias_info);
936 auto dst = create_tensor<Tensor>(dst_info);
937 conv->configure(&src, &weight, &bias, &dst, conv_info, weights_info);
938 src.allocator()->allocate();
939 weight.allocator()->allocate();
940 bias.allocator()->allocate();
941 dst.allocator()->allocate();
942 library->fill_tensor_value(Accessor(src), 1.f);
943 library->fill_tensor_value(Accessor(weight), 2.f);
944 library->fill_tensor_value(Accessor(bias), 3.f);
945 conv->run();
946 return dst;
947 };
948 auto result_0 = run_conv();
949 auto result_1 = run_conv();
950 for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
951 {
952 ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
953 }
954 }
955
956 TEST_SUITE(Float)
957 #if defined(ARM_COMPUTE_ENABLE_BF16)
TEST_SUITE(BFLOAT16)958 TEST_SUITE(BFLOAT16)
959 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
960 framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::BFLOAT16)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
961 ActivationFunctionsDataset))
962 {
963 // Validate output
964 validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
965 }
966 TEST_SUITE_END() // BFLOAT16
967 #endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
968
969 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)970 TEST_SUITE(FP16)
971 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
972 framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW })), ActivationFunctionsDataset))
973 {
974 // Validate output
975 validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
976 }
977 TEST_SUITE_END() // FP16
978 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
979
TEST_SUITE(FP32)980 TEST_SUITE(FP32)
981 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
982 framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
983 ActivationFunctionsDataset))
984 {
985 // Validate output
986 validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
987 }
988 FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::ALL,
989 combine(combine(combine(combine(combine(combine(combine(combine(combine(
990 framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
991 framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
992 framework::dataset::make("Bias", TensorShape(2U))),
993 framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
994 framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
995 framework::dataset::make("Dilation", Size2D(1, 1))),
996 framework::dataset::make("ReshapeWeights", { true })),
997 framework::dataset::make("DataType", DataType::F32)),
998 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
999 ActivationFunctionsDataset))
1000 {
1001 // Validate output
1002 validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
1003 }
1004 TEST_SUITE_END() // FP32
1005 TEST_SUITE_END() // Float
1006
1007 template <typename T>
1008 using NEGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T>;
1009 template <typename T>
1010 using NEGEMMConvolutionLayerQuantizedMixedDataLayoutFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T, true>;
1011
1012 template <typename T>
1013 using NEGEMMConvolutionLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEConvolutionLayer, T, int8_t>;
1014
1015 const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
1016 {
1017 ActivationLayerInfo(),
1018 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
1019 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
1020 });
1021 TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)1022 TEST_SUITE(QASYMM8)
1023 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1024 framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
1025 framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset))
1026 {
1027 // Validate output
1028 validate(Accessor(_target), _reference, tolerance_qasymm8);
1029 }
1030 FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
1031 combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
1032 framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
1033 framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
1034 framework::dataset::make("Bias", TensorShape(2U))),
1035 framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
1036 framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
1037 framework::dataset::make("Dilation", Size2D(1, 1))),
1038 framework::dataset::make("ReshapeWeights", { true })),
1039 framework::dataset::make("DataType", DataType::QASYMM8)),
1040 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
1041 framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
1042 QuantizedActivationFunctionsDataset))
1043 {
1044 // Validate output
1045 validate(Accessor(_target), _reference, tolerance_qasymm8);
1046 }
1047 TEST_SUITE_END() // QASYMM8
1048
TEST_SUITE(QASYMM8_SIGNED)1049 TEST_SUITE(QASYMM8_SIGNED)
1050 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1051 framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
1052 framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset))
1053 {
1054 // Validate output
1055 validate(Accessor(_target), _reference, tolerance_qasymm8);
1056 }
1057 FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL,
1058 combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
1059 framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
1060 framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
1061 framework::dataset::make("Bias", TensorShape(2U))),
1062 framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
1063 framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
1064 framework::dataset::make("Dilation", Size2D(1, 1))),
1065 framework::dataset::make("ReshapeWeights", { true })),
1066 framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
1067 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
1068 framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
1069 QuantizedActivationFunctionsDataset))
1070 {
1071 // Validate output
1072 validate(Accessor(_target), _reference, tolerance_qasymm8);
1073 }
1074 TEST_SUITE_END() // QASYMM8_SIGNED
1075
TEST_SUITE(QSYMM8_PER_CHANNEL)1076 TEST_SUITE(QSYMM8_PER_CHANNEL)
1077 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::ALL,
1078 combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1079 framework::dataset::make("ReshapeWeights", { true })),
1080 framework::dataset::make("DataType", { DataType::QASYMM8 })),
1081 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
1082 QuantizationData),
1083 QuantizedActivationFunctionsDataset),
1084 framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
1085 {
1086 // Validate output
1087 validate(Accessor(_target), _reference, tolerance_qasymm8);
1088 }
1089 FIXTURE_DATA_TEST_CASE(RunSmallSigned, NEGEMMConvolutionLayerQuantizedPerChannelFixture<int8_t>, framework::DatasetMode::ALL,
1090 combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1091 framework::dataset::make("ReshapeWeights", { true })),
1092 framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })),
1093 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
1094 QuantizationData),
1095 QuantizedActivationFunctionsDataset),
1096 framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
1097 {
1098 // Validate output
1099 validate(Accessor(_target), _reference, tolerance_qasymm8);
1100 }
1101 TEST_SUITE_END() // QSYMM8_PER_CHANNEL
1102 TEST_SUITE_END() // Quantized
1103
1104 TEST_SUITE_END() // GEMMConvolutionLayer
1105
1106 TEST_SUITE(DirectGEMMConv2d)
1107 template <typename T>
1108 using NEDirectGEMMConv2dLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEGEMMConv2d, T>;
1109
1110 /** Test case for memory injection in @ref cpu::CpuGemmDirectConv2d.
1111 *
1112 * Configure the operator once and inject memory at run-time in multiple executions.
1113 *
1114 * Checks performed in order:
1115 * - Both runs compute the same output
1116 */
TEST_CASE(MemoryInjection,framework::DatasetMode::ALL)1117 TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
1118 {
1119 auto conv = std::make_unique<cpu::CpuGemmDirectConv2d>();
1120 const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NHWC);
1121 const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NHWC);
1122 const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC);
1123 auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NHWC);
1124 const auto conv_info = Conv2dInfo{};
1125 conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info);
1126
1127 // tensors are newly created every call of this lambda function
1128 auto src = create_tensor<Tensor>(src_info);
1129 auto weight = create_tensor<Tensor>(weight_info);
1130 auto bias = create_tensor<Tensor>(bias_info);
1131 src.allocator()->allocate();
1132 weight.allocator()->allocate();
1133 bias.allocator()->allocate();
1134
1135 ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
1136 ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
1137
1138 auto mg = MemoryGroup{};
1139 auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack);
1140
1141 auto run_conv = [&]() -> Tensor
1142 {
1143 auto dst = create_tensor<Tensor>(dst_info);
1144 dst.allocator()->allocate();
1145 run_pack.add_tensor(TensorType::ACL_DST, &dst);
1146
1147 library->fill_tensor_value(Accessor(src), 1.f);
1148 library->fill_tensor_value(Accessor(weight), 2.f);
1149 library->fill_tensor_value(Accessor(bias), 3.f);
1150 // This operator is configured once and captured by this lambda.
1151 conv->prepare(prep_pack);
1152 conv->run(run_pack);
1153 return dst;
1154 };
1155 auto result_0 = run_conv();
1156 auto result_1 = run_conv();
1157 for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
1158 {
1159 ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
1160 }
1161 }
1162
1163 /** Test case for memory injection in @ref NEGEMMConv2d.
1164 *
1165 * Make sure @ref NEGEMMConv2d still works through injecting the memory at configure time using the old API.
1166 *
1167 * Checks performed in order:
1168 * - Both runs compute the same output
1169 */
TEST_CASE(MultipleExecutionWithConfigure,framework::DatasetMode::ALL)1170 TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
1171 {
1172 auto conv = std::make_unique<NEGEMMConv2d>();
1173 const auto src_info = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NHWC);
1174 const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NHWC);
1175 const auto bias_info = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC);
1176 auto dst_info = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NHWC);
1177 const auto conv_info = Conv2dInfo{};
1178 auto run_conv = [&]()
1179 {
1180 auto src = create_tensor<Tensor>(src_info);
1181 auto weight = create_tensor<Tensor>(weight_info);
1182 auto bias = create_tensor<Tensor>(bias_info);
1183 auto dst = create_tensor<Tensor>(dst_info);
1184 conv->configure(&src, &weight, &bias, &dst, conv_info);
1185 src.allocator()->allocate();
1186 weight.allocator()->allocate();
1187 bias.allocator()->allocate();
1188 dst.allocator()->allocate();
1189 library->fill_tensor_value(Accessor(src), 1.f);
1190 library->fill_tensor_value(Accessor(weight), 2.f);
1191 library->fill_tensor_value(Accessor(bias), 3.f);
1192 conv->run();
1193 return dst;
1194 };
1195 auto result_0 = run_conv();
1196 auto result_1 = run_conv();
1197 for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
1198 {
1199 ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
1200 }
1201 }
1202
1203 TEST_SUITE(Float)
TEST_SUITE(FP32)1204 TEST_SUITE(FP32)
1205 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1206 framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset))
1207 {
1208 // Validate output
1209 validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
1210 }
1211 TEST_SUITE_END() // FP32
1212 TEST_SUITE_END() // Float
1213
1214 #ifdef __aarch64__
1215 template <typename T>
1216 using NEDirectGEMMConv2dLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEGEMMConv2d, T>;
1217
1218 template <typename T>
1219 using NEDirectGEMMConv2dLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEGEMMConv2d, T, int8_t>;
1220
1221 const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
1222 {
1223 ActivationLayerInfo(),
1224 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
1225 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
1226 });
1227 TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)1228 TEST_SUITE(QASYMM8)
1229 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1230 framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
1231 framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset))
1232 {
1233 // Validate output
1234 validate(Accessor(_target), _reference, tolerance_qasymm8);
1235 }
1236 TEST_SUITE_END() // QASYMM8
1237
TEST_SUITE(QASYMM8_SIGNED)1238 TEST_SUITE(QASYMM8_SIGNED)
1239 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1240 framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
1241 framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset))
1242 {
1243 // Validate output
1244 validate(Accessor(_target), _reference, tolerance_qasymm8);
1245 }
1246 TEST_SUITE_END() // QASYMM8_SIGNED
1247
TEST_SUITE(QSYMM8_PER_CHANNEL)1248 TEST_SUITE(QSYMM8_PER_CHANNEL)
1249 FIXTURE_DATA_TEST_CASE(RunSmallSigned, NEDirectGEMMConv2dLayerQuantizedPerChannelFixture<int8_t>, framework::DatasetMode::ALL,
1250 combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1251 framework::dataset::make("ReshapeWeights", { true })),
1252 framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })),
1253 framework::dataset::make("DataLayout", { DataLayout::NHWC })),
1254 QuantizationData),
1255 QuantizedActivationFunctionsDataset),
1256 framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
1257 {
1258 // Validate output
1259 validate(Accessor(_target), _reference, tolerance_qasymm8);
1260 }
1261 TEST_SUITE_END() // QSYMM8_PER_CHANNEL
1262 TEST_SUITE_END() // Quantized
1263 #endif // __aarch64__
1264
1265 TEST_SUITE_END() // DirectGEMMConv2d
1266
1267 TEST_SUITE_END() // Neon
1268 } // namespace validation
1269 } // namespace test
1270 } // namespace arm_compute
1271