xref: /aosp_15_r20/external/ComputeLibrary/tests/validation/NEON/ConvolutionLayer.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2017-2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "arm_compute/core/Types.h"
25 #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
26 #include "arm_compute/runtime/NEON/functions/NEGEMMConv2d.h"
27 #include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
28 #include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"
29 #include "arm_compute/runtime/Tensor.h"
30 #include "arm_compute/runtime/TensorAllocator.h"
31 #include "src/core/helpers/MemoryHelpers.h"
32 #include "src/cpu/operators/CpuGemmConv2d.h"
33 #include "src/cpu/operators/CpuGemmDirectConv2d.h"
34 #include "src/cpu/operators/CpuWinogradConv2d.h"
35 #include "tests/NEON/Accessor.h"
36 #include "tests/PaddingCalculator.h"
37 #include "tests/datasets/LargeConvolutionLayerDataset.h"
38 #include "tests/datasets/SmallConvolutionLayerDataset.h"
39 #include "tests/datasets/TinyConvolutionLayerDataset.h"
40 #include "tests/framework/Asserts.h"
41 #include "tests/framework/Macros.h"
42 #include "tests/framework/datasets/Datasets.h"
43 #include "tests/validation/Validation.h"
44 #include "tests/validation/fixtures/ConvolutionLayerFixture.h"
45 #include "tests/validation/fixtures/WinogradConvolutionLayerFixture.h"
46 
47 namespace arm_compute
48 {
49 namespace test
50 {
51 namespace validation
52 {
53 namespace detail
54 {
55 template <>
configure_conv_function(NEGEMMConv2d & func,Tensor * src,const Tensor * weights,const Tensor * bias,Tensor * dst,const PadStrideInfo & info,const WeightsInfo & weights_info,const Size2D & dilation,const ActivationLayerInfo & act_info,unsigned int num_groups)56 void configure_conv_function<NEGEMMConv2d, Tensor>(NEGEMMConv2d &func,
57                                                    Tensor *src, const Tensor *weights, const Tensor *bias, Tensor *dst,
58                                                    const PadStrideInfo &info, const WeightsInfo &weights_info,
59                                                    const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
60 {
61     ARM_COMPUTE_UNUSED(weights_info);
62 
63     Conv2dInfo conv_info(info, dilation, act_info, false, num_groups);
64     func.configure(src, weights, bias, dst, conv_info);
65 }
66 } // namespace detail
67 namespace
68 {
69 const RelativeTolerance<float> rel_tolerance_f32(0.01f);              /**< Relative tolerance for FP32 types */
70 const RelativeTolerance<float> rel_tolerance_winograd_3x3_f32(0.05f); /**< Relative tolerance for FP32 types */
71 const AbsoluteTolerance<float> abs_tolerance_f32(0.002f);             /**< Absolute tolerance for FP32 types */
72 const AbsoluteTolerance<float> abs_tolerance_1xN_f32(0.0041f);        /**< Absolute tolerance for FP32 types */
73 
74 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
75 const AbsoluteTolerance<half> tolerance_convolution_layer_f16(half(0.4f));
76 constexpr float               tolerance_num_f16 = 0.15f;
77 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
78 
79 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
80 const RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.2f)); /**< Relative tolerance value for FP16 types */
81 const AbsoluteTolerance<float>            abs_tolerance_f16(0.2f);                   /**< Absolute tolerance for FP16 types */
82 constexpr float                           tolerance_num = 0.07f;                     /**< Tolerance number for the FP16 implementation */
83 #endif                                                                               /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
84 constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0);                           /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
85 
86 /** CNN data types */
87 const auto CNNDataTypes = framework::dataset::make("DataType",
88 {
89 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
90     DataType::F16,
91 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
92     DataType::F32,
93     DataType::QASYMM8,
94 });
95 const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
96 {
97     ActivationLayerInfo(),
98     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
99     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f)
100 });
101 
102 const auto QuantizationData = framework::dataset::make("QuantizationInfo",
103 {
104     QuantizationInfo(0.5f, 10),
105     QuantizationInfo(0.3f, 3),
106     QuantizationInfo(1.f, 10),
107     QuantizationInfo(1.1f, 10),
108 });
109 } // namespace
110 
111 TEST_SUITE(NEON)
TEST_SUITE(ConvolutionLayer)

// *INDENT-OFF*
// clang-format off
/** Check that NEConvolutionLayer::get_convolution_method() selects the expected
 * backend for representative configurations:
 * - 3x3 F32 weights, stride 1, fast-math on  -> WINOGRAD
 * - 5x5 F32 weights, stride 1, fast-math on  -> WINOGRAD
 * - 3x3 F32 weights, stride (2,1)            -> GEMM (Winograd needs unit stride)
 * - F16 weights with F32 input, fast-math off -> GEMM
 */
DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
                                          framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F32),
                                                                                  TensorInfo(TensorShape(23U, 27U, 32U, 4U), 1, DataType::F32),
                                                                                  TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32),
                                                                                  TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32)
                                          }),
                                          framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F32),
                                                                                    TensorInfo(TensorShape(5U, 5U, 32U, 21U), 1, DataType::F32),
                                                                                    TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
                                                                                    TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
                                          })),
                                          framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
                                                                                   TensorInfo(TensorShape(19U, 23U, 21U, 4U), 1, DataType::F32),
                                                                                   TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
                                                                                   TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
                                          })),
                                          framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
                                                                                 PadStrideInfo(1, 1, 0, 0),
                                                                                 PadStrideInfo(2, 1, 0, 0),
                                                                                 PadStrideInfo(3, 2, 1, 0)
                                          })),
                                          framework::dataset::make("FastMath", { true,
                                                                                 true,
                                                                                 false,
                                                                                 false
                                          })),
                                                                           framework::dataset::make("Expected", { ConvolutionMethod::WINOGRAD, ConvolutionMethod::WINOGRAD, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })),
               input_info, weights_info, output_info, conv_info, fast_math, expected)
{
    // Query the method with resizable clones so validation does not mutate the dataset entries.
    ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true),
                                                                            &weights_info.clone()->set_is_resizable(true),
                                                                            &output_info.clone()->set_is_resizable(true), conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math);
    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
}
// clang-format on
// *INDENT-ON*
TEST_SUITE_END() // ConvolutionLayer
153 
TEST_SUITE(WinogradLayer)
// Standard fixture: runs NEWinogradConvolutionLayer and compares against the reference.
template <typename T>
using NEWinogradConvolutionLayerFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T>;
// Mixed-data-layout variant (extra trailing template flags of the fixture —
// see WinogradConvolutionLayerFastMathValidationFixture for their meaning).
template <typename T>
using NEWinogradConvolutionLayerMixedDataLayoutFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T, T, true, true>;

// Variant that runs the convolution without a bias tensor.
template <typename T>
using NEWinogradConvolutionLayerNoBiasFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T, T, false>;
162 
163 /** Test case for memory injection in @ref cpu::CpuWinogradConv2d.
164  *
165  * Configure the operator once and inject memory at run-time in multiple executions.
166  *
167  * Checks performed in order:
168  * - Both runs compute the same output
169  */
TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
{
    auto                winograd = std::make_unique<cpu::CpuWinogradConv2d>();
    const auto          src_info = TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32);
    // NOTE(review): judging by the shapes, w_info/b_info appear name-swapped — the
    // 1x3x32x1 tensor (b_info) is passed in the weights slot of configure() and the
    // single-element tensor (w_info) in the bias slot. The uses below are mutually
    // consistent, only the names mislead; confirm before renaming.
    const auto          w_info   = TensorInfo(TensorShape(1U), 1, DataType::F32);
    const auto          b_info   = TensorInfo(TensorShape(1U, 3U, 32U, 1U), 1, DataType::F32);
    auto                dst_info = TensorInfo(TensorShape(8U, 6U, 1U), 1, DataType::F32);
    const PadStrideInfo pad_info{};

    // Configure once; all tensor memory is injected at run time via tensor packs.
    winograd->configure(&src_info, &b_info, &w_info, &dst_info, pad_info);

    // Input tensors are created and allocated once and reused by both runs below.
    auto a = create_tensor<Tensor>(src_info);
    auto b = create_tensor<Tensor>(b_info);
    auto c = create_tensor<Tensor>(w_info);
    a.allocator()->allocate();
    b.allocator()->allocate();
    c.allocator()->allocate();

    ITensorPack run_pack{ { TensorType::ACL_SRC_0, &a }, { TensorType::ACL_SRC_1, &b }, { TensorType::ACL_SRC_2, &c } };
    ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &b }, { TensorType::ACL_SRC_2, &c } };

    // Externally-managed auxiliary workspace requested by the operator.
    auto mg       = MemoryGroup{};
    auto ws       = manage_workspace<Tensor>(winograd->workspace(), mg, run_pack, prep_pack);
    auto run_conv = [&]() -> Tensor
    {
        // A fresh destination tensor per execution.
        auto dst = create_tensor<Tensor>(dst_info);
        dst.allocator()->allocate();

        run_pack.add_tensor(TensorType::ACL_DST, &dst);
        library->fill_tensor_value(Accessor(a), 1.f);
        library->fill_tensor_value(Accessor(b), 2.f);
        library->fill_tensor_value(Accessor(c), 3.f);

        // This operator is configured once and captured by this lambda.
        winograd->prepare(prep_pack);
        winograd->run(run_pack);
        return dst;
    };

    auto result_0 = run_conv();
    auto result_1 = run_conv();

    // Both executions must produce bit-identical outputs.
    for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
    {
        ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
    }
}
218 
219 /** Test case for memory injection in @ref NEWinogradConvolutionLayer.
220  *
221  * Make sure @ref NEWinogradConvolutionLayer still works through injecting the memory at configure time using the old API.
222  *
223  * Checks performed in order:
224  * - Both runs compute the same output
225  */
TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
{
    // NOTE(review): the local is named 'gemm' but holds a NEWinogradConvolutionLayer
    // (likely a copy-paste from a GEMM test) — rename candidate.
    auto                gemm     = std::make_unique<NEWinogradConvolutionLayer>();
    const auto          src_info = TensorInfo(TensorShape(8U, 8U, 32U), 1, DataType::F32);
    // NOTE(review): as in MemoryInjection above, w_info/b_info appear name-swapped
    // relative to their shapes; the configure()/fill calls below are self-consistent.
    const auto          w_info   = TensorInfo(TensorShape(1U), 1, DataType::F32);
    const auto          b_info   = TensorInfo(TensorShape(1U, 3U, 32U, 1U), 1, DataType::F32);
    auto                dst_info = TensorInfo(TensorShape(8U, 6U, 1U), 1, DataType::F32);
    const PadStrideInfo pad_info{};

    // Each invocation creates fresh tensors and re-configures the (shared) function,
    // exercising the legacy configure-time memory injection path.
    auto run_conv = [&]()
    {
        auto src = create_tensor<Tensor>(src_info);
        auto w   = create_tensor<Tensor>(w_info);
        auto b   = create_tensor<Tensor>(b_info);
        auto dst = create_tensor<Tensor>(dst_info);

        gemm->configure(&src, &b, &w, &dst, pad_info);

        src.allocator()->allocate();
        b.allocator()->allocate();
        w.allocator()->allocate();
        dst.allocator()->allocate();

        library->fill_tensor_value(Accessor(src), 1.f);
        library->fill_tensor_value(Accessor(b), 2.f);
        library->fill_tensor_value(Accessor(w), 3.f);
        gemm->run();
        return dst;
    };

    auto result_0 = run_conv();
    auto result_1 = run_conv();

    // Both executions must produce bit-identical outputs.
    for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
    {
        ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
    }
}
264 
TEST_SUITE(FP32)

TEST_SUITE(Conv1x3)
// Small 1x3 Winograd shapes, all activations, both layouts (precommit).
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}
// Single hand-picked 1x3 shape run through the mixed-data-layout fixture.
FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEWinogradConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(combine(combine(combine(combine(combine(
                                                                                   framework::dataset::make("Input", TensorShape(8U, 8U, 32U)),
                                                                                   framework::dataset::make("Weight", TensorShape(1U, 3U, 32U, 1U))),
                                                                               framework::dataset::make("Bias", TensorShape(1U))),
                                                                       framework::dataset::make("Output", TensorShape(8U, 6U, 1U))),
                                                               framework::dataset::make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0))),
                                                       framework::dataset::make("Dilation", Size2D(1U, 1U))),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}
// Large 1x3 shapes use the looser 1xN absolute tolerance (nightly).
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
}

TEST_SUITE_END() // Conv1x3
303 
TEST_SUITE(Conv3x1)
// Small/Large 3x1 Winograd shapes; large shapes use the looser 1xN tolerance.
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
}

TEST_SUITE_END() // Conv3x1

TEST_SUITE(Conv1x5)
// Small/Large 1x5 Winograd shapes.
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
}

TEST_SUITE_END() // Conv1x5

TEST_SUITE(Conv5x1)
// Small/Large 5x1 Winograd shapes.
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
}

TEST_SUITE_END() // Conv5x1
369 
TEST_SUITE(Conv7x1)
// Small/Large 7x1 Winograd shapes.
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}

FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
}
TEST_SUITE_END() // Conv7x1

TEST_SUITE(Conv1x7)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}

// NOTE(review): this RunLarge case inside the Conv1x7 suite pulls the *7x1* Large
// dataset (the RunSmall above uses the 1x7 one) — confirm whether
// LargeWinogradConvolutionLayer1x7Dataset was intended here.
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
}
TEST_SUITE_END() // Conv1x7
413 
TEST_SUITE(Conv3x3)
// Small/Large 3x3 Winograd shapes; large shapes use a dedicated relative tolerance.
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))

{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))

{
    // Validate output
    // floating point arithmetic the Winograd results will not be exactly the same as direct convolution, especially for big shapes
    validate(Accessor(_target), _reference, rel_tolerance_winograd_3x3_f32, 0.f, float(abs_tolerance_f32));
}
TEST_SUITE_END() // Conv3x3

TEST_SUITE(Conv5x5)
// Small/Large 5x5 Winograd shapes.
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))

{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))

{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}

TEST_SUITE_END() // Conv5x5
461 
// Bias-less variant over the concatenated small 3x3 and 5x5 datasets.
FIXTURE_DATA_TEST_CASE(RunSmallNoBias, NEWinogradConvolutionLayerNoBiasFixture<float>, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(framework::dataset::concat(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
                                                                          datasets::SmallWinogradConvolutionLayer5x5Dataset()),
                                               framework::dataset::make("DataType", { DataType::F32 })),
                                       ActivationFunctionsDataset),

                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
    // Validate output
    validate(Accessor(_target), _reference, abs_tolerance_f32);
}

TEST_SUITE_END() // FP32
475 
476 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
477 TEST_SUITE(FP16)
478 using CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, half, float>;
479 
480 DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
481                                           framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16),
482                                                                                   TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16)
483                                           }),
484                                           framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16),
485                                                                                     TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16)
486                                           })),
487                                           framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
488                                                                                    TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F16)
489                                           })),
490                                           framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
491                                                                                  PadStrideInfo(1, 1, 0, 0)
492                                           })),
493                                           framework::dataset::make("FastMath", { false, // case fp16 and fast_math False then disable Winograd
494                                                                                  true   // case fp16 and fast_math True then enable Winograd
495                                           })),
496                                                                            framework::dataset::make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::WINOGRAD })),
497                input_info, weights_info, output_info, conv_info, fast_math, expected)
498 {
499     ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true),
500                                                                             &weights_info.clone()->set_is_resizable(true),
501                                                                             &output_info.clone()->set_is_resizable(true), conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), fast_math);
502     ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
503 }
504 
TEST_SUITE(Conv3x3)
// FP16 Winograd 3x3 convolution, small shapes (precommit), NCHW and NHWC layouts.
// NOTE(review): the fixture alias carries a "CL" prefix although this is the NEON test
// file — the alias is declared earlier in this file; confirm/rename at the declaration site.
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
                                               framework::dataset::make("DataType", { DataType::F16 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))

{
    // Validate output against the reference using the FP16 Winograd tolerances.
    validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
}

// Same configuration on the large dataset (nightly only).
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
                                               framework::dataset::make("DataType", { DataType::F16 })),
                                       ActivationFunctionsDataset),
                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))

{
    // Validate output against the reference using the FP16 Winograd tolerances.
    validate(Accessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
}
TEST_SUITE_END() // Conv3x3
528 TEST_SUITE_END() // FP16
529 #endif           /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
530 TEST_SUITE_END() // WinogradLayer
531 
#ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
TEST_SUITE(FIXED_FORMAT_KERNELS)
TEST_SUITE(VariableWeightUtils)

// UC2_1_* tests: the user requests a specific fixed format, but there is no kernel that supports it.

// Aliases binding the enable_fast_math template flag of HasOptImplFixture, so each
// test below only names the convolution class under test.
template <typename ConvolutionClass>
using HasOptImplFixtureNoFastMath = HasOptImplFixture<ConvolutionClass, /*enable_fast_math*/ false>;

template <typename ConvolutionClass>
using HasOptImplFixtureFastMath = HasOptImplFixture<ConvolutionClass, /*enable_fast_math*/ true>;
543 
// UC2_1

// Querying the unsupported fixed format OHWIo2 for F32 must report that no kernel was found,
// both through the operator-level API (CpuGemmConv2d) ...
FIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 })))
{
    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
}
// ... and through the function-level API (NEGEMMConvolutionLayer).
FIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 })))
{
    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
}

// Same queries with fast math enabled: still no kernel for OHWIo2/F32.
FIXTURE_DATA_TEST_CASE(UC2_1_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 })))
{
    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
}

FIXTURE_DATA_TEST_CASE(UC2_1_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo2 })))
{
    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
}
572 
// UC2_2_* tests: the user requests a specific fixed format, and a
// kernel that support that fixed format is found.

// OHWIo4 for F32 is expected to be supported: a kernel must be found and the
// computed weight format must be exactly the requested one.
FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo4 })))
{
    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo4, framework::LogLevel::ERRORS);
}

// Same check through the function-level NEGEMMConvolutionLayer interface.
FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo4 })))
{
    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo4, framework::LogLevel::ERRORS);
}
591 
#if defined(ARM_COMPUTE_ENABLE_BF16)

// UC2_2 with fast math enabled: explicitly request the BF16 fixed format
// OHWIo8i4_bf16 and expect a kernel supporting exactly that format.
FIXTURE_DATA_TEST_CASE(UC2_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 })))
{
    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
    // Use the same comparison style as the other weight-format checks in this file
    // (was ARM_COMPUTE_EXPECT_EQUAL; behavior is identical, only style differs).
    ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS);
}

// Same check through the function-level NEGEMMConvolutionLayer interface.
FIXTURE_DATA_TEST_CASE(UC2_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::OHWIo8i4_bf16 })))
{
    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(_computed_weight_format == arm_compute::WeightFormat::OHWIo8i4_bf16, framework::LogLevel::ERRORS);
}

#endif // ARM_COMPUTE_ENABLE_BF16
611 
// UC3_1_* tests: the user queries for ANY fixed format, but there is
// no kernel that support the use case specified by the user (for
// example, there is no fixed format kernel for the datatype of the
// problem).

// S32 has no fixed-format kernel, so an ANY query must report no kernel found.
FIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::S32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
{
    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
}

FIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::S32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
{
    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
}

// Fast math does not change the outcome for S32: still no kernel.
FIXTURE_DATA_TEST_CASE(UC3_1_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::S32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
{
    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
}

FIXTURE_DATA_TEST_CASE(UC3_1_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::S32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
{
    ARM_COMPUTE_EXPECT(!_kernel_found, framework::LogLevel::ERRORS);
}
644 
// UC3_2_* tests: the user queries for ANY fixed format. The search
// succeeded and the fixed format found is prompted back for
// consumption by the user. Note that we just test the
// _computed_weight_format to be anything but not the formats that are
// not fixed formats (ANY and UNSPECIFIED). This is because the weight
// format that the runtime produces depends on the size of the vector
// units of the hardware where the tests is executed. For example, a
// format like OHWIo4 for FP32 data returned for 128-bit NEON hardware
// is replaced by OHWIo8 when running on 256-bit SVE.

FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d, HasOptImplFixtureNoFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
{
    // A kernel must be found, and the reported format must be a concrete fixed format.
    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
}
663 
664 FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer, HasOptImplFixtureNoFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
665                        combine(framework::dataset::make("DataType", { DataType::F32 }),
666                                framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
667 {
668     ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS);
669     ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
670 }
671 
#if defined(ARM_COMPUTE_ENABLE_BF16)

// ANY-format query with fast math: besides finding a concrete fixed format, the
// returned format must be a fast-math (BF16) one.
FIXTURE_DATA_TEST_CASE(UC3_2_CpuGemmConv2d_FastMath, HasOptImplFixtureFastMath<cpu::CpuGemmConv2d>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
{
    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS);
}

// Same check through the function-level NEGEMMConvolutionLayer interface.
FIXTURE_DATA_TEST_CASE(UC3_2_NEGEMMConvolutionLayer_FastMath, HasOptImplFixtureFastMath<NEGEMMConvolutionLayer>, framework::DatasetMode::ALL,
                       combine(framework::dataset::make("DataType", { DataType::F32 }),
                               framework::dataset::make("QueryWeightFormat", { arm_compute::WeightFormat::ANY })))
{
    ARM_COMPUTE_EXPECT(_kernel_found, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::ANY, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(_computed_weight_format != arm_compute::WeightFormat::UNSPECIFIED, framework::LogLevel::ERRORS);
    ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format_fast_math(_computed_weight_format), framework::LogLevel::ERRORS);
}

#endif // ARM_COMPUTE_ENABLE_BF16
695 
namespace
{
// Each case is (input OHWI shape, expected padded shape, queried fixed weight format).
// Shapes are written as { dim0, dim1, dim2, N } where N is the O (output channels) dimension.
using TestCaseType          = std::tuple<TensorShape, TensorShape, arm_compute::WeightFormat>;
auto prepare_weights_shapes = framework::dataset::make("TensorShape",
{
    // OHWIo<interleave_by>i<block_by>
    //
    // OHWI --> O'HWI', where:
    //
    //   O'= smallest multiple of <interleave_by> such that O<=O'
    //   I'= smallest multiple of <block_by> such that I<=I'
    //

    // Change N for OHWIo4
    TestCaseType({ { 1U, 1U, 1U, 1U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1U, 1U, 1U, 2U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1U, 1U, 1U, 3U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1U, 1U, 1U, 4U }, { 1U, 1U, 1U, 4U }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1U, 1U, 1U, 5U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1U, 1U, 1U, 6U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1U, 1U, 1U, 7U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1U, 1U, 1U, 8U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1U, 1U, 1U, 9U }, { 1U, 1U, 1U, 12U }, arm_compute::WeightFormat::OHWIo4 }),
    // Change N for OHWIo8
    TestCaseType({ { 1U, 1U, 1U, 1U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1U, 1U, 1U, 2U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1U, 1U, 1U, 3U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1U, 1U, 1U, 4U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1U, 1U, 1U, 5U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1U, 1U, 1U, 6U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1U, 1U, 1U, 7U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1U, 1U, 1U, 8U }, { 1U, 1U, 1U, 8U }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1U, 1U, 1U, 9U }, { 1U, 1U, 1U, 16U }, arm_compute::WeightFormat::OHWIo8 }),
    // Change N for OHWIo4 when H, W and C are not 1
    TestCaseType({ { 3U, 4U, 2U, 1U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 2U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 3U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 4U }, { 3, 4, 2, 4 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 5U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 6U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 7U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 8U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 9U }, { 3, 4, 2, 12 }, arm_compute::WeightFormat::OHWIo4 }),

    // Fix N and move HWI around, with different data layouts and formats
    TestCaseType({ { 2U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 3U, 4U, 2U, 5U }, { 3, 4, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 2U, 4U, 3U, 9U }, { 2, 4, 3, 16 }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 3U, 4U, 2U, 9U }, { 3, 4, 2, 16 }, arm_compute::WeightFormat::OHWIo8 }),
    TestCaseType({ { 1024U, 1U, 1U, 1001U }, { 1024, 1, 1, 1008 }, arm_compute::WeightFormat::OHWIo8 }),

    // Adding <block_by> on I (=C)
    TestCaseType({ { 1U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),
    TestCaseType({ { 2U, 4U, 3U, 5U }, { 2, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),
    TestCaseType({ { 3U, 4U, 3U, 5U }, { 4, 4, 3, 8 }, arm_compute::WeightFormat::OHWIo4i2 }),

    // ---------
    TestCaseType({ { 2, 2, 1, 5 }, { 2, 2, 1, 8 }, arm_compute::WeightFormat::OHWIo4 }),
    TestCaseType({ { 1, 2, 2, 5 }, { 1, 2, 2, 8 }, arm_compute::WeightFormat::OHWIo4 }),

});
} // unnamed namespace
758 
// Verify that prepare_weights() pads an OHWI weight shape to the shape mandated by
// the requested fixed weight format.
DATA_TEST_CASE(PrepareWeightShape, framework::DatasetMode::ALL,
               prepare_weights_shapes, shapes)
{
    // Unpack the test tuple: input shape, expected padded shape, queried format.
    const TensorShape               input_shape    = std::get<0>(shapes);
    const TensorShape               expected_shape = std::get<1>(shapes);
    const arm_compute::WeightFormat weight_format  = std::get<2>(shapes);

    // Data type and layout are fixed; only the shape transformation is under test here.
    const auto       src_info      = TensorInfo(input_shape, 1 /*num_channels, deprecated*/, DataType::F32, DataLayout::NHWC);
    const TensorInfo prepared_info = ::arm_compute::test::validation::prepare_weights(src_info, weight_format);
    ARM_COMPUTE_EXPECT_EQUAL(prepared_info.tensor_shape(), expected_shape, framework::LogLevel::ERRORS);
}
771 
TEST_SUITE_END() // VariableWeightUtils

TEST_SUITE(ExperimentalCpuAPIVariableWeightWithFixtures)

// Variable-weights runs driven through the experimental operator-level cpu::CpuGemmConv2d API.
template <typename ScalarType>
using VarWidth = VariableWeightsFixture<cpu::CpuGemmConv2d, Tensor, Accessor, ScalarType, /*enable_fast_math*/ false>;

FIXTURE_DATA_TEST_CASE(RunSmallFloat, VarWidth<float>, framework::DatasetMode::ALL,
                       combine(combine(datasets::SmallConvolutionLayerDataset(),
                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                               framework::dataset::make("ACL Scalar type", { DataType::F32 })))
{
    // Validate output with FP32 tolerances.
    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32))
;
}

FIXTURE_DATA_TEST_CASE(RunSmallHalf, VarWidth<half>, framework::DatasetMode::ALL,
                       combine(combine(datasets::SmallConvolutionLayerDataset(),
                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                               framework::dataset::make("ACL Scalar type", { DataType::F16 })))
{
    // Validate output with FP16 tolerances.
    validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16));
}

#if defined(ARM_COMPUTE_ENABLE_BF16)
// Fast-math variant, available only on BF16-capable builds.
template <typename ScalarType>
using VarWidthFastMath = VariableWeightsFixture<cpu::CpuGemmConv2d, Tensor, Accessor, ScalarType, /*enable_fast_math*/ true>;

FIXTURE_DATA_TEST_CASE(RunSmallFloatFastMath, VarWidthFastMath<float>, framework::DatasetMode::ALL,
                       combine(combine(datasets::SmallConvolutionLayerDataset(),
                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                               framework::dataset::make("ACL Scalar type", { DataType::F32 })))
{
    // Validate output with FP32 tolerances.
    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
}
#endif // ARM_COMPUTE_ENABLE_BF16

TEST_SUITE_END() // ExperimentalCpuAPIVariableWeightWithFixtures
812 
TEST_SUITE(ExperimentalNEAPIVariableWeightWithFixtures)

// Same variable-weights coverage as above, but driven through the function-level
// NEGEMMConvolutionLayer interface.
template <typename ScalarType>
using NEGEMMVarWidth = VariableWeightsFixtureNEInterface<NEGEMMConvolutionLayer, Tensor, Accessor, ScalarType, /*enable_fast_math*/ false>;

FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloat, NEGEMMVarWidth<float>, framework::DatasetMode::ALL,
                       combine(combine(datasets::SmallConvolutionLayerDataset(),
                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                               framework::dataset::make("ACL Scalar type", { DataType::F32 })))
{
    // Validate output with FP32 tolerances.
    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
}

FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallHalf, NEGEMMVarWidth<half>, framework::DatasetMode::ALL,
                       combine(combine(datasets::SmallConvolutionLayerDataset(),
                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                               framework::dataset::make("ACL Scalar type", { DataType::F16 })))
{
    // Validate output with FP16 tolerances.
    validate(Accessor(_target), _reference, rel_tolerance_f16, 0.f, half(abs_tolerance_f16));
}

#if defined(ARM_COMPUTE_ENABLE_BF16)
// Fast-math variant, available only on BF16-capable builds.
template <typename ScalarType>
using NEGEMMVarWidthFastMath = VariableWeightsFixtureNEInterface<NEGEMMConvolutionLayer, Tensor, Accessor, ScalarType, /*enable_fast_math*/ true>;

FIXTURE_DATA_TEST_CASE(NEGEMMRunSmallFloatFastMath, NEGEMMVarWidthFastMath<float>, framework::DatasetMode::ALL,
                       combine(combine(datasets::SmallConvolutionLayerDataset(),
                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                               framework::dataset::make("ACL Scalar type", { DataType::F32 })))
{
    // Validate output with FP32 tolerances.
    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
}
#endif // ARM_COMPUTE_ENABLE_BF16

TEST_SUITE_END() // ExperimentalNEAPIVariableWeightWithFixtures
TEST_SUITE_END() // FIXED_FORMAT_KERNELS

#endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
854 
TEST_SUITE(GEMMConvolutionLayer)
// Validation fixture for the GEMM-based convolution path, driven through NEConvolutionLayer.
template <typename T>
using NEGEMMConvolutionLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T>;
// Same fixture with the mixed-data-layout flag enabled.
template <typename T>
using NEGEMMConvolutionLayerMixedDataLayoutFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T, true>;
860 
861 /** Test case for memory injection in @ref cpu::CpuGemmConv2d.
862  *
863  * Configure the operator once and inject memory at run-time in multiple executions.
864  *
865  * Checks performed in order:
866  * - Both runs compute the same output
867  */
TEST_CASE(MemoryInjection,framework::DatasetMode::ALL)868 TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
869 {
870     auto        conv        = std::make_unique<cpu::CpuGemmConv2d>();
871     const auto  src_info    = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NCHW);
872     const auto  weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW);
873     const auto  bias_info   = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW);
874     auto        dst_info    = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW);
875     const auto  conv_info   = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR);
876     WeightsInfo weights_info(false, 3U, 3U, 1U);
877     conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info, weights_info);
878 
879     // tensors are newly created every call of this lambda function
880     auto src    = create_tensor<Tensor>(src_info);
881     auto weight = create_tensor<Tensor>(weight_info);
882     auto bias   = create_tensor<Tensor>(bias_info);
883     src.allocator()->allocate();
884     weight.allocator()->allocate();
885     bias.allocator()->allocate();
886 
887     ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
888     ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
889 
890     auto mg = MemoryGroup{};
891     auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack);
892 
893     auto run_conv = [&]() -> Tensor
894     {
895         auto dst = create_tensor<Tensor>(dst_info);
896         dst.allocator()->allocate();
897         run_pack.add_tensor(TensorType::ACL_DST, &dst);
898 
899         library->fill_tensor_value(Accessor(src), 1.f);
900         library->fill_tensor_value(Accessor(weight), 2.f);
901         library->fill_tensor_value(Accessor(bias), 3.f);
902         // This operator is configured once and captured by this lambda.
903         conv->prepare(prep_pack);
904         conv->run(run_pack);
905         return dst;
906     };
907     auto result_0 = run_conv();
908     auto result_1 = run_conv();
909     for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
910     {
911         ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
912     }
913 }
914 
915 /** Test case for memory injection in @ref NEGEMMConvolutionLayer.
916  *
917  * Make sure @ref NEGEMMConvolutionLayer still works through injecting the memory at configure time using the old API.
918  *
919  * Checks performed in order:
920  * - Both runs compute the same output
921  */
TEST_CASE(MultipleExecutionWithConfigure,framework::DatasetMode::ALL)922 TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
923 {
924     auto        conv        = std::make_unique<NEGEMMConvolutionLayer>();
925     const auto  src_info    = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NCHW);
926     const auto  weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NCHW);
927     const auto  bias_info   = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NCHW);
928     auto        dst_info    = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NCHW);
929     const auto  conv_info   = PadStrideInfo(1, 1, 0, 0, 2, 2, DimensionRoundingType::FLOOR);
930     WeightsInfo weights_info(false, 3U, 3U, 1U);
931     auto        run_conv = [&]()
932     {
933         auto src    = create_tensor<Tensor>(src_info);
934         auto weight = create_tensor<Tensor>(weight_info);
935         auto bias   = create_tensor<Tensor>(bias_info);
936         auto dst    = create_tensor<Tensor>(dst_info);
937         conv->configure(&src, &weight, &bias, &dst, conv_info, weights_info);
938         src.allocator()->allocate();
939         weight.allocator()->allocate();
940         bias.allocator()->allocate();
941         dst.allocator()->allocate();
942         library->fill_tensor_value(Accessor(src), 1.f);
943         library->fill_tensor_value(Accessor(weight), 2.f);
944         library->fill_tensor_value(Accessor(bias), 3.f);
945         conv->run();
946         return dst;
947     };
948     auto result_0 = run_conv();
949     auto result_1 = run_conv();
950     for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
951     {
952         ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
953     }
954 }
955 
TEST_SUITE(Float)
#if defined(ARM_COMPUTE_ENABLE_BF16)
TEST_SUITE(BFLOAT16)
// GEMM convolution with DataType::BFLOAT16 (NHWC only), compared against the float
// reference with the FP32 tolerances.
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
                                                                                                                    framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::BFLOAT16)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                                                                                                            ActivationFunctionsDataset))
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
}
TEST_SUITE_END() // BFLOAT16
#endif           /* defined(ARM_COMPUTE_ENABLE_BF16) */
968 
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
// GEMM convolution with F16 data (NCHW), validated with the FP16 tolerances.
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
                                                                                                                   framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW })), ActivationFunctionsDataset))
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
}
TEST_SUITE_END() // FP16
#endif           /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
979 
TEST_SUITE(FP32)
// GEMM convolution with F32 data in both NCHW and NHWC layouts.
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
                                                                                                                    framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                                                                                            ActivationFunctionsDataset))
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
}
// Single hand-picked configuration exercising the mixed-data-layout code path.
FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::ALL,
                       combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                                           framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
                                                                                           framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
                                                                                       framework::dataset::make("Bias", TensorShape(2U))),
                                                                               framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
                                                                       framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
                                                               framework::dataset::make("Dilation", Size2D(1, 1))),
                                                       framework::dataset::make("ReshapeWeights", { true })),
                                               framework::dataset::make("DataType", DataType::F32)),
                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                               ActivationFunctionsDataset))
{
    // Validate output
    validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
}
TEST_SUITE_END() // FP32
TEST_SUITE_END() // Float
1006 
// Quantized validation fixtures, driven through NEConvolutionLayer.
template <typename T>
using NEGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T>;
// Same fixture with the mixed-data-layout flag enabled.
template <typename T>
using NEGEMMConvolutionLayerQuantizedMixedDataLayoutFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEConvolutionLayer, T, true>;

// Per-channel quantized fixture with int8_t weights.
template <typename T>
using NEGEMMConvolutionLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEConvolutionLayer, T, int8_t>;

// Activations exercised by the quantized tests: identity, RELU and bounded RELU (cap 6).
const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
{
    ActivationLayerInfo(),
    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
});
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
// QASYMM8 GEMM convolution, both layouts, fixed quantization (scale 2/255, offset 10).
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
                                                                                                                       framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                                                                                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset))
{
    // Validate output
    validate(Accessor(_target), _reference, tolerance_qasymm8);
}
// Single hand-picked QASYMM8 configuration exercising the mixed-data-layout code path.
FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
                       combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                                                   framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
                                                                                                   framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
                                                                                               framework::dataset::make("Bias", TensorShape(2U))),
                                                                                       framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
                                                                               framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
                                                                       framework::dataset::make("Dilation", Size2D(1, 1))),
                                                               framework::dataset::make("ReshapeWeights", { true })),
                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
                               QuantizedActivationFunctionsDataset))
{
    // Validate output
    validate(Accessor(_target), _reference, tolerance_qasymm8);
}
TEST_SUITE_END() // QASYMM8
1048 
TEST_SUITE(QASYMM8_SIGNED)
// Small signed-quantized convolutions; negative zero-point exercises the signed path.
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
                                                                                                                      framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                                                                                                      framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset))
{
    // Validate output
    validate(Accessor(_target), _reference, tolerance_qasymm8);
}
1057 FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL,
1058                        combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
1059                                                                                                    framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
1060                                                                                                    framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
1061                                                                                                framework::dataset::make("Bias", TensorShape(2U))),
1062                                                                                        framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
1063                                                                                framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
1064                                                                        framework::dataset::make("Dilation", Size2D(1, 1))),
1065                                                                framework::dataset::make("ReshapeWeights", { true })),
1066                                                        framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
1067                                                framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
1068                                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
1069                                QuantizedActivationFunctionsDataset))
1070 {
1071     // Validate output
1072     validate(Accessor(_target), _reference, tolerance_qasymm8);
1073 }
1074 TEST_SUITE_END() // QASYMM8_SIGNED
1075 
TEST_SUITE(QSYMM8_PER_CHANNEL)
// Per-channel weight quantization with QASYMM8 activations.
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::ALL,
                       combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
                                                                       framework::dataset::make("ReshapeWeights", { true })),
                                                               framework::dataset::make("DataType", { DataType::QASYMM8 })),
                                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                               QuantizationData),
                                       QuantizedActivationFunctionsDataset),
                               framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
{
    // Validate output
    validate(Accessor(_target), _reference, tolerance_qasymm8);
}
1089 FIXTURE_DATA_TEST_CASE(RunSmallSigned, NEGEMMConvolutionLayerQuantizedPerChannelFixture<int8_t>, framework::DatasetMode::ALL,
1090                        combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1091                                                                        framework::dataset::make("ReshapeWeights", { true })),
1092                                                                framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })),
1093                                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
1094                                                QuantizationData),
1095                                        QuantizedActivationFunctionsDataset),
1096                                framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
1097 {
1098     // Validate output
1099     validate(Accessor(_target), _reference, tolerance_qasymm8);
1100 }
1101 TEST_SUITE_END() // QSYMM8_PER_CHANNEL
1102 TEST_SUITE_END() // Quantized
1103 
1104 TEST_SUITE_END() // GEMMConvolutionLayer
1105 
1106 TEST_SUITE(DirectGEMMConv2d)
1107 template <typename T>
1108 using NEDirectGEMMConv2dLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEGEMMConv2d, T>;
1109 
1110 /** Test case for memory injection in @ref cpu::CpuGemmDirectConv2d.
1111  *
1112  * Configure the operator once and inject memory at run-time in multiple executions.
1113  *
1114  * Checks performed in order:
1115  * - Both runs compute the same output
1116  */
TEST_CASE(MemoryInjection,framework::DatasetMode::ALL)1117 TEST_CASE(MemoryInjection, framework::DatasetMode::ALL)
1118 {
1119     auto       conv        = std::make_unique<cpu::CpuGemmDirectConv2d>();
1120     const auto src_info    = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NHWC);
1121     const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NHWC);
1122     const auto bias_info   = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC);
1123     auto       dst_info    = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NHWC);
1124     const auto conv_info   = Conv2dInfo{};
1125     conv->configure(&src_info, &weight_info, &bias_info, &dst_info, conv_info);
1126 
1127     // tensors are newly created every call of this lambda function
1128     auto src    = create_tensor<Tensor>(src_info);
1129     auto weight = create_tensor<Tensor>(weight_info);
1130     auto bias   = create_tensor<Tensor>(bias_info);
1131     src.allocator()->allocate();
1132     weight.allocator()->allocate();
1133     bias.allocator()->allocate();
1134 
1135     ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
1136     ITensorPack prep_pack{ { TensorType::ACL_SRC_1, &weight }, { TensorType::ACL_SRC_2, &bias } };
1137 
1138     auto mg = MemoryGroup{};
1139     auto ws = manage_workspace<Tensor>(conv->workspace(), mg, run_pack, prep_pack);
1140 
1141     auto run_conv = [&]() -> Tensor
1142     {
1143         auto dst = create_tensor<Tensor>(dst_info);
1144         dst.allocator()->allocate();
1145         run_pack.add_tensor(TensorType::ACL_DST, &dst);
1146 
1147         library->fill_tensor_value(Accessor(src), 1.f);
1148         library->fill_tensor_value(Accessor(weight), 2.f);
1149         library->fill_tensor_value(Accessor(bias), 3.f);
1150         // This operator is configured once and captured by this lambda.
1151         conv->prepare(prep_pack);
1152         conv->run(run_pack);
1153         return dst;
1154     };
1155     auto result_0 = run_conv();
1156     auto result_1 = run_conv();
1157     for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
1158     {
1159         ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
1160     }
1161 }
1162 
1163 /** Test case for memory injection in @ref NEGEMMConv2d.
1164  *
1165  * Make sure @ref NEGEMMConv2d still works through injecting the memory at configure time using the old API.
1166  *
1167  * Checks performed in order:
1168  * - Both runs compute the same output
1169  */
TEST_CASE(MultipleExecutionWithConfigure,framework::DatasetMode::ALL)1170 TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
1171 {
1172     auto       conv        = std::make_unique<NEGEMMConv2d>();
1173     const auto src_info    = TensorInfo(TensorShape(1U, 5U, 2U), 1, DataType::F32, DataLayout::NHWC);
1174     const auto weight_info = TensorInfo(TensorShape(1U, 3U, 2U, 3U), 1, DataType::F32, DataLayout::NHWC);
1175     const auto bias_info   = TensorInfo(TensorShape(3U), 1, DataType::F32, DataLayout::NHWC);
1176     auto       dst_info    = TensorInfo(TensorShape(1U, 7U, 3U), 1, DataType::F32, DataLayout::NHWC);
1177     const auto conv_info   = Conv2dInfo{};
1178     auto       run_conv    = [&]()
1179     {
1180         auto src    = create_tensor<Tensor>(src_info);
1181         auto weight = create_tensor<Tensor>(weight_info);
1182         auto bias   = create_tensor<Tensor>(bias_info);
1183         auto dst    = create_tensor<Tensor>(dst_info);
1184         conv->configure(&src, &weight, &bias, &dst, conv_info);
1185         src.allocator()->allocate();
1186         weight.allocator()->allocate();
1187         bias.allocator()->allocate();
1188         dst.allocator()->allocate();
1189         library->fill_tensor_value(Accessor(src), 1.f);
1190         library->fill_tensor_value(Accessor(weight), 2.f);
1191         library->fill_tensor_value(Accessor(bias), 3.f);
1192         conv->run();
1193         return dst;
1194     };
1195     auto result_0 = run_conv();
1196     auto result_1 = run_conv();
1197     for(size_t i = 0; i < result_0.info()->tensor_shape().total_size(); ++i)
1198     {
1199         ARM_COMPUTE_EXPECT(((float *)result_0.buffer())[i] == ((float *)result_1.buffer())[i], framework::LogLevel::ERRORS);
1200     }
1201 }
1202 
1203 TEST_SUITE(Float)
TEST_SUITE(FP32)1204 TEST_SUITE(FP32)
1205 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1206                                                                                                                      framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset))
1207 {
1208     // Validate output
1209     validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
1210 }
1211 TEST_SUITE_END() // FP32
1212 TEST_SUITE_END() // Float
1213 
1214 #ifdef __aarch64__
1215 template <typename T>
1216 using NEDirectGEMMConv2dLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<Tensor, Accessor, NEGEMMConv2d, T>;
1217 
1218 template <typename T>
1219 using NEDirectGEMMConv2dLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<Tensor, Accessor, NEGEMMConv2d, T, int8_t>;
1220 
1221 const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
1222 {
1223     ActivationLayerInfo(),
1224     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
1225     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
1226 });
1227 TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)1228 TEST_SUITE(QASYMM8)
1229 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1230                                                                                                                         framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
1231                                                                                                                         framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset))
1232 {
1233     // Validate output
1234     validate(Accessor(_target), _reference, tolerance_qasymm8);
1235 }
1236 TEST_SUITE_END() // QASYMM8
1237 
TEST_SUITE(QASYMM8_SIGNED)1238 TEST_SUITE(QASYMM8_SIGNED)
1239 FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1240                                                                                                                        framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
1241                                                                                                                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset))
1242 {
1243     // Validate output
1244     validate(Accessor(_target), _reference, tolerance_qasymm8);
1245 }
1246 TEST_SUITE_END() // QASYMM8_SIGNED
1247 
TEST_SUITE(QSYMM8_PER_CHANNEL)1248 TEST_SUITE(QSYMM8_PER_CHANNEL)
1249 FIXTURE_DATA_TEST_CASE(RunSmallSigned, NEDirectGEMMConv2dLayerQuantizedPerChannelFixture<int8_t>, framework::DatasetMode::ALL,
1250                        combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
1251                                                                        framework::dataset::make("ReshapeWeights", { true })),
1252                                                                framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })),
1253                                                        framework::dataset::make("DataLayout", { DataLayout::NHWC })),
1254                                                QuantizationData),
1255                                        QuantizedActivationFunctionsDataset),
1256                                framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
1257 {
1258     // Validate output
1259     validate(Accessor(_target), _reference, tolerance_qasymm8);
1260 }
1261 TEST_SUITE_END() // QSYMM8_PER_CHANNEL
1262 TEST_SUITE_END() // Quantized
1263 #endif           // __aarch64__
1264 
1265 TEST_SUITE_END() // DirectGEMMConv2d
1266 
1267 TEST_SUITE_END() // Neon
1268 } // namespace validation
1269 } // namespace test
1270 } // namespace arm_compute
1271