xref: /aosp_15_r20/external/ComputeLibrary/tests/validation/fixtures/ConvolutionLayerFixture.h (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2017-2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_TEST_CONVOLUTION_LAYER_FIXTURE
25 #define ARM_COMPUTE_TEST_CONVOLUTION_LAYER_FIXTURE
26 
27 #include "arm_compute/core/TensorShape.h"
28 #include "arm_compute/core/Types.h"
29 #include "arm_compute/graph/Utils.h"
30 #ifdef ARM_COMPUTE_OPENCL_ENABLED
31 #include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"
32 #endif // ARM_COMPUTE_OPENCL_ENABLED
33 #include "arm_compute/runtime/NEON/NEScheduler.h"
34 #include "src/core/NEON/kernels/arm_gemm/utils.hpp"
35 #include "src/graph/mutators/MutatorUtils.h"
36 #include "tests/AssetsLibrary.h"
37 #include "tests/Globals.h"
38 #include "tests/IAccessor.h"
39 #include "tests/framework/Asserts.h"
40 #include "tests/framework/Fixture.h"
41 #include "tests/validation/Helpers.h"
42 #include "tests/validation/reference/ActivationLayer.h"
43 #include "tests/validation/reference/ConvolutionLayer.h"
44 #include "tests/validation/reference/PadLayer.h"
45 #include "tests/validation/reference/Permute.h"
46 #include "tests/validation/reference/Utils.h"
47 
48 #include <random>
49 #include <type_traits>
50 
51 namespace arm_compute
52 {
53 namespace test
54 {
55 namespace validation
56 {
57 namespace detail
58 {
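// Dispatch helpers for configuring the convolution function under test: the generic
// overload forwards enable_fast_math=false explicitly, while the CLGEMMConvolutionLayer
// overload omits that argument because its configure() does not take it.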
59 template <typename ConvolutionFunction, typename TensorType>
60 #ifdef ARM_COMPUTE_OPENCL_ENABLED
61 std::enable_if_t<!std::is_same<ConvolutionFunction, CLGEMMConvolutionLayer>::value, void>
62 #else // ARM_COMPUTE_OPENCL_ENABLED
63 void
64 #endif // ARM_COMPUTE_OPENCL_ENABLED
65 configure_conv_function(ConvolutionFunction &func,
66                              TensorType *src, const TensorType *weights, const TensorType *bias, TensorType *dst,
67                              const PadStrideInfo &info, const WeightsInfo &weights_info,
68                              const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
69 {
70     func.configure(src, weights, bias, dst, info, weights_info, dilation, act_info, false /* enable_fast_math */, num_groups);
71 }
72 
73 #ifdef ARM_COMPUTE_OPENCL_ENABLED
74 template <typename ConvolutionFunction, typename TensorType>
75 std::enable_if_t<std::is_same<ConvolutionFunction, CLGEMMConvolutionLayer>::value, void>
76 configure_conv_function(ConvolutionFunction &func,
77                              TensorType *src, const TensorType *weights, const TensorType *bias, TensorType *dst,
78                              const PadStrideInfo &info, const WeightsInfo &weights_info,
79                              const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups)
80 {
81     func.configure(src, weights, bias, dst, info, weights_info, dilation, act_info, num_groups);
82 }
83 #endif // ARM_COMPUTE_OPENCL_ENABLED
84 } // namespace detail
85 
86 template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW>
87 class ConvolutionValidationGenericFixture : public framework::Fixture
88 {
89 public:
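    // Quantized 8-bit convolutions accumulate into 32-bit integers, so the bias type is
    // int32_t for (u)int8 inputs and matches T otherwise.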
90     using TBias = typename std::conditional < std::is_same<typename std::decay<T>::type, uint8_t>::value
91                   || std::is_same<typename std::decay<T>::type, int8_t>::value,
92                   int32_t, T >::type;
93 
94 public:
95     template <typename...>
96     void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights,
97                DataType data_type, DataType weights_data_type, DataLayout data_layout, QuantizationInfo quantization_info, QuantizationInfo weight_quantization_info, ActivationLayerInfo act_info,
98                bool mixed_layout = false, PaddingList pre_pad_layer = PaddingList({}))
99     {
100         _mixed_layout             = mixed_layout;
101         _data_type                = data_type;
102         _weights_data_type        = weights_data_type;
103         _is_quantized             = is_data_type_quantized_asymmetric(data_type);
104         _is_bfloat16              = data_type == DataType::BFLOAT16;
105         _bias_data_type           = _is_quantized ? DataType::S32 : (_is_bfloat16 ? DataType::F32 : data_type);
106         _output_data_type         = _is_bfloat16 ? DataType::F32 : data_type;
107         _quantization_info        = quantization_info;
108         _weight_quantization_info = weight_quantization_info;
109         _data_layout              = data_layout;
110 
111         _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, dilation, act_info, pre_pad_layer);
112         _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, dilation, act_info, pre_pad_layer);
113     }
114 
115 protected:
116     void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst)
117     {
118         // Test multi data layout graph cases, where the data layout changes after configure()
119         src.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
120         dst.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW);
121 
122         // Compute Convolution function
123         layer.run();
124 
125         // Reinstate the original data layout so the test suite can properly check the values
126         src.info()->set_data_layout(_data_layout);
127         dst.info()->set_data_layout(_data_layout);
128     }
129 
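    // Round-trip float values through bfloat16 so the reference data carries the same
    // precision loss as the bfloat16 target path.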
130     void regularize_values(void *values, size_t size)
131     {
132         float *fvalues = static_cast<float *>(values);
133         for(size_t i = 0; i < size; ++i)
134         {
135             fvalues[i] = float(bfloat16(fvalues[i]));
136         }
137     }
138 
139     template <typename U>
140     void fill(U &&tensor, int i)
141     {
142         switch(tensor.data_type())
143         {
144             case DataType::QASYMM8:
145             {
146                 std::pair<int, int>                     bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f);
147                 std::uniform_int_distribution<uint32_t> distribution(bounds.first, bounds.second);
148                 library->fill(tensor, distribution, i);
149                 break;
150             }
151             case DataType::QASYMM8_SIGNED:
152             {
153                 std::pair<int, int>                    bounds = get_quantized_qasymm8_signed_bounds(tensor.quantization_info(), -1.0f, 1.0f);
154                 std::uniform_int_distribution<int32_t> distribution(bounds.first, bounds.second);
155                 library->fill(tensor, distribution, i);
156                 break;
157             }
158             case DataType::QSYMM8_PER_CHANNEL:
159             {
160                 int min_bound = 128;
161                 int max_bound = -127;
162                 for(size_t i = 0; i < _weight_quantization_info.scale().size(); i++)
163                 {
164                     std::pair<int, int> bounds = get_symm_quantized_per_channel_bounds(tensor.quantization_info(), -1.0f, 1.0f, i);
165                     if(bounds.first < min_bound)
166                     {
167                         min_bound = bounds.first;
168                     }
169                     if(bounds.second > max_bound)
170                     {
171                         max_bound = bounds.second;
172                     }
173                 }
174                 std::uniform_int_distribution<int32_t> distribution(min_bound, max_bound);
175                 library->fill(tensor, distribution, i);
176                 break;
177             }
178             case DataType::S32:
179             {
180                 std::uniform_int_distribution<int32_t> distribution(-100, 100);
181                 library->fill(tensor, distribution, i);
182                 break;
183             }
184             case DataType::BFLOAT16:
185             {
186                 arm_compute::utils::uniform_real_distribution_16bit<bfloat16> distribution{ -1.0f, 1.0f };
187                 library->fill(tensor, distribution, i);
188                 break;
189             }
190             case DataType::F16:
191             {
192                 arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f };
193                 library->fill(tensor, distribution, i);
194                 break;
195             }
196             case DataType::F32:
197             {
198                 std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
199                 library->fill(tensor, distribution, i);
200                 break;
201             }
202             default:
203                 library->fill_tensor_uniform(tensor, i);
204         }
205     }
206 
207     // The given input shapes are in NCHW format
208     TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, TensorShape output_shape, const PadStrideInfo &info,
209                               bool reshape_weights, const Size2D &dilation, const ActivationLayerInfo act_info, PaddingList pre_pad_layer = PaddingList({}))
210     {
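        // Grouped convolution: the number of groups is the ratio of input channels to
        // weights channels (shapes are still in NCHW order at this point).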
211         ARM_COMPUTE_ERROR_ON((input_shape[2] % weights_shape[2]) != 0);
212 
213         const unsigned int num_groups = input_shape[2] / weights_shape[2];
214 
215         if(_data_layout == DataLayout::NHWC)
216         {
217             permute(input_shape, PermutationVector(2U, 0U, 1U));
218             permute(weights_shape, PermutationVector(2U, 0U, 1U));
219             permute(output_shape, PermutationVector(2U, 0U, 1U));
220 
221             if(pre_pad_layer.size() > 0)
222             {
223                 // Make sure padding entries exist for each of the C, H, W dimensions
224                 for(unsigned int i = 0; i < 3 - pre_pad_layer.size(); ++i)
225                 {
226                     pre_pad_layer.push_back({ 0, 0 });
227                 }
228 
229                 // Rotate the padding info from NCHW to NHWC ordering
230                 std::rotate(pre_pad_layer.begin(), pre_pad_layer.begin() + 2, pre_pad_layer.begin() + 3);
231             }
232         }
233 
234         const int idx_width  = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
235         const int idx_height = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
236 
237         WeightsInfo weights_info(!reshape_weights, weights_shape[idx_width], weights_shape[idx_height], weights_shape[3]);
238         TensorShape reshaped_weights_shape(weights_shape);
239 
240         // Create tensors
241         TensorType src     = create_tensor<TensorType>(input_shape, _data_type, 1, _quantization_info, _data_layout);
242         TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, _weights_data_type, 1, _weight_quantization_info, _data_layout);
243         TensorType bias    = create_tensor<TensorType>(bias_shape, _bias_data_type, 1, _quantization_info, _data_layout);
244         TensorType dst     = create_tensor<TensorType>(output_shape, _output_data_type, 1, _quantization_info, _data_layout);
245 
246         // Create and configure function
247         FunctionType conv;
248 
249         const unsigned int height_index = arm_compute::graph::get_dimension_idx(_data_layout, DataLayoutDimension::HEIGHT);
250         const unsigned int width_index  = arm_compute::graph::get_dimension_idx(_data_layout, DataLayoutDimension::WIDTH);
251 
252         const PaddingInfo pad_w = width_index < pre_pad_layer.size() ? pre_pad_layer[width_index] : PaddingInfo(0, 0);
253         const PaddingInfo pad_h = height_index < pre_pad_layer.size() ? pre_pad_layer[height_index] : PaddingInfo(0, 0);
254 
255         if(pre_pad_layer.size() > 0 && arm_compute::graph::is_padding_in_height_or_width(_data_layout, pre_pad_layer))
256         {
257             // This mirrors the logic implemented in NodeFusionMutator -> fuse_pad_with_convolution
258             const PadStrideInfo new_conv_info(
259                 info.stride().first,
260                 info.stride().second,
261                 info.pad_left() + pad_w.first,
262                 info.pad_right() + pad_w.second,
263                 info.pad_top() + pad_h.first,
264                 info.pad_bottom() + pad_h.second,
265                 info.round());
266             detail::configure_conv_function(conv, &src, &weights, &bias, &dst, new_conv_info, weights_info, dilation, act_info, num_groups);
267         }
268         else
269         {
270             detail::configure_conv_function(conv, &src, &weights, &bias, &dst, info, weights_info, dilation, act_info, num_groups);
271         }
272 
273         ARM_COMPUTE_ASSERT(src.info()->is_resizable());
274         ARM_COMPUTE_ASSERT(weights.info()->is_resizable());
275         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
276         ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
277 
278         add_padding_x({ &src, &weights, &bias, &dst }, _data_layout);
279 
280         // Allocate tensors
281         src.allocator()->allocate();
282         weights.allocator()->allocate();
283         bias.allocator()->allocate();
284         dst.allocator()->allocate();
285 
286         ARM_COMPUTE_ASSERT(!src.info()->is_resizable());
287         ARM_COMPUTE_ASSERT(!weights.info()->is_resizable());
288         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
289         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
290 
291         // Fill tensors
292         fill(AccessorType(src), 0);
293         fill(AccessorType(weights), 1);
294         fill(AccessorType(bias), 2);
295 
296         if(_mixed_layout)
297         {
298             mix_layout(conv, src, dst);
299         }
300         else
301         {
302             // Compute Convolution function
303             conv.run();
304         }
305 
306         return dst;
307     }
308 
309     SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info,
310                                       const Size2D &dilation, const ActivationLayerInfo act_info, PaddingList pre_pad_layer = PaddingList({}))
311     {
312         ARM_COMPUTE_ERROR_ON((input_shape[2] % weights_shape[2]) != 0);
313 
314         const unsigned int num_groups = input_shape[2] / weights_shape[2];
315 
316         // Setup reference data types
317         const DataType src_dt     = _is_bfloat16 ? DataType::F32 : _data_type;
318         const DataType weights_dt = _is_bfloat16 ? DataType::F32 : _weights_data_type;
319         const DataType bias_dt    = _is_bfloat16 ? DataType::F32 : _bias_data_type;
320 
321         // Create reference
322         SimpleTensor<T>     src{ input_shape, src_dt, 1, _quantization_info };
323         SimpleTensor<TW>    weights{ weights_shape, weights_dt, 1, _weight_quantization_info };
324         SimpleTensor<TBias> bias{ bias_shape, bias_dt, 1, _quantization_info };
325 
326         fill(src, 0);
327         fill(weights, 1);
328         fill(bias, 2);
329 
330         // Round-trip the values through bfloat16 to perform the conversion and reduce the mismatches in the output
331         if(_is_bfloat16)
332         {
333             regularize_values(static_cast<void *>(src.data()), src.num_elements());
334             regularize_values(static_cast<void *>(weights.data()), weights.num_elements());
335         }
336 
337         if(pre_pad_layer.size() > 0)
338         {
339             src = reference::pad_layer<T>(src, pre_pad_layer, PixelValue(0), PaddingMode::CONSTANT);
340         }
341 
342         return (act_info.enabled()) ? reference::activation_layer<T>(reference::convolution_layer<T>(src, weights, bias, output_shape, info, dilation, num_groups),
343                                                                      act_info) :
344                reference::convolution_layer<T>(src, weights, bias, output_shape, info, dilation, num_groups);
345     }
346 
347     TensorType       _target{};
348     SimpleTensor<T>  _reference{};
349     DataType         _data_type{};
350     DataType         _weights_data_type{};
351     DataType         _bias_data_type{};
352     DataType         _output_data_type{};
353     DataLayout       _data_layout{};
354     QuantizationInfo _quantization_info{};
355     QuantizationInfo _weight_quantization_info{};
356     bool             _is_quantized = false;
357     bool             _is_bfloat16  = false;
358     bool             _mixed_layout = false;
359 };
360 
361 template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
362 class ConvolutionValidationFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>
363 {
364 public:
365     template <typename...>
366     void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type,
367                DataLayout data_layout, ActivationLayerInfo act_info)
368     {
369         ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights,
370                                                                                                  data_type, data_type, data_layout,
371                                                                                                  QuantizationInfo(), QuantizationInfo(), act_info, mixed_layout);
372     }
373 };
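// Usage sketch (names are illustrative, not taken from a specific test suite): a backend test
// typically aliases the fixture for a concrete function/tensor/accessor triple and instantiates
// it through the framework macros, e.g.
//   template <typename T>
//   using NEConvolutionLayerFixture = ConvolutionValidationFixture<Tensor, Accessor, NEConvolutionLayer, T>;
//   FIXTURE_DATA_TEST_CASE(RunSmall, NEConvolutionLayerFixture<float>, framework::DatasetMode::ALL, SmallDataset)
//   { validate(Accessor(_target), _reference); }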
374 
375 template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
376 class ConvolutionValidationWithPaddingFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>
377 {
378 public:
379     template <typename...>
380     void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type,
381                DataLayout data_layout, ActivationLayerInfo act_info, PaddingList pre_pad_layer = PaddingList({}))
382     {
383         ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights,
384                                                                                                  data_type, data_type, data_layout,
385                                                                                                  QuantizationInfo(), QuantizationInfo(), act_info, mixed_layout, pre_pad_layer);
386     }
387 };
388 
389 template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
390 class ConvolutionValidationQuantizedFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>
391 {
392 public:
393     template <typename...>
394     void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type,
395                DataLayout data_layout, QuantizationInfo quantization_info, ActivationLayerInfo act_info)
396     {
397         ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights,
398                                                                                                  data_type, data_type, data_layout, quantization_info, quantization_info, act_info, mixed_layout);
399     }
400 };
401 
402 template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename TW>
403 class ConvolutionValidationQuantizedPerChannelFixture : public ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW>
404 {
405 public:
406     template <typename...>
407     void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type,
408                DataLayout data_layout, QuantizationInfo quantization_info, ActivationLayerInfo act_info, DataType weights_data_type)
409     {
410         std::vector<float>                    weights_scales{};
411         std::mt19937                          gen(library->seed());
412         std::uniform_real_distribution<float> dis(0.01f, 1.f);
413         for(size_t i = 0; i < output_shape[2]; ++i)
414         {
415             weights_scales.push_back(dis(gen));
416         }
417         ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T, TW>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation,
418                                                                                                   reshape_weights, data_type, weights_data_type, data_layout,
419                                                                                                   quantization_info, QuantizationInfo(weights_scales), act_info);
420     }
421 };
422 
423 #ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
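// Builds a TensorInfo describing the weights buffer in the requested fixed weight format:
// the input (C) and output (N/O) channel counts are rounded up to multiples of block_by and
// interleave_by respectively, and the strides/total size are adjusted for the padded shape.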
424 inline TensorInfo prepare_weights(const TensorInfo tensor_info, const arm_compute::WeightFormat weight_format)
425 {
426     const DataLayout data_layout = tensor_info.data_layout();
427     ARM_COMPUTE_EXPECT(data_layout == DataLayout::NHWC, framework::LogLevel::ERRORS);
428     const DataType    data_type    = tensor_info.data_type();
429     const TensorShape tensor_shape = tensor_info.tensor_shape();
430     const int         N            = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES)]; // N=O
431     const int         H            = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT)];
432     const int         W            = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)];
433     const int         C            = tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL)]; // C=I
434 
435     const int interleave_by = arm_compute::interleave_by(weight_format);
436     const int block_by      = arm_compute::block_by(weight_format);
437     const int Ip            = arm_gemm::roundup<unsigned int>(C, block_by);      // C'=I'
438     const int Op            = arm_gemm::roundup<unsigned int>(N, interleave_by); // O'=N'
439 
440     arm_compute::Strides strides_in_bytes = tensor_info.strides_in_bytes();
441     strides_in_bytes.set(1, Ip * interleave_by * H * W * tensor_info.element_size());
442     strides_in_bytes.set(2, Ip * Op * tensor_info.element_size());
443 
444     const size_t offset_first_element_in_bytes = tensor_info.offset_first_element_in_bytes();
445 
446     // Total size needs to include padded dimensions
447     const size_t total_size_in_bytes = Op * H * W * Ip * tensor_info.element_size();
448 
449     const TensorShape TS(Ip, W, H, Op);
450 
451     TensorInfo new_tensor_info = tensor_info;
452     new_tensor_info.init(TS, 1 /*num_channels, deprecated*/, data_type, strides_in_bytes,
453         offset_first_element_in_bytes, total_size_in_bytes);
454     return new_tensor_info;
455 }
456 
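// Copies NHWC-ordered weights into the blocked OHWIo<interleave_by>i<block_by> buffer
// produced by prepare_weights().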
457 template <typename ScalarType, typename AccessorType>
458 inline void rearrange_data(const AccessorType src, AccessorType dst, const arm_compute::WeightFormat weight_format)
459 {
460     ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format(weight_format), framework::LogLevel::ERRORS);
461     // Data Layout: OHWIo<interleave_by>i<block_by>
462     const int         interleave_by    = arm_compute::interleave_by(weight_format);
463     const int         block_by         = arm_compute::block_by(weight_format);
464     const TensorShape src_tensor_shape = src.shape();
465     const DataLayout  data_layout      = src.data_layout();
466     ARM_COMPUTE_EXPECT(data_layout == DataLayout::NHWC, framework::LogLevel::ERRORS);
467     const unsigned int O  = src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES)]; // N=O
468     const unsigned int H  = src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT)];
469     const unsigned int W  = src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)];
470     const unsigned int I  = src_tensor_shape[get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL)]; // C=I
471     const unsigned int Ip = arm_gemm::roundup<unsigned int>(I, block_by);                                                 // C'=I'
472     const unsigned int Op = arm_gemm::roundup<unsigned int>(O, interleave_by);                                            // N'=O'
473 
474     ARM_COMPUTE_EXPECT_EQUAL(Op * H * W * Ip, (unsigned)dst.num_elements(), framework::LogLevel::ERRORS);
475     ARM_COMPUTE_EXPECT(src.num_elements() <= dst.num_elements(), framework::LogLevel::ERRORS);
476 
477     const ScalarType *src_ptr = reinterpret_cast<const ScalarType *>(src.data());
478     ScalarType       *dst_ptr = reinterpret_cast<ScalarType *>(dst.data());
479     for(unsigned i = 0; i < I; ++i)
480         for(unsigned w = 0; w < W; ++w)
481             for(unsigned h = 0; h < H; ++h)
482                 for(unsigned o = 0; o < O; ++o)
483                 {
484                     ScalarType src_element;
485                     switch(data_layout)
486                     {
487                         case DataLayout::NHWC:
488                         {
489                             src_element = src_ptr[o * H * W * I + h * W * I + w * I + i];
490                         }
491                         break;
492                         default:
493                         {
494                             ARM_COMPUTE_ERROR("Unsupported memory layout.");
495                         }
496                     }
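                    // Destination index in OHWIo<interleave_by>i<block_by> order: blocks of
                    // interleave_by output channels are outermost, then H, W, blocks of
                    // block_by input channels, and finally the interleaved o/i remainders.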
497                     const int x5      = std::floor(((float)o) / interleave_by);
498                     const int x4      = h;
499                     const int x3      = w;
500                     const int x2      = std::floor((float)i / block_by);
501                     const int x1      = o % interleave_by;
502                     const int x0      = i % block_by;
503                     unsigned  dst_idx = x5 * H * W * Ip * interleave_by
504                                         + x4 * W * Ip * interleave_by
505                                         + x3 * Ip * interleave_by
506                                         + x2 * interleave_by * block_by
507                                         + x1 * block_by
508                                         + x0;
509                     dst_ptr[dst_idx] = src_element;
510                 }
511 }
512 
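// Base fixture for fixed-format ("variable weights") convolutions: it asks has_opt_impl()
// for a fixed-format kernel, lets the derived class rearrange the weights into the reported
// format and runs the convolution twice with different weight values.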
513 template <typename ConvolutionFunction, typename TensorClass, typename AccessorType, typename ScalarType, bool enable_fast_math>
514 class VariableWeightsFixtureBaseClass : public framework::Fixture
515 {
516 public:
517     template <typename...>
518     void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataLayout data_layout,
519                const DataType data_type)
520     {
521         conv = std::make_unique<ConvolutionFunction>();
522         // prepare data
523         _data_layout = data_layout;
524         // Fixed format kernels for variable weights can work only with NHWC format.
525         ARM_COMPUTE_EXPECT_EQUAL(_data_layout, DataLayout::NHWC, framework::LogLevel::ERRORS);
526         _data_type = data_type;
527         // Run the target and compute the reference
528         compute_target(input_shape, weights_shape, bias_shape, output_shape, info, dilation);
529         compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, dilation);
530     }
531     void teardown()
532     {
533         _target.allocator()->free();
534     }
535 
536 protected:
537     template <typename U>
538     void fill(U &&tensor, int i)
539     {
540         switch(tensor.data_type())
541         {
542             case DataType::F16:
543             {
544                 arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f };
545                 library->fill(tensor, distribution, i);
546                 break;
547             }
548             case DataType::F32:
549             {
550                 std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
551                 library->fill(tensor, distribution, i);
552                 break;
553             }
554             default:
555                 library->fill_tensor_uniform(tensor, i);
556         }
557     }
558 
559 private:
560     virtual void configure_and_execute_kernel(TensorInfo src_tensor_info, TensorInfo weight_tensor_info, TensorInfo bias_tensor_info, TensorInfo dst_tensor_info, const WeightsInfo weights_info,
561                                               const PadStrideInfo &conv_info,
562                                               const Size2D        &dilation) = 0;
563 
564     void compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, TensorShape output_shape, const PadStrideInfo &conv_info,
565                         const Size2D &dilation)
566     {
567         // The dataset is always in NCHW format - we need to make C the
568         // innermost dimension because the fixed-format kernels work only
569         // with NHWC layout.
570         permute(input_shape, PermutationVector(2U, 0U, 1U));
571         permute(weights_shape, PermutationVector(2U, 0U, 1U));
572         permute(output_shape, PermutationVector(2U, 0U, 1U));
573         const auto src_tensor_info    = TensorInfo(input_shape, 1, _data_type, _data_layout);
574         const auto weight_tensor_info = TensorInfo(weights_shape, 1, _data_type, _data_layout);
575         const auto bias_tensor_info   = TensorInfo(bias_shape, 1, _data_type, _data_layout);
576         auto       dst_tensor_info    = TensorInfo(output_shape, 1, _data_type, _data_layout);
577 
578         const int kernel_height = weights_shape[get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT)];
579         const int kernel_width  = weights_shape[get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH)];
580         const int num_kernels   = weights_shape[get_data_layout_dimension_index(_data_layout, DataLayoutDimension::BATCHES)];
581 
582         const WeightsInfo query_weights_info(/*reshape_weights*/ false, kernel_width, kernel_height, num_kernels, false, arm_compute::WeightFormat::ANY);
583         const bool        kernel_found = bool(ConvolutionFunction::has_opt_impl(_computed_weight_format, &src_tensor_info, &weight_tensor_info,
584                                                                                 &bias_tensor_info, &dst_tensor_info, conv_info, query_weights_info));
585         // Make sure that the setup finds a fixed-format kernel as requested by the test case.
586         ARM_COMPUTE_EXPECT(kernel_found, framework::LogLevel::ERRORS);
587         ARM_COMPUTE_EXPECT(arm_compute::is_fixed_format(_computed_weight_format), framework::LogLevel::ERRORS);
588 
589         const WeightsInfo weights_info(/*reshape_weights*/ false, kernel_width, kernel_height, num_kernels, false, _computed_weight_format);
590         configure_and_execute_kernel(src_tensor_info, weight_tensor_info, bias_tensor_info, dst_tensor_info, weights_info, conv_info,
591                                      dilation);
592     }
593     void compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info,
594                            const Size2D &dilation)
595     {
596         ARM_COMPUTE_UNUSED(input_shape, weights_shape, bias_shape, output_shape, info,
597                            dilation);
598 
599         // Create reference
600         SimpleTensor<ScalarType> src{ input_shape, _data_type };
601         SimpleTensor<ScalarType> weights{ weights_shape, _data_type };
602         SimpleTensor<ScalarType> bias{ bias_shape, _data_type };
603         fill(src, 0);
604         fill(bias, 1);
605         fill(weights, 3);
606         _reference = reference::convolution_layer<ScalarType>(src, weights, bias, output_shape, info, dilation, 1 /*num_groups*/);
607     }
608     DataLayout _data_layout{};
609     DataType   _data_type{};
610 
611 protected:
612     std::unique_ptr<ConvolutionFunction> conv{};
613     arm_compute::WeightFormat            _computed_weight_format{ arm_compute::WeightFormat::UNSPECIFIED };
614     TensorClass                          _target{};
615     SimpleTensor<ScalarType>             _reference{};
616 };
617 
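// Variant that drives the operator-style interface: the function is configured on TensorInfo
// objects and executed through an ITensorPack, with the transformed weights passed as ACL_SRC_1.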
618 template <typename ConvolutionFunction, typename TensorClass, typename AccessorType, typename ScalarType, bool enable_fast_math>
619 class VariableWeightsFixture : public VariableWeightsFixtureBaseClass<ConvolutionFunction, TensorClass, AccessorType, ScalarType, enable_fast_math>
620 {
621     void configure_and_execute_kernel(TensorInfo src_tensor_info, TensorInfo weight_tensor_info, TensorInfo bias_tensor_info, TensorInfo dst_tensor_info, const WeightsInfo weights_info,
622                                       const PadStrideInfo &conv_info,
623                                       const Size2D        &dilation)
624     {
625         this->conv->configure(&src_tensor_info, &weight_tensor_info, &bias_tensor_info, &dst_tensor_info, conv_info, weights_info, dilation, ActivationLayerInfo(), enable_fast_math);
626 
627         // Allocate input tensors
628         auto             src                 = create_tensor<TensorClass>(src_tensor_info);
629         auto             weights_original    = create_tensor<TensorClass>(weight_tensor_info);
630         const TensorInfo new_tensor_info     = prepare_weights(weight_tensor_info, this->_computed_weight_format);
631         auto             weights_transformed = create_tensor<TensorClass>(new_tensor_info);
632         auto             bias                = create_tensor<TensorClass>(bias_tensor_info);
633         src.allocator()->allocate();
634         weights_original.allocator()->allocate();
635         weights_transformed.allocator()->allocate();
636         bias.allocator()->allocate();
637         // Allocate destination tensor
638         this->_target = create_tensor<TensorClass>(dst_tensor_info);
639         this->_target.allocator()->allocate();
640 
641         // Prepare source and biases that are left unchanged.
642         this->fill(AccessorType(src), 0);
643         this->fill(AccessorType(bias), 1);
644 
645         // First run
646         this->fill(AccessorType(weights_original), 2);
647         rearrange_data<ScalarType, AccessorType>(AccessorType(weights_original), AccessorType(weights_transformed), this->_computed_weight_format);
648         ITensorPack run_pack{ { TensorType::ACL_SRC_0, &src }, { TensorType::ACL_SRC_1, &weights_transformed }, { TensorType::ACL_SRC_2, &bias }, { TensorType::ACL_DST, &(this->_target) } };
649         this->conv->run(run_pack);
650         // Second run, with new weights
651         this->fill(AccessorType(weights_original), 3);
652         rearrange_data<ScalarType, AccessorType>(AccessorType(weights_original), AccessorType(weights_transformed), this->_computed_weight_format);
653         this->conv->run(run_pack);
654         src.allocator()->free();
655         weights_original.allocator()->free();
656         weights_transformed.allocator()->free();
657         bias.allocator()->free();
658     }
659 };
660 
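// Variant that drives the run-time function interface: the function is configured directly on
// the tensors and run() is called without a tensor pack.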
661 template <typename ConvolutionFunction, typename TensorClass, typename AccessorType, typename ScalarType, bool enable_fast_math>
662 class VariableWeightsFixtureNEInterface : public VariableWeightsFixtureBaseClass<ConvolutionFunction, TensorClass, AccessorType, ScalarType, enable_fast_math>
663 {
664     void configure_and_execute_kernel(TensorInfo src_tensor_info, TensorInfo weight_tensor_info, TensorInfo bias_tensor_info, TensorInfo dst_tensor_info, const WeightsInfo weights_info,
665                                       const PadStrideInfo &conv_info,
666                                       const Size2D        &dilation)
667     {
668         // Allocate input tensors
669         auto             src                 = create_tensor<TensorClass>(src_tensor_info);
670         auto             weights_original    = create_tensor<TensorClass>(weight_tensor_info);
671         const TensorInfo new_tensor_info     = prepare_weights(weight_tensor_info, this->_computed_weight_format);
672         auto             weights_transformed = create_tensor<TensorClass>(new_tensor_info);
673         auto             bias                = create_tensor<TensorClass>(bias_tensor_info);
674         src.allocator()->allocate();
675         weights_original.allocator()->allocate();
676         weights_transformed.allocator()->allocate();
677         bias.allocator()->allocate();
678         // Allocate destination tensor
679         this->_target = create_tensor<TensorClass>(dst_tensor_info);
680         this->_target.allocator()->allocate();
681         this->conv->configure(&src, &weights_transformed, &bias, &(this->_target), conv_info, weights_info, dilation, ActivationLayerInfo(), enable_fast_math);
682         // Prepare source and biases that are left unchanged.
683         this->fill(AccessorType(src), 0);
684         this->fill(AccessorType(bias), 1);
685 
686         // First run
687         this->fill(AccessorType(weights_original), 2);
688         rearrange_data<ScalarType, AccessorType>(AccessorType(weights_original), AccessorType(weights_transformed), this->_computed_weight_format);
689         this->conv->run();
690         // Second run, with new weights
691         this->fill(AccessorType(weights_original), 3);
692         rearrange_data<ScalarType, AccessorType>(AccessorType(weights_original), AccessorType(weights_transformed), this->_computed_weight_format);
693         this->conv->run();
694         src.allocator()->free();
695         weights_original.allocator()->free();
696         weights_transformed.allocator()->free();
697         bias.allocator()->free();
698     }
699 };
700 
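// Fixture that only queries ConvolutionClass::has_opt_impl() for the requested weight format;
// no convolution is executed, the test just checks whether a fixed-format kernel is reported.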
701 template <typename ConvolutionClass, bool enable_fast_math>
702 class HasOptImplFixture : public framework::Fixture
703 {
704 public:
705     template <typename...>
706     void setup(DataType data_type, arm_compute::WeightFormat query_weight_format)
707     {
708         auto              conv        = std::make_unique<ConvolutionClass>();
709         const auto        src_info    = TensorInfo(TensorShape(56U, 56U, 64U), 1, data_type, DataLayout::NHWC);
710         const auto        weight_info = TensorInfo(TensorShape(64, 3U, 3U, 64U), 1, enable_fast_math ? DataType::BFLOAT16 : data_type, DataLayout::NHWC);
711         const auto        bias_info   = TensorInfo(TensorShape(64U), 1, data_type, DataLayout::NHWC);
712         auto              dst_info    = TensorInfo(TensorShape(56U, 56U, 64U), 1, data_type, DataLayout::NHWC);
713         const auto        conv_info   = PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR);
714         const WeightsInfo weights_info(false, 3U, 3U, 64U, false, query_weight_format);
715         _kernel_found = bool(ConvolutionClass::has_opt_impl(_computed_weight_format, &src_info, &weight_info,
716                                                             &bias_info, &dst_info, conv_info, weights_info,
717                                                             /*dilation*/ Size2D(1U, 1U), /*act_info*/ ActivationLayerInfo(), enable_fast_math));
718     }
719 
720 protected:
721     bool                      _kernel_found{ false };
722     arm_compute::WeightFormat _computed_weight_format{ arm_compute::WeightFormat::UNSPECIFIED };
723 };
724 #endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
725 
726 } // namespace validation
727 } // namespace test
728 } // namespace arm_compute
729 #endif /* ARM_COMPUTE_TEST_CONVOLUTION_LAYER_FIXTURE */
730