xref: /aosp_15_r20/external/ComputeLibrary/tests/validation/reference/ReductionOperation.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2017-2020, 2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "ReductionOperation.h"
25 #include "tests/validation/Helpers.h"
26 
27 #include <algorithm>
28 #include <cmath>
29 
30 namespace arm_compute
31 {
32 namespace test
33 {
34 namespace validation
35 {
36 namespace reference
37 {
38 namespace
39 {
40 template <typename T, typename OT>
reduce_operation(const T * ptr,int reduce_elements,ReductionOperation op,int stride,RoundingPolicy policy)41 OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, int stride, RoundingPolicy policy)
42 {
43     using type = typename std::remove_cv<OT>::type;
44     T res;
45     switch(op)
46     {
47         case ReductionOperation::PROD:
48         {
49             res = type(1);
50         }
51         break;
52         case ReductionOperation::MIN:
53         case ReductionOperation::MAX:
54         {
55             res = *ptr;
56         }
57         break;
58         default:
59         {
60             res = type(0);
61         }
62     }
63 
64     if(std::is_integral<type>::value)
65     {
66         auto int_res = static_cast<int32_t>(res);
67         for(int i = 0; i < reduce_elements; ++i)
68         {
69             auto elem = *(ptr + stride * i);
70 
71             switch(op)
72             {
73                 case ReductionOperation::MIN:
74                     if(static_cast<T>(int_res) > elem)
75                     {
76                         int_res = elem;
77                     }
78                     break;
79                 case ReductionOperation::MAX:
80                     if(static_cast<T>(int_res) < elem)
81                     {
82                         int_res = elem;
83                     }
84                     break;
85                 case ReductionOperation::SUM_SQUARE:
86                     int_res += elem * elem;
87                     break;
88                 case ReductionOperation::MEAN_SUM:
89                 case ReductionOperation::SUM:
90                     int_res += elem;
91                     break;
92                 case ReductionOperation::PROD:
93                     int_res *= elem;
94                     break;
95                 default:
96                     ARM_COMPUTE_ERROR("Operation not supported");
97             }
98         }
99         if(op == ReductionOperation::MEAN_SUM && reduce_elements > 0)
100         {
101             // Only use rounding in aarch64 to be consistent with kernel
102 #ifdef __aarch64__
103             // Divide in float format, then rounded to nearest and implicitly cast back to int
104             int_res = round(static_cast<float>(int_res) / static_cast<float>(reduce_elements), policy);
105 #else  // defined(__aarch64__)
106             ARM_COMPUTE_UNUSED(policy);
107             int_res /= reduce_elements; // Legacy compatibility
108 #endif // __aarch64
109         }
110         res = static_cast<type>(int_res);
111     }
112     else
113     {
114         for(int i = 0; i < reduce_elements; ++i)
115         {
116             auto elem = *(ptr + stride * i);
117             switch(op)
118             {
119                 case ReductionOperation::MIN:
120                     if(res > elem)
121                     {
122                         res = elem;
123                     }
124                     break;
125                 case ReductionOperation::MAX:
126                     if(res < elem)
127                     {
128                         res = elem;
129                     }
130                     break;
131                 case ReductionOperation::SUM_SQUARE:
132                     res += elem * elem;
133                     break;
134                 case ReductionOperation::MEAN_SUM:
135                 case ReductionOperation::SUM:
136                     res += elem;
137                     break;
138                 case ReductionOperation::PROD:
139                     res *= elem;
140                     break;
141                 default:
142                     ARM_COMPUTE_ERROR("Operation not supported");
143             }
144         }
145         if(op == ReductionOperation::MEAN_SUM && reduce_elements > 0)
146         {
147             res /= reduce_elements;
148         }
149     }
150     return res;
151 }
152 
153 template <typename T, typename OT>
reduce_operation_arg_min_max(const T * ptr,int reduce_elements,ReductionOperation op,int stride)154 OT reduce_operation_arg_min_max(const T *ptr, int reduce_elements, ReductionOperation op, int stride)
155 {
156     uint32_t res = 0;
157     for(int i = 0; i < reduce_elements; ++i)
158     {
159         auto elem = *(ptr + stride * i);
160         switch(op)
161         {
162             case ReductionOperation::ARG_IDX_MIN:
163                 if(*(ptr + stride * res) > elem)
164                 {
165                     res = static_cast<uint32_t>(i);
166                 }
167                 break;
168             case ReductionOperation::ARG_IDX_MAX:
169                 if(*(ptr + stride * res) < elem)
170                 {
171                     res = static_cast<uint32_t>(i);
172                 }
173                 break;
174             default:
175                 ARM_COMPUTE_ERROR("Operation not supported");
176         }
177     }
178     return static_cast<OT>(res);
179 }
180 
181 } // namespace
182 
183 template <typename T, typename OT>
compute_reduction_operation(const SimpleTensor<T> & src,const TensorShape & dst_shape,unsigned int axis,ReductionOperation op,RoundingPolicy policy)184 SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, RoundingPolicy policy)
185 {
186     // Create reference
187     const bool         is_arg_min_max   = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX);
188     DataType           output_data_type = is_arg_min_max ? DataType::S32 : src.data_type();
189     SimpleTensor<OT>   dst{ dst_shape, output_data_type, 1, src.quantization_info() };
190     const unsigned int src_width    = src.shape().x();
191     const unsigned int src_height   = src.shape().y();
192     const unsigned int src_depth    = src.shape().z();
193     const unsigned int src_batch    = src.shape()[3];
194     const int          reduce_elems = src.shape()[axis];
195 
196     switch(axis)
197     {
198         case 0:
199         {
200             const unsigned int upper_dims = src.shape().total_size_upper(1);
201             for(unsigned int du = 0; du < upper_dims; ++du)
202             {
203                 const T *src_row_ptr = src.data() + du * reduce_elems;
204                 dst[du]              = is_arg_min_max ?
205                                        reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, 1) :
206                                        reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, 1, policy);
207             }
208         }
209         break;
210         case 1:
211         {
212             const unsigned int upper_dims = src.shape().total_size_upper(2);
213             for(unsigned int du = 0; du < upper_dims; ++du)
214             {
215                 for(unsigned int x = 0; x < src_width; ++x)
216                 {
217                     const int in_offset   = du * src_height * src_width + x;
218                     const int out_offset  = du * src_width + x;
219                     const T *src_row_ptr = src.data() + in_offset;
220                     dst[out_offset]       = is_arg_min_max ?
221                                             reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width) :
222                                             reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width, policy);
223                 }
224             }
225         }
226         break;
227         case 2:
228         {
229             const unsigned int upper_dims = src.shape().total_size_upper(3);
230             for(unsigned int du = 0; du < upper_dims; ++du)
231             {
232                 for(unsigned int x = 0; x < src_width; ++x)
233                 {
234                     for(unsigned int y = 0; y < src_height; ++y)
235                     {
236                         const int in_offset   = du * src_depth * src_height * src_width + y * src_width + x;
237                         const int out_offset  = du * src_width * src_height + y * src_width + x;
238                         const T *src_row_ptr = src.data() + in_offset;
239                         dst[out_offset]       = is_arg_min_max ?
240                                                 reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height) :
241                                                 reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height, policy);
242                     }
243                 }
244             }
245         }
246         break;
247         case 3:
248         {
249             const unsigned int upper_dims = src.shape().total_size_upper(4);
250             for(unsigned int du = 0; du < upper_dims; ++du)
251             {
252                 for(unsigned int z = 0; z < src_depth; ++z)
253                 {
254                     for(unsigned int y = 0; y < src_height; ++y)
255                     {
256                         for(unsigned int x = 0; x < src_width; ++x)
257                         {
258                             const int in_offset   = du * src_batch * src_depth * src_height * src_width + z * src_width * src_height + y * src_width + x;
259                             const int out_offset  = du * src_depth * src_height * src_width + z * src_width * src_height + y * src_width + x;
260                             const T *src_row_ptr = src.data() + in_offset;
261                             dst[out_offset]       = is_arg_min_max ?
262                                                     reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth) :
263                                                     reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth, policy);
264                         }
265                     }
266                 }
267             }
268         }
269         break;
270         default:
271             ARM_COMPUTE_ERROR("Unsupported reduction axis");
272     }
273 
274     return dst;
275 }
276 
277 template <typename T, typename OT>
reduction_operation(const SimpleTensor<T> & src,const TensorShape & dst_shape,unsigned int axis,ReductionOperation op,QuantizationInfo quantization_info_output,RoundingPolicy policy)278 SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output, RoundingPolicy policy)
279 {
280     ARM_COMPUTE_UNUSED(quantization_info_output);
281     return compute_reduction_operation<T, OT>(src, dst_shape, axis, op, policy);
282 }
283 
284 template <>
reduction_operation(const SimpleTensor<uint8_t> & src,const TensorShape & dst_shape,unsigned int axis,ReductionOperation op,QuantizationInfo quantization_info_output,RoundingPolicy policy)285 SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output, RoundingPolicy policy)
286 {
287     if(src.data_type() == DataType::QASYMM8)
288     {
289         // If the operation is MEAN_SUM, we can directly use the uint8 implementation without taking into account scale and offset
290         if(op == ReductionOperation::MEAN_SUM && src.quantization_info() == quantization_info_output)
291         {
292             return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op, policy);
293         }
294         else
295         {
296             SimpleTensor<float> src_f = convert_from_asymmetric(src);
297             SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op);
298             return convert_to_asymmetric<uint8_t>(dst_f, quantization_info_output);
299         }
300     }
301     else
302     {
303         return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op, policy);
304     }
305 }
306 
307 template <>
reduction_operation(const SimpleTensor<int8_t> & src,const TensorShape & dst_shape,unsigned int axis,ReductionOperation op,QuantizationInfo quantization_info_output,RoundingPolicy policy)308 SimpleTensor<int8_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output, RoundingPolicy policy)
309 {
310     if(src.data_type() == DataType::QASYMM8_SIGNED)
311     {
312         // If the operation is MEAN_SUM, we can directly use the int8 implementation without taking into account scale and offset
313         if(op == ReductionOperation::MEAN_SUM && src.quantization_info() == quantization_info_output)
314         {
315             return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op, policy);
316         }
317         else
318         {
319             SimpleTensor<float> src_f = convert_from_asymmetric(src);
320             SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op);
321             return convert_to_asymmetric<int8_t>(dst_f, quantization_info_output);
322         }
323     }
324     else
325     {
326         return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op, policy);
327     }
328 }
329 
330 template SimpleTensor<float> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
331                                                  QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
332 template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
333                                                 QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
334 
335 template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
336                                                    QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
337 template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int32_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
338                                                    QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
339 template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
340                                                    QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
341 template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
342                                                    QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
343 template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
344                                                    QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
345 
346 } // namespace reference
347 } // namespace validation
348 } // namespace test
349 } // namespace arm_compute
350