1 /*
2 * Copyright (c) 2017-2020, 2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24 #include "ReductionOperation.h"
25 #include "tests/validation/Helpers.h"
26
27 #include <algorithm>
28 #include <cmath>
29
30 namespace arm_compute
31 {
32 namespace test
33 {
34 namespace validation
35 {
36 namespace reference
37 {
38 namespace
39 {
/** Scalar reference reduction over a strided series of values.
 *
 * Reduces @p reduce_elements values read from @p ptr, with consecutive values
 * @p stride elements apart, according to @p op.
 *
 * @param ptr             Pointer to the first element of the series.
 * @param reduce_elements Number of elements to reduce.
 * @param op              Reduction to perform (MIN, MAX, SUM, SUM_SQUARE, MEAN_SUM or PROD).
 * @param policy          Rounding policy applied to the integer MEAN_SUM division (used on aarch64 only).
 *
 * @return The reduced value converted to the output type OT.
 */
template <typename T, typename OT>
OT reduce_operation(const T *ptr, int reduce_elements, ReductionOperation op, int stride, RoundingPolicy policy)
{
    using type = typename std::remove_cv<OT>::type;
    T res;
    // Seed the accumulator with the operation's neutral element;
    // MIN/MAX start from the first element so comparisons begin with a real value.
    switch(op)
    {
        case ReductionOperation::PROD:
        {
            res = type(1);
        }
        break;
        case ReductionOperation::MIN:
        case ReductionOperation::MAX:
        {
            res = *ptr;
        }
        break;
        default:
        {
            res = type(0);
        }
    }

    if(std::is_integral<type>::value)
    {
        // Integral outputs accumulate in int32_t so intermediate sums/products are not
        // truncated to the (possibly narrower) element type on every step.
        auto int_res = static_cast<int32_t>(res);
        for(int i = 0; i < reduce_elements; ++i)
        {
            auto elem = *(ptr + stride * i);

            switch(op)
            {
                case ReductionOperation::MIN:
                    if(static_cast<T>(int_res) > elem)
                    {
                        int_res = elem;
                    }
                    break;
                case ReductionOperation::MAX:
                    if(static_cast<T>(int_res) < elem)
                    {
                        int_res = elem;
                    }
                    break;
                case ReductionOperation::SUM_SQUARE:
                    int_res += elem * elem;
                    break;
                case ReductionOperation::MEAN_SUM:
                case ReductionOperation::SUM:
                    int_res += elem;
                    break;
                case ReductionOperation::PROD:
                    int_res *= elem;
                    break;
                default:
                    ARM_COMPUTE_ERROR("Operation not supported");
            }
        }
        if(op == ReductionOperation::MEAN_SUM && reduce_elements > 0)
        {
            // Only use rounding in aarch64 to be consistent with kernel
#ifdef __aarch64__
            // Divide in float format, then rounded to nearest and implicitly cast back to int
            int_res = round(static_cast<float>(int_res) / static_cast<float>(reduce_elements), policy);
#else  // defined(__aarch64__)
            ARM_COMPUTE_UNUSED(policy);
            int_res /= reduce_elements; // Legacy compatibility: plain integer division (truncation)
#endif // __aarch64__
        }
        res = static_cast<type>(int_res);
    }
    else
    {
        // Floating-point outputs accumulate directly in the element type.
        for(int i = 0; i < reduce_elements; ++i)
        {
            auto elem = *(ptr + stride * i);
            switch(op)
            {
                case ReductionOperation::MIN:
                    if(res > elem)
                    {
                        res = elem;
                    }
                    break;
                case ReductionOperation::MAX:
                    if(res < elem)
                    {
                        res = elem;
                    }
                    break;
                case ReductionOperation::SUM_SQUARE:
                    res += elem * elem;
                    break;
                case ReductionOperation::MEAN_SUM:
                case ReductionOperation::SUM:
                    res += elem;
                    break;
                case ReductionOperation::PROD:
                    res *= elem;
                    break;
                default:
                    ARM_COMPUTE_ERROR("Operation not supported");
            }
        }
        if(op == ReductionOperation::MEAN_SUM && reduce_elements > 0)
        {
            res /= reduce_elements;
        }
    }
    return res;
}
152
153 template <typename T, typename OT>
reduce_operation_arg_min_max(const T * ptr,int reduce_elements,ReductionOperation op,int stride)154 OT reduce_operation_arg_min_max(const T *ptr, int reduce_elements, ReductionOperation op, int stride)
155 {
156 uint32_t res = 0;
157 for(int i = 0; i < reduce_elements; ++i)
158 {
159 auto elem = *(ptr + stride * i);
160 switch(op)
161 {
162 case ReductionOperation::ARG_IDX_MIN:
163 if(*(ptr + stride * res) > elem)
164 {
165 res = static_cast<uint32_t>(i);
166 }
167 break;
168 case ReductionOperation::ARG_IDX_MAX:
169 if(*(ptr + stride * res) < elem)
170 {
171 res = static_cast<uint32_t>(i);
172 }
173 break;
174 default:
175 ARM_COMPUTE_ERROR("Operation not supported");
176 }
177 }
178 return static_cast<OT>(res);
179 }
180
181 } // namespace
182
/** Compute a reduction of @p src along @p axis into a tensor of shape @p dst_shape.
 *
 * For ARG_IDX_MIN/ARG_IDX_MAX the output holds S32 indices; for all other
 * operations the output keeps the input data type. The per-axis cases below
 * only differ in how the flat input offset and element stride are derived:
 * reducing axis k walks the input with a stride equal to the product of the
 * sizes of dimensions 0..k-1.
 */
template <typename T, typename OT>
SimpleTensor<OT> compute_reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, RoundingPolicy policy)
{
    // Create reference
    const bool       is_arg_min_max   = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX);
    DataType         output_data_type = is_arg_min_max ? DataType::S32 : src.data_type();
    SimpleTensor<OT> dst{ dst_shape, output_data_type, 1, src.quantization_info() };
    const unsigned int src_width  = src.shape().x();
    const unsigned int src_height = src.shape().y();
    const unsigned int src_depth  = src.shape().z();
    const unsigned int src_batch  = src.shape()[3];
    const int          reduce_elems = src.shape()[axis];

    switch(axis)
    {
        case 0:
        {
            // Reduce along X: rows are contiguous, stride 1.
            const unsigned int upper_dims = src.shape().total_size_upper(1);
            for(unsigned int du = 0; du < upper_dims; ++du)
            {
                const T *src_row_ptr = src.data() + du * reduce_elems;
                dst[du]              = is_arg_min_max ?
                                       reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, 1) :
                                       reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, 1, policy);
            }
        }
        break;
        case 1:
        {
            // Reduce along Y: elements of a column are src_width apart.
            const unsigned int upper_dims = src.shape().total_size_upper(2);
            for(unsigned int du = 0; du < upper_dims; ++du)
            {
                for(unsigned int x = 0; x < src_width; ++x)
                {
                    const int in_offset   = du * src_height * src_width + x;
                    const int out_offset  = du * src_width + x;
                    const T *src_row_ptr  = src.data() + in_offset;
                    dst[out_offset]       = is_arg_min_max ?
                                            reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width) :
                                            reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width, policy);
                }
            }
        }
        break;
        case 2:
        {
            // Reduce along Z: elements are one XY-plane (src_width * src_height) apart.
            const unsigned int upper_dims = src.shape().total_size_upper(3);
            for(unsigned int du = 0; du < upper_dims; ++du)
            {
                for(unsigned int x = 0; x < src_width; ++x)
                {
                    for(unsigned int y = 0; y < src_height; ++y)
                    {
                        const int in_offset   = du * src_depth * src_height * src_width + y * src_width + x;
                        const int out_offset  = du * src_width * src_height + y * src_width + x;
                        const T *src_row_ptr  = src.data() + in_offset;
                        dst[out_offset]       = is_arg_min_max ?
                                                reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height) :
                                                reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height, policy);
                    }
                }
            }
        }
        break;
        case 3:
        {
            // Reduce along the batch dimension: elements are one XYZ-volume apart.
            const unsigned int upper_dims = src.shape().total_size_upper(4);
            for(unsigned int du = 0; du < upper_dims; ++du)
            {
                for(unsigned int z = 0; z < src_depth; ++z)
                {
                    for(unsigned int y = 0; y < src_height; ++y)
                    {
                        for(unsigned int x = 0; x < src_width; ++x)
                        {
                            const int in_offset   = du * src_batch * src_depth * src_height * src_width + z * src_width * src_height + y * src_width + x;
                            const int out_offset  = du * src_depth * src_height * src_width + z * src_width * src_height + y * src_width + x;
                            const T *src_row_ptr  = src.data() + in_offset;
                            dst[out_offset]       = is_arg_min_max ?
                                                    reduce_operation_arg_min_max<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth) :
                                                    reduce_operation<T, OT>(src_row_ptr, reduce_elems, op, src_width * src_height * src_depth, policy);
                        }
                    }
                }
            }
        }
        break;
        default:
            ARM_COMPUTE_ERROR("Unsupported reduction axis");
    }

    return dst;
}
276
/** Generic entry point: forwards to compute_reduction_operation.
 *
 * @p quantization_info_output is only meaningful for the quantized (uint8/int8)
 * specializations below; it is intentionally ignored here.
 */
template <typename T, typename OT>
SimpleTensor<OT> reduction_operation(const SimpleTensor<T> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output, RoundingPolicy policy)
{
    ARM_COMPUTE_UNUSED(quantization_info_output);
    return compute_reduction_operation<T, OT>(src, dst_shape, axis, op, policy);
}
283
284 template <>
reduction_operation(const SimpleTensor<uint8_t> & src,const TensorShape & dst_shape,unsigned int axis,ReductionOperation op,QuantizationInfo quantization_info_output,RoundingPolicy policy)285 SimpleTensor<uint8_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output, RoundingPolicy policy)
286 {
287 if(src.data_type() == DataType::QASYMM8)
288 {
289 // If the operation is MEAN_SUM, we can directly use the uint8 implementation without taking into account scale and offset
290 if(op == ReductionOperation::MEAN_SUM && src.quantization_info() == quantization_info_output)
291 {
292 return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op, policy);
293 }
294 else
295 {
296 SimpleTensor<float> src_f = convert_from_asymmetric(src);
297 SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op);
298 return convert_to_asymmetric<uint8_t>(dst_f, quantization_info_output);
299 }
300 }
301 else
302 {
303 return compute_reduction_operation<uint8_t, uint8_t>(src, dst_shape, axis, op, policy);
304 }
305 }
306
307 template <>
reduction_operation(const SimpleTensor<int8_t> & src,const TensorShape & dst_shape,unsigned int axis,ReductionOperation op,QuantizationInfo quantization_info_output,RoundingPolicy policy)308 SimpleTensor<int8_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info_output, RoundingPolicy policy)
309 {
310 if(src.data_type() == DataType::QASYMM8_SIGNED)
311 {
312 // If the operation is MEAN_SUM, we can directly use the int8 implementation without taking into account scale and offset
313 if(op == ReductionOperation::MEAN_SUM && src.quantization_info() == quantization_info_output)
314 {
315 return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op, policy);
316 }
317 else
318 {
319 SimpleTensor<float> src_f = convert_from_asymmetric(src);
320 SimpleTensor<float> dst_f = reference::reduction_operation<float, float>(src_f, dst_shape, axis, op);
321 return convert_to_asymmetric<int8_t>(dst_f, quantization_info_output);
322 }
323 }
324 else
325 {
326 return compute_reduction_operation<int8_t, int8_t>(src, dst_shape, axis, op, policy);
327 }
328 }
329
// Explicit instantiations for the input/output type combinations exercised by the
// validation suite. ARG_IDX_MIN/ARG_IDX_MAX produce S32 outputs, hence the
// SimpleTensor<int32_t> instantiations for every supported input type.
template SimpleTensor<float> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
                                                 QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<half> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
                                                QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);

template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<float> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
                                                   QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int32_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
                                                   QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<half> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
                                                   QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<uint8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
                                                   QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
template SimpleTensor<int32_t> reduction_operation(const SimpleTensor<int8_t> &src, const TensorShape &dst_shape, unsigned int axis, ReductionOperation op,
                                                   QuantizationInfo quantization_info_output = QuantizationInfo(), RoundingPolicy policy = RoundingPolicy::TO_ZERO);
345
346 } // namespace reference
347 } // namespace validation
348 } // namespace test
349 } // namespace arm_compute
350