/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24*c217d954SCole Faust #include "DepthwiseConvolutionLayer.h"
25*c217d954SCole Faust
26*c217d954SCole Faust #include "ConvolutionLayer.h"
27*c217d954SCole Faust #include "Utils.h"
28*c217d954SCole Faust
29*c217d954SCole Faust #include "tests/validation/Helpers.h"
30*c217d954SCole Faust #include "tests/validation/reference/Utils.h"
31*c217d954SCole Faust #include "tests/validation/reference/UtilsQuantizedAsymm.h"
32*c217d954SCole Faust
33*c217d954SCole Faust #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
34*c217d954SCole Faust
35*c217d954SCole Faust namespace arm_compute
36*c217d954SCole Faust {
37*c217d954SCole Faust namespace test
38*c217d954SCole Faust {
39*c217d954SCole Faust namespace validation
40*c217d954SCole Faust {
41*c217d954SCole Faust namespace reference
42*c217d954SCole Faust {
43*c217d954SCole Faust namespace
44*c217d954SCole Faust {
45*c217d954SCole Faust /** Perform a depthwise convolution for floating-point types
46*c217d954SCole Faust *
47*c217d954SCole Faust * - Three dimensions tensors
48*c217d954SCole Faust * - Third dimention is number of channels
49*c217d954SCole Faust * - Depths of input tensor and filter are equals
50*c217d954SCole Faust * - Padding, stride and output shape "match"
51*c217d954SCole Faust *
52*c217d954SCole Faust */
53*c217d954SCole Faust template <typename T>
depthwise_convolution_fp(const SimpleTensor<T> & src,const SimpleTensor<T> & weights,const SimpleTensor<T> & biases,const TensorShape & dst_shape,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const Size2D & dilation,const QuantizationInfo & out_quant_info)54*c217d954SCole Faust SimpleTensor<T> depthwise_convolution_fp(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<T> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info,
55*c217d954SCole Faust unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
56*c217d954SCole Faust {
57*c217d954SCole Faust ARM_COMPUTE_UNUSED(out_quant_info);
58*c217d954SCole Faust
59*c217d954SCole Faust SimpleTensor<T> dst{ dst_shape, src.data_type(), 1 };
60*c217d954SCole Faust
61*c217d954SCole Faust // Compute reference
62*c217d954SCole Faust const int filter_width = weights.shape().x();
63*c217d954SCole Faust const int filter_height = weights.shape().y();
64*c217d954SCole Faust const int filter_plane = filter_width * filter_height;
65*c217d954SCole Faust const int input_width = src.shape().x();
66*c217d954SCole Faust const int input_height = src.shape().y();
67*c217d954SCole Faust const int input_depth = src.shape().z();
68*c217d954SCole Faust const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth);
69*c217d954SCole Faust
70*c217d954SCole Faust const int pad_left = conv_info.pad_left();
71*c217d954SCole Faust const int pad_top = conv_info.pad_top();
72*c217d954SCole Faust
73*c217d954SCole Faust const float patch_width = (filter_width + (dilation.x() - 1) * (filter_width - 1));
74*c217d954SCole Faust const float patch_height = (filter_height + (dilation.y() - 1) * (filter_height - 1));
75*c217d954SCole Faust
76*c217d954SCole Faust const int patch_half_width_floor = patch_width / 2;
77*c217d954SCole Faust const int patch_half_height_floor = patch_height / 2;
78*c217d954SCole Faust
79*c217d954SCole Faust const auto patch_half_width_ceil = static_cast<int>(std::ceil(patch_width / 2));
80*c217d954SCole Faust const auto patch_half_height_ceil = static_cast<int>(std::ceil(patch_height / 2));
81*c217d954SCole Faust
82*c217d954SCole Faust const int minimum_x = -pad_left + patch_half_width_floor;
83*c217d954SCole Faust const int minimum_y = -pad_top + patch_half_height_floor;
84*c217d954SCole Faust const int maximum_x = (conv_info.stride().first * (dst_shape[0] - 1));
85*c217d954SCole Faust const int maximum_y = (conv_info.stride().second * (dst_shape[1] - 1));
86*c217d954SCole Faust
87*c217d954SCole Faust const T border_value(0);
88*c217d954SCole Faust
89*c217d954SCole Faust int out_pos = 0;
90*c217d954SCole Faust for(int r = 0; r < num_batches; ++r)
91*c217d954SCole Faust {
92*c217d954SCole Faust for(int z = 0; z < input_depth; ++z)
93*c217d954SCole Faust {
94*c217d954SCole Faust for(unsigned int m = 0; m < depth_multiplier; ++m)
95*c217d954SCole Faust {
96*c217d954SCole Faust const int out_z = z * depth_multiplier + m;
97*c217d954SCole Faust
98*c217d954SCole Faust for(int y = minimum_y; y <= minimum_y + maximum_y; y += conv_info.stride().second)
99*c217d954SCole Faust {
100*c217d954SCole Faust for(int x = minimum_x; x <= minimum_x + maximum_x; x += conv_info.stride().first)
101*c217d954SCole Faust {
102*c217d954SCole Faust Coordinates coords(static_cast<int>(x), static_cast<int>(y), static_cast<int>(z), static_cast<int>(r));
103*c217d954SCole Faust size_t filter_offset = filter_plane * out_z;
104*c217d954SCole Faust
105*c217d954SCole Faust T val(0);
106*c217d954SCole Faust for(int j = y - patch_half_height_floor; j < y + patch_half_height_ceil; j += dilation.y())
107*c217d954SCole Faust {
108*c217d954SCole Faust for(int i = x - patch_half_width_floor; i < x + patch_half_width_ceil; i += dilation.x())
109*c217d954SCole Faust {
110*c217d954SCole Faust coords.set(0, i);
111*c217d954SCole Faust coords.set(1, j);
112*c217d954SCole Faust val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, border_value);
113*c217d954SCole Faust ++filter_offset;
114*c217d954SCole Faust }
115*c217d954SCole Faust }
116*c217d954SCole Faust
117*c217d954SCole Faust dst[out_pos++] = saturate_cast<T>(val + *static_cast<const T *>(biases(Coordinates(out_z))));
118*c217d954SCole Faust }
119*c217d954SCole Faust }
120*c217d954SCole Faust }
121*c217d954SCole Faust }
122*c217d954SCole Faust }
123*c217d954SCole Faust
124*c217d954SCole Faust return dst;
125*c217d954SCole Faust }
126*c217d954SCole Faust
127*c217d954SCole Faust /** Perform a quantized depthwise convolution
128*c217d954SCole Faust *
129*c217d954SCole Faust * - Three dimensions tensors
130*c217d954SCole Faust * - Third dimention is number of channels
131*c217d954SCole Faust * - Depths of input tensor and filter are equals
132*c217d954SCole Faust * - Padding, stride and output shape "match"
133*c217d954SCole Faust * - QASYMM8/QASYMM8_SIGNED input, output
134*c217d954SCole Faust * - QASYMM8/QASYMM8_SIGNED or QSYMM8_PER_CHANNEL filter
135*c217d954SCole Faust *
136*c217d954SCole Faust */
137*c217d954SCole Faust template <typename T, typename TW, typename TB>
depthwise_convolution_quantized(const SimpleTensor<T> & src,const SimpleTensor<TW> & weights,const SimpleTensor<int32_t> & biases,const TensorShape & dst_shape,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const Size2D & dilation,const QuantizationInfo & out_quant_info)138*c217d954SCole Faust SimpleTensor<T> depthwise_convolution_quantized(const SimpleTensor<T> &src, const SimpleTensor<TW> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
139*c217d954SCole Faust const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
140*c217d954SCole Faust {
141*c217d954SCole Faust // if no explicit quantization has been set you the same as src
142*c217d954SCole Faust const QuantizationInfo &dst_qinfo = out_quant_info.uniform().empty() ? src.quantization_info() : out_quant_info;
143*c217d954SCole Faust SimpleTensor<T> dst{ dst_shape, src.data_type(), 1, dst_qinfo };
144*c217d954SCole Faust
145*c217d954SCole Faust // Create reference
146*c217d954SCole Faust const int input_offset = -src.quantization_info().uniform().offset;
147*c217d954SCole Faust const float input_scale = src.quantization_info().uniform().scale;
148*c217d954SCole Faust const int weights_offset = -weights.quantization_info().uniform().offset;
149*c217d954SCole Faust const int output_offset = dst_qinfo.uniform().offset;
150*c217d954SCole Faust const float output_scale = dst_qinfo.uniform().scale;
151*c217d954SCole Faust
152*c217d954SCole Faust const std::vector<float> weights_scale_vec = weights.quantization_info().scale();
153*c217d954SCole Faust
154*c217d954SCole Faust // Compute reference
155*c217d954SCole Faust const int filter_width = weights.shape().x();
156*c217d954SCole Faust const int filter_height = weights.shape().y();
157*c217d954SCole Faust const int filter_plane = filter_width * filter_height;
158*c217d954SCole Faust const int input_width = src.shape().x();
159*c217d954SCole Faust const int input_height = src.shape().y();
160*c217d954SCole Faust const int input_depth = src.shape().z();
161*c217d954SCole Faust const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth);
162*c217d954SCole Faust
163*c217d954SCole Faust const int pad_left = conv_info.pad_left();
164*c217d954SCole Faust const int pad_top = conv_info.pad_top();
165*c217d954SCole Faust
166*c217d954SCole Faust const float patch_width = (filter_width + (dilation.x() - 1) * (filter_width - 1));
167*c217d954SCole Faust const float patch_height = (filter_height + (dilation.y() - 1) * (filter_height - 1));
168*c217d954SCole Faust
169*c217d954SCole Faust const int patch_half_width_floor = patch_width / 2;
170*c217d954SCole Faust const int patch_half_height_floor = patch_height / 2;
171*c217d954SCole Faust
172*c217d954SCole Faust const auto patch_half_width_ceil = static_cast<int>(std::ceil(patch_width / 2));
173*c217d954SCole Faust const auto patch_half_height_ceil = static_cast<int>(std::ceil(patch_height / 2));
174*c217d954SCole Faust
175*c217d954SCole Faust const int minimum_x = -pad_left + patch_half_width_floor;
176*c217d954SCole Faust const int minimum_y = -pad_top + patch_half_height_floor;
177*c217d954SCole Faust const int maximum_x = (conv_info.stride().first * (dst_shape[0] - 1));
178*c217d954SCole Faust const int maximum_y = (conv_info.stride().second * (dst_shape[1] - 1));
179*c217d954SCole Faust
180*c217d954SCole Faust const bool is_quantized_per_channel = is_data_type_quantized_per_channel(weights.data_type());
181*c217d954SCole Faust
182*c217d954SCole Faust const int min = std::numeric_limits<T>::lowest();
183*c217d954SCole Faust const int max = std::numeric_limits<T>::max();
184*c217d954SCole Faust
185*c217d954SCole Faust int out_pos = 0;
186*c217d954SCole Faust for(int r = 0; r < num_batches; ++r)
187*c217d954SCole Faust {
188*c217d954SCole Faust for(int z = 0; z < input_depth; ++z)
189*c217d954SCole Faust {
190*c217d954SCole Faust for(unsigned int m = 0; m < depth_multiplier; ++m)
191*c217d954SCole Faust {
192*c217d954SCole Faust const int out_z = z * depth_multiplier + m;
193*c217d954SCole Faust const int32_t bias_val = *static_cast<const int32_t *>(biases(Coordinates(out_z)));
194*c217d954SCole Faust
195*c217d954SCole Faust int output_multiplier = 0;
196*c217d954SCole Faust int output_shift = 0;
197*c217d954SCole Faust const float weights_scale = (is_quantized_per_channel) ? weights_scale_vec[out_z] : weights_scale_vec[0];
198*c217d954SCole Faust const float multiplier = input_scale * weights_scale / output_scale;
199*c217d954SCole Faust arm_compute::quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
200*c217d954SCole Faust
201*c217d954SCole Faust for(int y = minimum_y; y <= minimum_y + maximum_y; y += conv_info.stride().second)
202*c217d954SCole Faust {
203*c217d954SCole Faust for(int x = minimum_x; x <= minimum_x + maximum_x; x += conv_info.stride().first)
204*c217d954SCole Faust {
205*c217d954SCole Faust Coordinates coords(x, y, z, r);
206*c217d954SCole Faust int filter_offset = filter_plane * out_z;
207*c217d954SCole Faust
208*c217d954SCole Faust int32_t val = 0;
209*c217d954SCole Faust for(int j = y - patch_half_height_floor; j < y + patch_half_height_ceil; j += dilation.y())
210*c217d954SCole Faust {
211*c217d954SCole Faust for(int i = x - patch_half_width_floor; i < x + patch_half_width_ceil; i += dilation.x())
212*c217d954SCole Faust {
213*c217d954SCole Faust coords.set(0, i);
214*c217d954SCole Faust coords.set(1, j);
215*c217d954SCole Faust const auto in_val = tensor_elem_at<T>(src, coords, BorderMode::CONSTANT, -input_offset);
216*c217d954SCole Faust const TW w_val = *(weights.data() + filter_offset);
217*c217d954SCole Faust val += (in_val + input_offset) * (w_val + weights_offset);
218*c217d954SCole Faust ++filter_offset;
219*c217d954SCole Faust }
220*c217d954SCole Faust }
221*c217d954SCole Faust val += bias_val;
222*c217d954SCole Faust // Quantize down
223*c217d954SCole Faust val = quantize_down_scale_by_fixedpoint(val, output_multiplier, output_shift, output_offset, min, max);
224*c217d954SCole Faust
225*c217d954SCole Faust // Store the result
226*c217d954SCole Faust dst[out_pos++] = val;
227*c217d954SCole Faust }
228*c217d954SCole Faust }
229*c217d954SCole Faust }
230*c217d954SCole Faust }
231*c217d954SCole Faust }
232*c217d954SCole Faust
233*c217d954SCole Faust return dst;
234*c217d954SCole Faust }
235*c217d954SCole Faust } // namespace
236*c217d954SCole Faust
237*c217d954SCole Faust template <>
depthwise_convolution(const SimpleTensor<float> & src,const SimpleTensor<float> & weights,const SimpleTensor<float> & biases,const TensorShape & dst_shape,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const Size2D & dilation,const QuantizationInfo & out_quant_info)238*c217d954SCole Faust SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape,
239*c217d954SCole Faust const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
240*c217d954SCole Faust {
241*c217d954SCole Faust return depthwise_convolution_fp(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
242*c217d954SCole Faust }
243*c217d954SCole Faust
244*c217d954SCole Faust template <>
depthwise_convolution(const SimpleTensor<half> & src,const SimpleTensor<half> & weights,const SimpleTensor<half> & biases,const TensorShape & dst_shape,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const Size2D & dilation,const QuantizationInfo & out_quant_info)245*c217d954SCole Faust SimpleTensor<half> depthwise_convolution(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &biases, const TensorShape &dst_shape,
246*c217d954SCole Faust const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
247*c217d954SCole Faust {
248*c217d954SCole Faust return depthwise_convolution_fp(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
249*c217d954SCole Faust }
250*c217d954SCole Faust
251*c217d954SCole Faust template <>
depthwise_convolution(const SimpleTensor<uint8_t> & src,const SimpleTensor<uint8_t> & weights,const SimpleTensor<int32_t> & biases,const TensorShape & dst_shape,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const Size2D & dilation,const QuantizationInfo & out_quant_info)252*c217d954SCole Faust SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
253*c217d954SCole Faust const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
254*c217d954SCole Faust {
255*c217d954SCole Faust return depthwise_convolution_quantized<uint8_t, uint8_t, int32_t>(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
256*c217d954SCole Faust }
257*c217d954SCole Faust
258*c217d954SCole Faust template <>
depthwise_convolution(const SimpleTensor<uint8_t> & src,const SimpleTensor<int8_t> & weights,const SimpleTensor<int32_t> & biases,const TensorShape & dst_shape,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const Size2D & dilation,const QuantizationInfo & out_quant_info)259*c217d954SCole Faust SimpleTensor<uint8_t> depthwise_convolution(const SimpleTensor<uint8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
260*c217d954SCole Faust const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
261*c217d954SCole Faust {
262*c217d954SCole Faust return depthwise_convolution_quantized<uint8_t, int8_t, int32_t>(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
263*c217d954SCole Faust }
264*c217d954SCole Faust
265*c217d954SCole Faust template <>
depthwise_convolution(const SimpleTensor<int8_t> & src,const SimpleTensor<int8_t> & weights,const SimpleTensor<int32_t> & biases,const TensorShape & dst_shape,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const Size2D & dilation,const QuantizationInfo & out_quant_info)266*c217d954SCole Faust SimpleTensor<int8_t> depthwise_convolution(const SimpleTensor<int8_t> &src, const SimpleTensor<int8_t> &weights, const SimpleTensor<int32_t> &biases, const TensorShape &dst_shape,
267*c217d954SCole Faust const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const QuantizationInfo &out_quant_info)
268*c217d954SCole Faust {
269*c217d954SCole Faust return depthwise_convolution_quantized<int8_t, int8_t, int32_t>(src, weights, biases, dst_shape, conv_info, depth_multiplier, dilation, out_quant_info);
270*c217d954SCole Faust }
271*c217d954SCole Faust } // namespace reference
272*c217d954SCole Faust } // namespace validation
273*c217d954SCole Faust } // namespace test
274*c217d954SCole Faust } // namespace arm_compute
275