/*
 * Copyright (c) 2022-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE
#define TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE

#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"

#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"

#include "tests/CL/CLAccessor.h"

#include "tests/framework/Asserts.h"
#include "tests/framework/Fixture.h"
#include "tests/framework/Macros.h"

#include "tests/validation/Validation.h"
#include "tests/validation/reference/DepthwiseConvolutionLayer.h"

using namespace arm_compute::experimental::dynamic_fusion;

namespace arm_compute
{
namespace test
{
namespace validation
{
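/** Generic validation fixture for the dynamic fusion depthwise conv2d operator on OpenCL.
 *
 * Builds a GpuWorkloadSketch containing GpuDepthwiseConv2d followed by GpuOutput, runs it through
 * ClWorkloadRuntime, and stores the result in _target; the matching reference result computed with
 * tests/validation/reference/DepthwiseConvolutionLayer is stored in _reference for comparison in
 * the test body.
 */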
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class DynamicFusionGpuDepthwiseConv2dValidationGenericFixture : public framework::Fixture
{
public:
    using TBias = typename std::conditional < std::is_same<typename std::decay<T>::type, uint8_t>::value
                  || std::is_same<typename std::decay<T>::type, int8_t>::value,
                  int32_t, T >::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T

    template <typename...>
    void setup(TensorShape input_shape, Size2D kernel_size, const PadStrideInfo &pad_stride, const Size2D &dilation,
               const unsigned int depth_multiplier, const DataType data_type, const DataLayout data_layout)
    {
        ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion depthwise conv2d only supports NHWC layout

        DepthwiseConv2dAttributes dwc_conv2d_attr;
        const Padding2D           padding_2d(pad_stride.pad_left(), pad_stride.pad_right(), pad_stride.pad_top(), pad_stride.pad_bottom());
        dwc_conv2d_attr.pad(padding_2d)
        .stride(Size2D(pad_stride.stride().first, pad_stride.stride().second))
        .dilation(dilation)
        .depth_multiplier(depth_multiplier)
        .dimension_rounding_type(pad_stride.round());

        // Calculate Output and Weight Shapes
        TensorShape weights_shape = TensorShape(kernel_size.width, kernel_size.height);

        const TensorInfo in_info(input_shape, 1, data_type);
        const TensorInfo we_info(weights_shape, 1, data_type);

        const ConvolutionInfo info{ pad_stride, depth_multiplier, ActivationLayerInfo(), dilation };
        const TensorShape     output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(in_info, we_info, info);

        weights_shape.set(2, output_shape.z());
        const TensorShape bias_shape = TensorShape(weights_shape[2]);

        _data_type   = data_type;
        _data_layout = data_layout;
        _target      = compute_target(input_shape, weights_shape, bias_shape, dwc_conv2d_attr);
        _reference   = compute_reference(input_shape, weights_shape, bias_shape, output_shape, dwc_conv2d_attr);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        switch(tensor.data_type())
        {
            case DataType::F16:
            {
                arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f };
                library->fill(tensor, distribution, i);
                break;
            }
            case DataType::F32:
            {
                std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
                library->fill(tensor, distribution, i);
                break;
            }
            default:
                library->fill_tensor_uniform(tensor, i);
        }
    }

    // The given input and weights shapes are in NCHW format; they are permuted to NHWC inside
    TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, const DepthwiseConv2dAttributes dwc_conv2d_attr)
    {
        ARM_COMPUTE_ERROR_ON(_data_layout != DataLayout::NHWC);

        // Our test shapes are assumed in NCHW data layout, thus the permutation
        permute(input_shape, PermutationVector(2U, 0U, 1U));
        permute(weights_shape, PermutationVector(2U, 0U, 1U));

        // Create a new workload sketch
        auto              cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
        auto              gpu_ctx        = GpuWorkloadContext{ &cl_compile_ctx };
        GpuWorkloadSketch sketch{ &gpu_ctx };

        // Create sketch tensors
        TensorInfo input_info  = sketch.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout));
        TensorInfo weight_info = sketch.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout));
        TensorInfo bias_info   = sketch.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout));
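        // The destination info is created empty; its shape and data type are deduced when the
        // operators are added to the sketch below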
        TensorInfo dst_info    = sketch.create_tensor_info();

        ITensorInfo *ans_info = FunctionType::create_op(sketch, &input_info, &weight_info, &bias_info, dwc_conv2d_attr);
        GpuOutput::create_op(sketch, ans_info, &dst_info);

        // Configure runtime
        ClWorkloadRuntime runtime;
        runtime.configure(sketch);

        // (Important) Allocate auxiliary tensor memory if there are any
        for(auto &data : runtime.get_auxiliary_tensors())
        {
            CLTensor     *tensor      = std::get<0>(data);
            TensorInfo    info        = std::get<1>(data);
            AuxMemoryInfo aux_mem_req = std::get<2>(data);
            tensor->allocator()->init(info, aux_mem_req.alignment);
            tensor->allocator()->allocate(); // Use ACL allocated memory
        }

        // Construct user tensors
        TensorType t_input{};
        TensorType t_weight{};
        TensorType t_bias{};
        TensorType t_dst{};

        // Initialize user tensors
        t_input.allocator()->init(input_info);
        t_weight.allocator()->init(weight_info);
        t_bias.allocator()->init(bias_info);
        t_dst.allocator()->init(dst_info);

        // Allocate and fill user tensors
        t_input.allocator()->allocate();
        t_weight.allocator()->allocate();
        t_bias.allocator()->allocate();
        t_dst.allocator()->allocate();

        fill(AccessorType(t_input), 0);
        fill(AccessorType(t_weight), 1);
        fill(AccessorType(t_bias), 2);

        // Run runtime
        runtime.run({ &t_input, &t_weight, &t_bias, &t_dst });
        return t_dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape,
                                      const TensorShape &output_shape, DepthwiseConv2dAttributes dwc_conv2d_attr)
    {
        // Create reference
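        // The reference depthwise convolution operates on NCHW data, so the original
        // (un-permuted) shapes from setup() are used directly here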
        SimpleTensor<T>     src{ input_shape, _data_type, 1 };
        SimpleTensor<T>     weight{ weights_shape, _data_type, 1 };
        SimpleTensor<TBias> bias{ bias_shape, _data_type, 1 };

        fill(src, 0);
        fill(weight, 1);
        fill(bias, 2);

        auto src_nchw          = src;
        auto weights_nchw      = weight;
        auto bias_nchw         = bias;
        auto output_shape_nchw = output_shape;

        PadStrideInfo legacy_pad_stride(dwc_conv2d_attr.stride().x(), dwc_conv2d_attr.stride().y(), dwc_conv2d_attr.pad().left, dwc_conv2d_attr.pad().right, dwc_conv2d_attr.pad().top,
                                        dwc_conv2d_attr.pad().bottom,
                                        DimensionRoundingType{});
        auto dst_nchw = reference::depthwise_convolution(src_nchw, weights_nchw, bias_nchw, output_shape_nchw, legacy_pad_stride, dwc_conv2d_attr.depth_multiplier(), dwc_conv2d_attr.dilation());
        return dst_nchw;
    }

    TensorType      _target{};
    SimpleTensor<T> _reference{};
    DataType        _data_type{};
    DataLayout      _data_layout{};
};

template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class DynamicFusionGpuDepthwiseConv2dValidationFixture : public DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
    template <typename...>
    void setup(TensorShape input_shape, Size2D kernel_size, const PadStrideInfo &info, const Size2D &dilation, const unsigned int depth_multiplier, DataType data_type, DataLayout data_layout)
    {
        DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, kernel_size, info, dilation,
                                                                                                                  depth_multiplier, data_type, data_layout);
    }
};
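
// A minimal usage sketch, assuming the test framework macros from tests/framework/Macros.h.
// The test name, dataset and tolerance below are hypothetical placeholders; the combined dataset
// must yield exactly the arguments expected by setup() (input shape, kernel size, pad/stride info,
// dilation, depth multiplier, data type, data layout):
//
//   FIXTURE_DATA_TEST_CASE(RunSmall,
//                          DynamicFusionGpuDepthwiseConv2dValidationFixture<CLTensor, CLAccessor, GpuDepthwiseConv2d, float>,
//                          framework::DatasetMode::ALL,
//                          some_depthwise_conv2d_dataset)
//   {
//       validate(CLAccessor(_target), _reference, tolerance_f32);
//   }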
} // namespace validation
} // namespace test
} // namespace arm_compute
#endif /* TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE */