1 /* 2 * Copyright (c) 2022-2023 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #ifndef TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE 25 #define TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE 26 27 #include "arm_compute/core/CL/CLKernelLibrary.h" 28 #include "arm_compute/core/TensorInfo.h" 29 #include "arm_compute/core/Types.h" 30 #include "arm_compute/core/utils/misc/ShapeCalculator.h" 31 32 #include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" 33 #include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" 34 #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" 35 #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h" 36 #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" 37 38 #include "tests/CL/CLAccessor.h" 39 40 #include "tests/framework/Asserts.h" 41 #include "tests/framework/Fixture.h" 42 #include "tests/framework/Macros.h" 43 44 #include "tests/validation/Validation.h" 45 #include "tests/validation/reference/DepthwiseConvolutionLayer.h" 46 47 using namespace arm_compute::experimental::dynamic_fusion; 48 49 namespace arm_compute 50 { 51 namespace test 52 { 53 namespace validation 54 { 55 template <typename TensorType, typename AccessorType, typename FunctionType, typename T> 56 class DynamicFusionGpuDepthwiseConv2dValidationGenericFixture : public framework::Fixture 57 { 58 public: 59 using TBias = typename std::conditional < std::is_same<typename std::decay<T>::type, uint8_t>::value 60 || std::is_same<typename std::decay<T>::type, int8_t>::value, 61 int32_t, T >::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T 62 63 template <typename...> setup(TensorShape input_shape,Size2D kernel_size,const PadStrideInfo & pad_stride,const Size2D & dilation,const unsigned int depth_multiplier,const DataType data_type,const DataLayout data_layout)64 void setup(TensorShape input_shape, Size2D kernel_size, const PadStrideInfo &pad_stride, const Size2D &dilation, 65 const unsigned int depth_multiplier, const DataType data_type, const DataLayout data_layout) 66 { 67 ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion depthwise conv2d only supports NHWC layout 68 69 DepthwiseConv2dAttributes dwc_conv2d_attr; 70 const Padding2D padding_2d(pad_stride.pad_left(), pad_stride.pad_right(), pad_stride.pad_top(), pad_stride.pad_bottom()); 71 dwc_conv2d_attr.pad(padding_2d) 72 .stride(Size2D(pad_stride.stride().first, pad_stride.stride().second)) 73 .dilation(dilation) 74 .depth_multiplier(depth_multiplier) 75 .dimension_rounding_type(pad_stride.round()); 76 77 // Calculate Output and Weight Shapes 78 TensorShape weights_shape = TensorShape(kernel_size.width, kernel_size.height); 79 80 const TensorInfo in_info(input_shape, 1, data_type); 81 const TensorInfo we_info(weights_shape, 1, data_type); 82 83 const ConvolutionInfo info{ pad_stride, depth_multiplier, ActivationLayerInfo(), dilation }; 84 const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(in_info, we_info, info); 85 86 weights_shape.set(2, output_shape.z()); 87 const TensorShape bias_shape = TensorShape(weights_shape[2]); 88 89 _data_type = data_type; 90 _data_layout = data_layout; 91 _target = compute_target(input_shape, weights_shape, bias_shape, dwc_conv2d_attr); 92 _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, dwc_conv2d_attr); 93 } 94 95 protected: 96 template <typename U> fill(U && tensor,int i)97 void fill(U &&tensor, int i) 98 { 99 switch(tensor.data_type()) 100 { 101 case DataType::F16: 102 { 103 arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f }; 104 library->fill(tensor, distribution, i); 105 break; 106 } 107 case DataType::F32: 108 { 109 std::uniform_real_distribution<float> distribution(-1.0f, 1.0f); 110 library->fill(tensor, distribution, i); 111 break; 112 } 113 default: 114 library->fill_tensor_uniform(tensor, i); 115 } 116 } 117 118 // Given input is in nchw format compute_target(TensorShape input_shape,TensorShape weights_shape,const TensorShape & bias_shape,const DepthwiseConv2dAttributes dwc_conv2d_attr)119 TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, const DepthwiseConv2dAttributes dwc_conv2d_attr) 120 { 121 ARM_COMPUTE_ERROR_ON(_data_layout != DataLayout::NHWC); 122 123 // Our test shapes are assumed in NCHW data layout, thus the permutation 124 permute(input_shape, PermutationVector(2U, 0U, 1U)); 125 permute(weights_shape, PermutationVector(2U, 0U, 1U)); 126 127 // Create a new workload sketch 128 auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); 129 auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; 130 GpuWorkloadSketch sketch{ &gpu_ctx }; 131 132 // Create sketch tensors 133 TensorInfo input_info = sketch.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout)); 134 TensorInfo weight_info = sketch.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout)); 135 TensorInfo bias_info = sketch.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout)); 136 TensorInfo dst_info = sketch.create_tensor_info(); 137 138 ITensorInfo *ans_info = FunctionType::create_op(sketch, &input_info, &weight_info, &bias_info, dwc_conv2d_attr); 139 GpuOutput::create_op(sketch, ans_info, &dst_info); 140 141 // Configure runtime 142 ClWorkloadRuntime runtime; 143 runtime.configure(sketch); 144 145 // (Important) Allocate auxiliary tensor memory if there are any 146 for(auto &data : runtime.get_auxiliary_tensors()) 147 { 148 CLTensor *tensor = std::get<0>(data); 149 TensorInfo info = std::get<1>(data); 150 AuxMemoryInfo aux_mem_req = std::get<2>(data); 151 tensor->allocator()->init(info, aux_mem_req.alignment); 152 tensor->allocator()->allocate(); // Use ACL allocated memory 153 } 154 155 // Construct user tensors 156 TensorType t_input{}; 157 TensorType t_weight{}; 158 TensorType t_bias{}; 159 TensorType t_dst{}; 160 161 // Initialize user tensors 162 t_input.allocator()->init(input_info); 163 t_weight.allocator()->init(weight_info); 164 t_bias.allocator()->init(bias_info); 165 t_dst.allocator()->init(dst_info); 166 167 // Allocate and fill user tensors 168 t_input.allocator()->allocate(); 169 t_weight.allocator()->allocate(); 170 t_bias.allocator()->allocate(); 171 t_dst.allocator()->allocate(); 172 173 fill(AccessorType(t_input), 0); 174 fill(AccessorType(t_weight), 1); 175 fill(AccessorType(t_bias), 2); 176 177 // Run runtime 178 runtime.run({ &t_input, &t_weight, &t_bias, &t_dst }); 179 return t_dst; 180 } 181 compute_reference(const TensorShape & input_shape,const TensorShape & weights_shape,const TensorShape & bias_shape,const TensorShape & output_shape,DepthwiseConv2dAttributes dwc_conv2d_attr)182 SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, 183 const TensorShape &output_shape, DepthwiseConv2dAttributes dwc_conv2d_attr) 184 { 185 // Create reference 186 SimpleTensor<T> src{ input_shape, _data_type, 1 }; 187 SimpleTensor<T> weight{ weights_shape, _data_type, 1 }; 188 SimpleTensor<TBias> bias{ bias_shape, _data_type, 1 }; 189 190 fill(src, 0); 191 fill(weight, 1); 192 fill(bias, 2); 193 194 auto src_nchw = src; 195 auto weights_nchw = weight; 196 auto bias_nchw = bias; 197 auto output_shape_nchw = output_shape; 198 199 PadStrideInfo legacy_pad_stride(dwc_conv2d_attr.stride().x(), dwc_conv2d_attr.stride().y(), dwc_conv2d_attr.pad().left, dwc_conv2d_attr.pad().right, dwc_conv2d_attr.pad().top, 200 dwc_conv2d_attr.pad().bottom, 201 DimensionRoundingType{}); 202 auto dst_nchw = reference::depthwise_convolution(src_nchw, weights_nchw, bias_nchw, output_shape_nchw, legacy_pad_stride, dwc_conv2d_attr.depth_multiplier(), dwc_conv2d_attr.dilation()); 203 return dst_nchw; 204 } 205 206 TensorType _target{}; 207 SimpleTensor<T> _reference{}; 208 DataType _data_type{}; 209 DataLayout _data_layout{}; 210 }; 211 212 template <typename TensorType, typename AccessorType, typename FunctionType, typename T> 213 class DynamicFusionGpuDepthwiseConv2dValidationFixture : public DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T> 214 { 215 public: 216 template <typename...> setup(TensorShape input_shape,Size2D kernel_size,const PadStrideInfo & info,const Size2D & dilation,const unsigned int depth_multiplier,DataType data_type,DataLayout data_layout)217 void setup(TensorShape input_shape, Size2D kernel_size, const PadStrideInfo &info, const Size2D &dilation, const unsigned int depth_multiplier, DataType data_type, DataLayout data_layout) 218 { 219 DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, kernel_size, info, dilation, 220 depth_multiplier, data_type, data_layout); 221 } 222 }; 223 } // namespace validation 224 } // namespace test 225 } // namespace arm_compute 226 #endif /* TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE */ 227