1 /* 2 * Copyright (c) 2022-2023 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #ifndef TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE 25 #define TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE 26 27 #include "arm_compute/core/CL/CLKernelLibrary.h" 28 #include "arm_compute/core/TensorInfo.h" 29 #include "arm_compute/core/Types.h" 30 #include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" 31 #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" 32 #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" 33 34 #include "tests/framework/Fixture.h" 35 #include "tests/framework/Macros.h" 36 #include "tests/validation/reference/ElementwiseOperations.h" 37 38 using namespace arm_compute::experimental::dynamic_fusion; 39 40 namespace arm_compute 41 { 42 namespace test 43 { 44 namespace validation 45 { 46 template <typename TensorType, typename AccessorType, typename FunctionType, typename T> 47 class DynamicFusionGpuElementwiseBinaryValidationGenericFixture : public framework::Fixture 48 { 49 public: 50 template <typename...> 51 void setup(ArithmeticOperation ref_op, const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2, DataType data_type, bool is_inplace, bool fuse_two_ops = false) 52 { 53 _ref_op = ref_op; 54 _is_inplace = is_inplace; 55 _data_type = data_type; 56 _fuse = fuse_two_ops; 57 ARM_COMPUTE_ERROR_ON_MSG(_fuse && shape2.total_size() == 0, "No shape2 provided for fusion of two ops."); 58 ARM_COMPUTE_ERROR_ON_MSG(_fuse && _is_inplace, "In place for fusing case not supported yet."); 59 _target = compute_target(shape0, shape1, shape2); 60 _reference = compute_reference(shape0, shape1, shape2); 61 } 62 63 protected: 64 template <typename U> fill(U && tensor,int i)65 void fill(U &&tensor, int i) 66 { 67 if(is_data_type_float(tensor.data_type())) 68 { 69 switch(_ref_op) 70 { 71 case ArithmeticOperation::DIV: 72 library->fill_tensor_uniform_ranged(tensor, i, { std::pair<float, float>(-0.001f, 0.001f) }); 73 break; 74 case ArithmeticOperation::POWER: 75 library->fill_tensor_uniform(tensor, i, 0.0f, 5.0f); 76 break; 77 default: 78 library->fill_tensor_uniform(tensor, i); 79 } 80 } 81 else if(tensor.data_type() == DataType::S32) 82 { 83 switch(_ref_op) 84 { 85 case ArithmeticOperation::DIV: 86 library->fill_tensor_uniform_ranged(tensor, i, { std::pair<int32_t, int32_t>(-1U, 1U) }); 87 break; 88 default: 89 library->fill_tensor_uniform(tensor, i); 90 } 91 } 92 else 93 { 94 library->fill_tensor_uniform(tensor, i); 95 } 96 } 97 compute_target(const TensorShape & shape0,const TensorShape & shape1,const TensorShape & shape2)98 TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2) 99 { 100 // Create a new workload sketch 101 auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); 102 auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; 103 GpuWorkloadSketch sketch{ &gpu_ctx }; 104 105 // Fuse first element wise binary Op 106 TensorInfo lhs_info = sketch.create_tensor_info(TensorInfo(shape0, 1, _data_type)); 107 TensorInfo rhs_info = sketch.create_tensor_info(TensorInfo(shape1, 1, _data_type)); 108 TensorInfo dst_info = sketch.create_tensor_info(); 109 110 TensorInfo rhs_info_fuse; 111 112 ITensorInfo *ans_info = FunctionType::create_op(sketch, &lhs_info, &rhs_info); 113 114 if(_fuse) 115 { 116 rhs_info_fuse = sketch.create_tensor_info(TensorInfo(shape2, 1, _data_type)); 117 ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, &rhs_info_fuse); 118 GpuOutput::create_op(sketch, ans2_info, &dst_info); 119 } 120 else 121 { 122 GpuOutput::create_op(sketch, ans_info, &dst_info); 123 } 124 125 // Configure runtime 126 ClWorkloadRuntime runtime; 127 runtime.configure(sketch); 128 129 // (Important) Allocate auxiliary tensor memory if there are any 130 for(auto &data : runtime.get_auxiliary_tensors()) 131 { 132 CLTensor *tensor = std::get<0>(data); 133 TensorInfo info = std::get<1>(data); 134 AuxMemoryInfo aux_mem_req = std::get<2>(data); 135 tensor->allocator()->init(info, aux_mem_req.alignment); 136 tensor->allocator()->allocate(); // Use ACL allocated memory 137 } 138 139 // Construct user tensors 140 TensorType t_lhs{}; 141 TensorType t_rhs{}; 142 TensorType t_rhs_fuse{}; 143 TensorType t_dst{}; 144 145 // Initialize user tensors 146 t_lhs.allocator()->init(lhs_info); 147 t_rhs.allocator()->init(rhs_info); 148 t_dst.allocator()->init(dst_info); 149 if(_fuse) 150 { 151 t_rhs_fuse.allocator()->init(rhs_info_fuse); 152 } 153 154 // Allocate and fill user tensors 155 // Instead of using ACL allocator, the user can choose to import memory into the tensors 156 t_lhs.allocator()->allocate(); 157 t_rhs.allocator()->allocate(); 158 t_dst.allocator()->allocate(); 159 if(_fuse) 160 { 161 t_rhs_fuse.allocator()->allocate(); 162 } 163 164 fill(AccessorType(t_lhs), 0); 165 fill(AccessorType(t_rhs), 1); 166 if(_fuse) 167 { 168 fill(AccessorType(t_rhs_fuse), 2); 169 } 170 171 // Run runtime 172 if(_fuse) 173 { 174 runtime.run({ &t_lhs, &t_rhs, &t_rhs_fuse, &t_dst }); 175 } 176 else 177 { 178 runtime.run({ &t_lhs, &t_rhs, &t_dst }); 179 } 180 181 return t_dst; 182 } 183 compute_reference(const TensorShape & shape0,const TensorShape & shape1,const TensorShape & shape2)184 SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2) 185 { 186 const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); 187 const TensorShape out_shape_fuse = TensorShape::broadcast_shape(out_shape, shape1); 188 189 // Create reference 190 SimpleTensor<T> ref_lhs{ shape0, _data_type, 1, QuantizationInfo() }; 191 SimpleTensor<T> ref_rhs{ shape1, _data_type, 1, QuantizationInfo() }; 192 SimpleTensor<T> ref_rhs_fuse{ shape2, _data_type, 1, QuantizationInfo() }; 193 SimpleTensor<T> ref_dst{ out_shape, _data_type, 1, QuantizationInfo() }; 194 SimpleTensor<T> ref_dst_fuse{ out_shape_fuse, _data_type, 1, QuantizationInfo() }; 195 196 // Fill reference 197 fill(ref_lhs, 0); 198 fill(ref_rhs, 1); 199 200 reference::arithmetic_operation<T>(_ref_op, ref_lhs, ref_rhs, ref_dst, ConvertPolicy::WRAP); 201 if(_fuse) 202 { 203 fill(ref_rhs_fuse, 2); 204 reference::arithmetic_operation<T>(_ref_op, ref_dst, ref_rhs_fuse, ref_dst_fuse, ConvertPolicy::WRAP); 205 } 206 SimpleTensor<T> *ret = _fuse ? &ref_dst_fuse : &ref_dst; 207 return *ret; 208 } 209 210 ArithmeticOperation _ref_op{ ArithmeticOperation::ADD }; 211 TensorType _target{}; 212 SimpleTensor<T> _reference{}; 213 DataType _data_type{}; 214 DataLayout _data_layout{}; 215 bool _is_inplace{ false }; 216 bool _fuse{ false }; 217 }; 218 219 template <typename TensorType, typename AccessorType, typename FunctionType, typename T> 220 class DynamicFusionGpuElementwiseBinaryOneOpValidationFixture : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> 221 { 222 public: 223 template <typename...> setup(ArithmeticOperation ref_op,const TensorShape & shape0,DataType data_type,bool is_inplace)224 void setup(ArithmeticOperation ref_op, const TensorShape &shape0, DataType data_type, bool is_inplace) 225 { 226 DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ref_op, shape0, shape0, TensorShape(), data_type, is_inplace); 227 } 228 }; 229 230 template <typename TensorType, typename AccessorType, typename FunctionType, typename T> 231 class DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> 232 { 233 public: 234 template <typename...> setup(ArithmeticOperation ref_op,const TensorShape & shape0,const TensorShape & shape1,DataType data_type,bool is_inplace)235 void setup(ArithmeticOperation ref_op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type, bool is_inplace) 236 { 237 DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ref_op, shape0, shape1, TensorShape(), data_type, is_inplace); 238 } 239 }; 240 241 template <typename TensorType, typename AccessorType, typename FunctionType, typename T> 242 class DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T> 243 { 244 public: 245 template <typename...> setup(ArithmeticOperation ref_op,const TensorShape & shape0,const TensorShape & shape1,const TensorShape & shape2,DataType data_type,bool is_inplace,bool fuse_two_ops)246 void setup(ArithmeticOperation ref_op, const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2, DataType data_type, bool is_inplace, bool fuse_two_ops) 247 { 248 DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ref_op, shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops); 249 } 250 }; 251 252 } // namespace validation 253 } // namespace test 254 } // namespace arm_compute 255 #endif /* TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE */ 256