/*
 * Copyright (c) 2022-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE
#define TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE

#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"

#include "tests/framework/Fixture.h"
#include "tests/framework/Macros.h"
#include "tests/validation/reference/ElementwiseOperations.h"

using namespace arm_compute::experimental::dynamic_fusion;

namespace arm_compute
{
namespace test
{
namespace validation
{
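/** Generic validation fixture for dynamic fusion elementwise binary operators.
 *
 * Records one elementwise binary op (or two chained ops when fuse_two_ops is set)
 * into a GpuWorkloadSketch, executes it through ClWorkloadRuntime, and compares
 * the result against the reference arithmetic_operation implementation.
 */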
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class DynamicFusionGpuElementwiseBinaryValidationGenericFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(ArithmeticOperation ref_op, const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2, DataType data_type, bool is_inplace, bool fuse_two_ops = false)
    {
        _ref_op     = ref_op;
        _is_inplace = is_inplace;
        _data_type  = data_type;
        _fuse       = fuse_two_ops;
        ARM_COMPUTE_ERROR_ON_MSG(_fuse && shape2.total_size() == 0, "No shape2 provided for fusion of two ops.");
        ARM_COMPUTE_ERROR_ON_MSG(_fuse && _is_inplace, "In-place computation is not yet supported when fusing two ops.");
        _target    = compute_target(shape0, shape1, shape2);
        _reference = compute_reference(shape0, shape1, shape2);
    }

protected:
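    /** Fill a tensor with library-generated values.
     *
     * Value ranges are restricted per reference operation (e.g. a narrow range
     * around zero for DIV and [0, 5] for POWER) to keep the results well-behaved.
     */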
    template <typename U>
    void fill(U &&tensor, int i)
    {
        if(is_data_type_float(tensor.data_type()))
        {
            switch(_ref_op)
            {
                case ArithmeticOperation::DIV:
                    library->fill_tensor_uniform_ranged(tensor, i, { std::pair<float, float>(-0.001f, 0.001f) });
                    break;
                case ArithmeticOperation::POWER:
                    library->fill_tensor_uniform(tensor, i, 0.0f, 5.0f);
                    break;
                default:
                    library->fill_tensor_uniform(tensor, i);
            }
        }
        else if(tensor.data_type() == DataType::S32)
        {
            switch(_ref_op)
            {
                case ArithmeticOperation::DIV:
                    library->fill_tensor_uniform_ranged(tensor, i, { std::pair<int32_t, int32_t>(-1, 1) });
                    break;
                default:
                    library->fill_tensor_uniform(tensor, i);
            }
        }
        else
        {
            library->fill_tensor_uniform(tensor, i);
        }
    }

    /** Build the sketch, configure the runtime and run the workload on the target backend. */
    TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2)
    {
        // Create a new workload sketch
        auto              cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
        auto              gpu_ctx        = GpuWorkloadContext{ &cl_compile_ctx };
        GpuWorkloadSketch sketch{ &gpu_ctx };

        // Fuse the first elementwise binary op
        TensorInfo lhs_info = sketch.create_tensor_info(TensorInfo(shape0, 1, _data_type));
        TensorInfo rhs_info = sketch.create_tensor_info(TensorInfo(shape1, 1, _data_type));
        TensorInfo dst_info = sketch.create_tensor_info();

        TensorInfo rhs_info_fuse;

        ITensorInfo *ans_info = FunctionType::create_op(sketch, &lhs_info, &rhs_info);

        if(_fuse)
        {
            rhs_info_fuse          = sketch.create_tensor_info(TensorInfo(shape2, 1, _data_type));
            ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, &rhs_info_fuse);
            GpuOutput::create_op(sketch, ans2_info, &dst_info);
        }
        else
        {
            GpuOutput::create_op(sketch, ans_info, &dst_info);
        }

        // Configure runtime
        ClWorkloadRuntime runtime;
        runtime.configure(sketch);

        // (Important) Allocate auxiliary tensor memory if there are any
        for(auto &data : runtime.get_auxiliary_tensors())
        {
            CLTensor     *tensor      = std::get<0>(data);
            TensorInfo    info        = std::get<1>(data);
            AuxMemoryInfo aux_mem_req = std::get<2>(data);
            tensor->allocator()->init(info, aux_mem_req.alignment);
            tensor->allocator()->allocate(); // Use ACL allocated memory
        }

        // Construct user tensors
        TensorType t_lhs{};
        TensorType t_rhs{};
        TensorType t_rhs_fuse{};
        TensorType t_dst{};

        // Initialize user tensors
        t_lhs.allocator()->init(lhs_info);
        t_rhs.allocator()->init(rhs_info);
        t_dst.allocator()->init(dst_info);
        if(_fuse)
        {
            t_rhs_fuse.allocator()->init(rhs_info_fuse);
        }

        // Allocate and fill user tensors
        // Instead of using ACL allocator, the user can choose to import memory into the tensors
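        // (e.g. via CLTensorAllocator::import_memory() with a pre-existing cl::Buffer; an aside, not exercised by this fixture)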
        t_lhs.allocator()->allocate();
        t_rhs.allocator()->allocate();
        t_dst.allocator()->allocate();
        if(_fuse)
        {
            t_rhs_fuse.allocator()->allocate();
        }

        fill(AccessorType(t_lhs), 0);
        fill(AccessorType(t_rhs), 1);
        if(_fuse)
        {
            fill(AccessorType(t_rhs_fuse), 2);
        }

        // Run runtime
        if(_fuse)
        {
            runtime.run({ &t_lhs, &t_rhs, &t_rhs_fuse, &t_dst });
        }
        else
        {
            runtime.run({ &t_lhs, &t_rhs, &t_dst });
        }

        return t_dst;
    }

    /** Compute the expected result with the reference implementation. */
    SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2)
    {
        const TensorShape out_shape      = TensorShape::broadcast_shape(shape0, shape1);
        const TensorShape out_shape_fuse = TensorShape::broadcast_shape(out_shape, shape2);

        // Create reference
        SimpleTensor<T> ref_lhs{ shape0, _data_type, 1, QuantizationInfo() };
        SimpleTensor<T> ref_rhs{ shape1, _data_type, 1, QuantizationInfo() };
        SimpleTensor<T> ref_rhs_fuse{ shape2, _data_type, 1, QuantizationInfo() };
        SimpleTensor<T> ref_dst{ out_shape, _data_type, 1, QuantizationInfo() };
        SimpleTensor<T> ref_dst_fuse{ out_shape_fuse, _data_type, 1, QuantizationInfo() };

        // Fill reference
        fill(ref_lhs, 0);
        fill(ref_rhs, 1);

        reference::arithmetic_operation<T>(_ref_op, ref_lhs, ref_rhs, ref_dst, ConvertPolicy::WRAP);
        if(_fuse)
        {
            fill(ref_rhs_fuse, 2);
            reference::arithmetic_operation<T>(_ref_op, ref_dst, ref_rhs_fuse, ref_dst_fuse, ConvertPolicy::WRAP);
        }
        return _fuse ? ref_dst_fuse : ref_dst;
    }

    ArithmeticOperation _ref_op{ ArithmeticOperation::ADD };
    TensorType          _target{};
    SimpleTensor<T>     _reference{};
    DataType            _data_type{};
    DataLayout          _data_layout{};
    bool                _is_inplace{ false };
    bool                _fuse{ false };
};

/** Validation fixture for a single elementwise binary op with identical input shapes. */
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class DynamicFusionGpuElementwiseBinaryOneOpValidationFixture : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
    template <typename...>
    void setup(ArithmeticOperation ref_op, const TensorShape &shape0, DataType data_type, bool is_inplace)
    {
        DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ref_op, shape0, shape0, TensorShape(), data_type, is_inplace);
    }
};

/** Validation fixture for a single elementwise binary op with broadcasting between the two input shapes. */
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
    template <typename...>
    void setup(ArithmeticOperation ref_op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type, bool is_inplace)
    {
        DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ref_op, shape0, shape1, TensorShape(), data_type, is_inplace);
    }
};

/** Validation fixture for two fused elementwise binary ops. */
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
{
public:
    template <typename...>
    void setup(ArithmeticOperation ref_op, const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2, DataType data_type, bool is_inplace, bool fuse_two_ops)
    {
        DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ref_op, shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops);
    }
};
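// Illustrative binding of the one-op fixture in a test suite (a sketch only: the
// GpuAdd wrapper and the dataset values are assumptions, not part of this header):
//
//   using DynamicFusionCLAddFixture =
//       DynamicFusionGpuElementwiseBinaryOneOpValidationFixture<CLTensor, CLAccessor, GpuAdd, float>;
//   FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionCLAddFixture, framework::DatasetMode::ALL,
//                          combine(combine(combine(framework::dataset::make("RefOp", ArithmeticOperation::ADD),
//                                                  datasets::SmallShapes()),
//                                          framework::dataset::make("DataType", DataType::F32)),
//                                  framework::dataset::make("InPlace", false)));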

} // namespace validation
} // namespace test
} // namespace arm_compute
#endif /* TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE */