xref: /aosp_15_r20/external/ComputeLibrary/src/gpu/cl/operators/ClConcatenate.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/gpu/cl/operators/ClConcatenate.h"
25 
26 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
27 #include "arm_compute/runtime/CL/CLScheduler.h"
28 
29 #include "src/gpu/cl/kernels/ClBatchConcatenateKernel.h"
30 #include "src/gpu/cl/kernels/ClDepthConcatenateKernel.h"
31 #include "src/gpu/cl/kernels/ClHeightConcatenateKernel.h"
32 #include "src/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h"
33 #include "src/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h"
34 #include "src/gpu/cl/kernels/ClWidthConcatenateKernel.h"
35 
36 #include "arm_compute/core/Error.h"
37 #include "arm_compute/core/TensorInfo.h"
38 #include "arm_compute/core/Types.h"
39 
40 #include "src/common/utils/Log.h"
41 #include "src/core/helpers/AutoConfiguration.h"
42 
43 namespace arm_compute
44 {
45 namespace opencl
46 {
configure(const CLCompileContext & compile_context,const std::vector<ITensorInfo * > & src_vector,ITensorInfo * dst,size_t axis)47 void ClConcatenate::configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis)
48 {
49     ARM_COMPUTE_ERROR_ON(dst == nullptr);
50     ARM_COMPUTE_LOG_PARAMS(src_vector, dst, axis);
51     _axis       = axis;
52     _num_inputs = src_vector.size();
53 
54     TensorShape                      dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, _axis);
55     std::vector<const ITensorInfo *> const_src_vector(src_vector.size());
56     std::transform(src_vector.begin(), src_vector.end(), const_src_vector.begin(), [](ITensorInfo * t)
57     {
58         ARM_COMPUTE_ERROR_ON_NULLPTR(t);
59         return t;
60     });
61 
62     // dst auto inizialitation if not yet initialized
63     auto_init_if_empty(*dst, dst_shape, 1, src_vector[0]->data_type());
64     ARM_COMPUTE_ERROR_THROW_ON(ClConcatenate::validate(const_src_vector, dst, axis));
65 
66     unsigned int offset = 0;
67     switch(_axis)
68     {
69         case Window::DimX:
70         {
71             switch(_num_inputs)
72             {
73                 case 2:
74                 {
75                     // Configure WidthConcatenate2Tensors kernel
76                     auto kernel = std::make_unique<kernels::ClWidthConcatenate2TensorsKernel>();
77                     kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), dst);
78                     _concat_kernels.emplace_back(std::move(kernel));
79                     break;
80                 }
81                 case 4:
82                 {
83                     // Configure WidthConcatenate4Tensors kernel
84                     auto kernel = std::make_unique<kernels::ClWidthConcatenate4TensorsKernel>();
85                     kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), src_vector.at(2), src_vector.at(3), dst);
86                     _concat_kernels.emplace_back(std::move(kernel));
87                     break;
88                 }
89                 default:
90                 {
91                     // Configure generic case WidthConcatenate kernels
92                     for(unsigned int i = 0; i < _num_inputs; ++i)
93                     {
94                         auto kernel = std::make_unique<kernels::ClWidthConcatenateKernel>();
95                         kernel->configure(compile_context, src_vector.at(i), offset, dst);
96                         offset += src_vector.at(i)->dimension(_axis);
97                         _concat_kernels.emplace_back(std::move(kernel));
98                     }
99                     break;
100                 }
101             }
102             break;
103         }
104         case Window::DimY:
105         {
106             for(unsigned int i = 0; i < _num_inputs; ++i)
107             {
108                 auto kernel = std::make_unique<kernels::ClHeightConcatenateKernel>();
109                 kernel->configure(compile_context, src_vector.at(i), offset, dst);
110                 offset += src_vector.at(i)->dimension(_axis);
111                 _concat_kernels.emplace_back(std::move(kernel));
112             }
113             break;
114         }
115         case Window::DimZ:
116         {
117             for(unsigned int i = 0; i < _num_inputs; ++i)
118             {
119                 auto kernel = std::make_unique<kernels::ClDepthConcatenateKernel>();
120                 kernel->configure(compile_context, src_vector.at(i), offset, dst);
121                 offset += src_vector.at(i)->dimension(_axis);
122                 _concat_kernels.emplace_back(std::move(kernel));
123             }
124             break;
125         }
126         case 3:
127         {
128             for(unsigned int i = 0; i < _num_inputs; ++i)
129             {
130                 auto kernel = std::make_unique<kernels::ClBatchConcatenateKernel>();
131                 kernel->configure(compile_context, src_vector.at(i), offset, dst);
132                 offset += src_vector.at(i)->dimension(_axis);
133                 _concat_kernels.emplace_back(std::move(kernel));
134             }
135             break;
136         }
137         default:
138             ARM_COMPUTE_ERROR("Axis not supported");
139     }
140 }
141 
validate(const std::vector<const ITensorInfo * > & src_vector,const ITensorInfo * dst,size_t axis)142 Status ClConcatenate::validate(const std::vector<const ITensorInfo *> &src_vector, const ITensorInfo *dst, size_t axis)
143 {
144     ARM_COMPUTE_RETURN_ERROR_ON(dst == nullptr);
145     const unsigned int num_inputs = src_vector.size();
146 
147     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst);
148     ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2);
149 
150     unsigned int offset = 0;
151     switch(axis)
152     {
153         case Window::DimX:
154         {
155             switch(num_inputs)
156             {
157                 case 2:
158                     // Validate WidthConcatenate2Tensors kernels if there are 2 inputs
159                     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1]);
160                     ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate2TensorsKernel::validate(src_vector[0], src_vector[1], dst));
161                     break;
162                 case 4:
163                     // Validate WidthConcatenate4Tensors kernels if there are 4 inputs
164                     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1], src_vector[2], src_vector[3]);
165                     ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate4TensorsKernel::validate(src_vector[0], src_vector[1], src_vector[2], src_vector[3], dst));
166                     break;
167                 default:
168                     // Validate generic case of WidthConcatenate kernel
169                     for(const auto &src : src_vector)
170                     {
171                         ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
172                         ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenateKernel::validate(src, offset, dst));
173                         offset += src->dimension(axis);
174                     }
175                     break;
176             }
177             break;
178         }
179         case Window::DimY:
180         {
181             for(const auto &src : src_vector)
182             {
183                 ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClHeightConcatenateKernel::validate(src, offset, dst));
184                 offset += src->dimension(axis);
185             }
186             break;
187         }
188         case Window::DimZ:
189         {
190             for(const auto &src : src_vector)
191             {
192                 ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDepthConcatenateKernel::validate(src, offset, dst));
193                 offset += src->dimension(axis);
194             }
195             break;
196         }
197         case 3:
198         {
199             for(const auto &src : src_vector)
200             {
201                 ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClBatchConcatenateKernel::validate(src, offset, dst));
202                 offset += src->dimension(axis);
203             }
204             break;
205         }
206         default:
207             ARM_COMPUTE_ERROR("Axis not supported");
208     }
209 
210     if(dst->total_size() != 0)
211     {
212         TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, axis);
213         ARM_COMPUTE_RETURN_ERROR_ON(dst_shape.total_size() != dst->tensor_shape().total_size());
214     }
215 
216     return Status{};
217 }
218 
run(ITensorPack & tensors)219 void ClConcatenate::run(ITensorPack &tensors)
220 {
221     if(tensors.empty())
222     {
223         ARM_COMPUTE_ERROR("No inputs provided");
224     }
225 
226     if(static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs))
227     {
228         ARM_COMPUTE_ERROR("Configured with different number of inputs");
229     }
230 
231     if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4))
232     {
233         ARM_COMPUTE_ERROR_ON(_concat_kernels.empty());
234         CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true);
235     }
236     else
237     {
238         int i = 0;
239         for(auto &k : _concat_kernels)
240         {
241             ITensorPack pack;
242             pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i));
243             pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST));
244             CLScheduler::get().enqueue_op(*k, pack, true);
245             ++i;
246         }
247     }
248 }
249 } // namespace opencl
250 } // namespace arm_compute
251