/*
 * Copyright (c) 2017-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h"

#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/ICLKernel.h"
#include "src/gpu/cl/IClOperator.h"
#include "src/gpu/cl/operators/ClTransposedConvolution.h"

#include "src/common/utils/Log.h"

#include <cmath>
#include <memory>
#include <tuple>

using namespace arm_compute;
using namespace arm_compute::misc::shape_calculator;

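// Internal state for the operator-based path: raw tensor pointers plus the
// ClTransposedConvolution operator, populated only when the DIRECT method is
// selected. The other methods are handled through the nested _function object.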
struct CLDeconvolutionLayer::Impl
{
    const ICLTensor                     *src{ nullptr };
    const ICLTensor                     *weights{ nullptr };
    const ICLTensor                     *biases{ nullptr };
    ICLTensor                           *dst{ nullptr };
    std::unique_ptr<opencl::IClOperator> op{ nullptr };
};

CLDeconvolutionLayer::~CLDeconvolutionLayer() = default;

CLDeconvolutionLayer::CLDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
    : _memory_manager(std::move(memory_manager)), _function(), _impl(std::make_unique<Impl>())
{
}

void CLDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info,
                                     const WeightsInfo &weights_info)
{
    configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, deconv_info, weights_info);
}

void CLDeconvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info,
                                     const WeightsInfo &weights_info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_LOG_PARAMS(input, weights, bias, output, deconv_info, weights_info);

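    // Select the backend according to get_deconvolution_method() and configure
    // either the transposed-convolution operator or a nested deconvolution function.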
    switch(CLDeconvolutionLayer::get_deconvolution_method(input->info(), weights->info(), nullptr, output->info(), deconv_info, weights_info))
    {
        case DeconvolutionMethod::DIRECT:
        {
            auto op = std::make_unique<opencl::ClTransposedConvolution>();
            op->configure(compile_context, input->info(), weights->info(), bias != nullptr ? bias->info() : nullptr, output->info(), deconv_info);

            _impl->src     = input;
            _impl->weights = weights;
            _impl->biases  = bias;
            _impl->dst     = output;

            _impl->op = std::move(op);
            break;
        }
        case DeconvolutionMethod::UPSCALE_CONV2D:
        {
            auto f = std::make_unique<CLDirectDeconvolutionLayer>();
            f->configure(compile_context, input, weights, bias, output, deconv_info, weights_info);
            _function = std::move(f);
            break;
        }
        case DeconvolutionMethod::GEMM:
        {
            auto f = std::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager);
            f->configure(compile_context, input, weights, bias, output, deconv_info);
            _function = std::move(f);
            break;
        }
        default:
            ARM_COMPUTE_ERROR("Not supported.");
            break;
    }
}
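
// Minimal usage sketch (illustrative only; the tensor initialisation, shapes and
// the PadStrideInfo values are assumptions, not taken from this file):
//
//   CLTensor input, weights, bias, output;
//   // ... initialise the tensor infos, allocate the tensors and fill them ...
//   CLDeconvolutionLayer deconv;
//   deconv.configure(&input, &weights, &bias, &output, PadStrideInfo(2, 2, 1, 1));
//   deconv.run();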

Status CLDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &deconv_info,
                                      const WeightsInfo &weights_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    switch(CLDeconvolutionLayer::get_deconvolution_method(input, weights, bias, output, deconv_info, weights_info))
    {
        case DeconvolutionMethod::DIRECT:
        {
            // Validate transposed convolution operator
            ARM_COMPUTE_RETURN_ON_ERROR(opencl::ClTransposedConvolution::validate(input, weights, bias, output, deconv_info));
            break;
        }
        case DeconvolutionMethod::UPSCALE_CONV2D:
        {
            // Validate direct deconvolution layer
            ARM_COMPUTE_RETURN_ON_ERROR(CLDirectDeconvolutionLayer::validate(input, weights, bias, output, deconv_info, weights_info));
            break;
        }
        case DeconvolutionMethod::GEMM:
        {
            // Validate GEMM-based deconvolution layer
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info));
            break;
        }
        default:
            ARM_COMPUTE_ERROR("Not supported.");
            break;
    }

    return Status{};
}

DeconvolutionMethod CLDeconvolutionLayer::get_deconvolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &deconv_info,
                                                                   const WeightsInfo &weights_info)
{
    ARM_COMPUTE_UNUSED(output, bias, weights_info);

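    // Per-channel quantized weights are always routed to the upscale + conv2d method.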
    if(is_data_type_quantized_per_channel(weights->data_type()))
    {
        return DeconvolutionMethod::UPSCALE_CONV2D;
    }

    const DataLayout data_layout = input->data_layout();

    const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    const size_t idx_n = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
    const size_t ofm   = weights->tensor_shape()[idx_n];

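    // When the kernel size does not match the stride, use the direct transposed
    // convolution for NHWC with a small number of output feature maps and fall back
    // to upscale + conv2d otherwise; matching kernel size and stride uses the GEMM method.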
    if(weights->dimension(idx_w) != deconv_info.stride().first || weights->dimension(idx_h) != deconv_info.stride().second)
    {
        if(input->data_layout() == DataLayout::NHWC && ofm <= 16)
        {
            return DeconvolutionMethod::DIRECT;
        }
        else
        {
            return DeconvolutionMethod::UPSCALE_CONV2D;
        }
    }

    return DeconvolutionMethod::GEMM;
}

void CLDeconvolutionLayer::run()
{
    prepare();

    if(_impl->op != nullptr)
    {
        // Optimized operator path: pack the tensors and run the transposed convolution operator
        ITensorPack pack;

        pack.add_tensor(TensorType::ACL_SRC_0, _impl->src);
        pack.add_tensor(TensorType::ACL_SRC_1, _impl->weights);
        pack.add_tensor(TensorType::ACL_SRC_2, _impl->biases);
        pack.add_tensor(TensorType::ACL_DST, _impl->dst);

        _impl->op->run(pack);
    }
    else
    {
        // Function-based path (UPSCALE_CONV2D or GEMM)
        _function->run();
    }
}

void CLDeconvolutionLayer::prepare()
{
    // Only the function-based paths have a separate preparation step
    if(_impl->op == nullptr)
    {
        _function->prepare();
    }
}
200