/*
 * Copyright (c) 2017-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24*c217d954SCole Faust #include "arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h"
25*c217d954SCole Faust
26*c217d954SCole Faust #include "arm_compute/core/Utils.h"
27*c217d954SCole Faust #include "arm_compute/core/Validate.h"
28*c217d954SCole Faust #include "arm_compute/core/utils/misc/ShapeCalculator.h"
29*c217d954SCole Faust #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
30*c217d954SCole Faust #include "arm_compute/runtime/CL/CLScheduler.h"
31*c217d954SCole Faust #include "src/core/CL/ICLKernel.h"
32*c217d954SCole Faust #include "src/gpu/cl/IClOperator.h"
33*c217d954SCole Faust #include "src/gpu/cl/operators/ClTransposedConvolution.h"
34*c217d954SCole Faust
35*c217d954SCole Faust #include "src/common/utils/Log.h"
36*c217d954SCole Faust
37*c217d954SCole Faust #include <cmath>
38*c217d954SCole Faust #include <memory>
39*c217d954SCole Faust #include <tuple>
40*c217d954SCole Faust
41*c217d954SCole Faust using namespace arm_compute;
42*c217d954SCole Faust using namespace arm_compute::misc::shape_calculator;
43*c217d954SCole Faust
44*c217d954SCole Faust struct CLDeconvolutionLayer::Impl
45*c217d954SCole Faust {
46*c217d954SCole Faust const ICLTensor *src{ nullptr };
47*c217d954SCole Faust const ICLTensor *weights{ nullptr };
48*c217d954SCole Faust const ICLTensor *biases{ nullptr };
49*c217d954SCole Faust ICLTensor *dst{ nullptr };
50*c217d954SCole Faust std::unique_ptr<opencl::IClOperator> op{ nullptr };
51*c217d954SCole Faust };
52*c217d954SCole Faust
53*c217d954SCole Faust CLDeconvolutionLayer::~CLDeconvolutionLayer() = default;
54*c217d954SCole Faust
CLDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)55*c217d954SCole Faust CLDeconvolutionLayer::CLDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
56*c217d954SCole Faust : _memory_manager(std::move(memory_manager)), _function(), _impl(std::make_unique<Impl>())
57*c217d954SCole Faust {
58*c217d954SCole Faust }
59*c217d954SCole Faust
configure(ICLTensor * input,ICLTensor * weights,const ICLTensor * bias,ICLTensor * output,const PadStrideInfo & deconv_info,const WeightsInfo & weights_info)60*c217d954SCole Faust void CLDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info,
61*c217d954SCole Faust const WeightsInfo &weights_info)
62*c217d954SCole Faust {
63*c217d954SCole Faust configure(CLKernelLibrary::get().get_compile_context(), input, weights, bias, output, deconv_info, weights_info);
64*c217d954SCole Faust }
65*c217d954SCole Faust
configure(const CLCompileContext & compile_context,ICLTensor * input,ICLTensor * weights,const ICLTensor * bias,ICLTensor * output,const PadStrideInfo & deconv_info,const WeightsInfo & weights_info)66*c217d954SCole Faust void CLDeconvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info,
67*c217d954SCole Faust const WeightsInfo &weights_info)
68*c217d954SCole Faust {
69*c217d954SCole Faust ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
70*c217d954SCole Faust ARM_COMPUTE_LOG_PARAMS(input, weights, bias, output, deconv_info, weights_info);
71*c217d954SCole Faust
72*c217d954SCole Faust switch(CLDeconvolutionLayer::get_deconvolution_method(input->info(), weights->info(), nullptr, output->info(), deconv_info, weights_info))
73*c217d954SCole Faust {
74*c217d954SCole Faust case DeconvolutionMethod::DIRECT:
75*c217d954SCole Faust {
76*c217d954SCole Faust auto op = std::make_unique<opencl::ClTransposedConvolution>();
77*c217d954SCole Faust op->configure(compile_context, input->info(), weights->info(), bias != nullptr ? bias->info() : nullptr, output->info(), deconv_info);
78*c217d954SCole Faust
79*c217d954SCole Faust _impl->src = input;
80*c217d954SCole Faust _impl->weights = weights;
81*c217d954SCole Faust _impl->biases = bias;
82*c217d954SCole Faust _impl->dst = output;
83*c217d954SCole Faust
84*c217d954SCole Faust _impl->op = std::move(op);
85*c217d954SCole Faust break;
86*c217d954SCole Faust }
87*c217d954SCole Faust case DeconvolutionMethod::UPSCALE_CONV2D:
88*c217d954SCole Faust {
89*c217d954SCole Faust auto f = std::make_unique<CLDirectDeconvolutionLayer>();
90*c217d954SCole Faust f->configure(compile_context, input, weights, bias, output, deconv_info, weights_info);
91*c217d954SCole Faust _function = std::move(f);
92*c217d954SCole Faust break;
93*c217d954SCole Faust }
94*c217d954SCole Faust case DeconvolutionMethod::GEMM:
95*c217d954SCole Faust {
96*c217d954SCole Faust auto f = std::make_unique<CLGEMMDeconvolutionLayer>(_memory_manager);
97*c217d954SCole Faust f->configure(compile_context, input, weights, bias, output, deconv_info);
98*c217d954SCole Faust _function = std::move(f);
99*c217d954SCole Faust break;
100*c217d954SCole Faust }
101*c217d954SCole Faust default:
102*c217d954SCole Faust ARM_COMPUTE_ERROR("Not supported.");
103*c217d954SCole Faust break;
104*c217d954SCole Faust }
105*c217d954SCole Faust }
106*c217d954SCole Faust
validate(const ITensorInfo * input,const ITensorInfo * weights,const ITensorInfo * bias,ITensorInfo * output,const PadStrideInfo & deconv_info,const WeightsInfo & weights_info)107*c217d954SCole Faust Status CLDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &deconv_info,
108*c217d954SCole Faust const WeightsInfo &weights_info)
109*c217d954SCole Faust {
110*c217d954SCole Faust ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
111*c217d954SCole Faust switch(CLDeconvolutionLayer::get_deconvolution_method(input, weights, bias, output, deconv_info, weights_info))
112*c217d954SCole Faust {
113*c217d954SCole Faust case DeconvolutionMethod::DIRECT:
114*c217d954SCole Faust {
115*c217d954SCole Faust // Validate transposed convolution operator
116*c217d954SCole Faust ARM_COMPUTE_RETURN_ON_ERROR(opencl::ClTransposedConvolution::validate(input, weights, bias, output, deconv_info));
117*c217d954SCole Faust break;
118*c217d954SCole Faust }
119*c217d954SCole Faust case DeconvolutionMethod::UPSCALE_CONV2D:
120*c217d954SCole Faust {
121*c217d954SCole Faust // Validate direct convolution layer
122*c217d954SCole Faust ARM_COMPUTE_RETURN_ON_ERROR(CLDirectDeconvolutionLayer::validate(input, weights, bias, output, deconv_info, weights_info));
123*c217d954SCole Faust break;
124*c217d954SCole Faust }
125*c217d954SCole Faust case DeconvolutionMethod::GEMM:
126*c217d954SCole Faust {
127*c217d954SCole Faust // Validate gemm-based convolution layer
128*c217d954SCole Faust ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMDeconvolutionLayer::validate(input, weights, bias, output, deconv_info));
129*c217d954SCole Faust break;
130*c217d954SCole Faust }
131*c217d954SCole Faust default:
132*c217d954SCole Faust ARM_COMPUTE_ERROR("Not supported.");
133*c217d954SCole Faust break;
134*c217d954SCole Faust }
135*c217d954SCole Faust
136*c217d954SCole Faust return Status{};
137*c217d954SCole Faust }
138*c217d954SCole Faust
get_deconvolution_method(const ITensorInfo * input,const ITensorInfo * weights,const ITensorInfo * bias,ITensorInfo * output,const PadStrideInfo & deconv_info,const WeightsInfo & weights_info)139*c217d954SCole Faust DeconvolutionMethod CLDeconvolutionLayer::get_deconvolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *output, const PadStrideInfo &deconv_info,
140*c217d954SCole Faust const WeightsInfo &weights_info)
141*c217d954SCole Faust {
142*c217d954SCole Faust ARM_COMPUTE_UNUSED(output, bias, weights_info);
143*c217d954SCole Faust
144*c217d954SCole Faust if(is_data_type_quantized_per_channel(weights->data_type()))
145*c217d954SCole Faust {
146*c217d954SCole Faust return DeconvolutionMethod::UPSCALE_CONV2D;
147*c217d954SCole Faust }
148*c217d954SCole Faust
149*c217d954SCole Faust const DataLayout data_layout = input->data_layout();
150*c217d954SCole Faust
151*c217d954SCole Faust const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
152*c217d954SCole Faust const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
153*c217d954SCole Faust const size_t idx_n = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
154*c217d954SCole Faust const size_t ofm = weights->tensor_shape()[idx_n];
155*c217d954SCole Faust
156*c217d954SCole Faust if(weights->dimension(idx_w) != deconv_info.stride().first || weights->dimension(idx_h) != deconv_info.stride().second)
157*c217d954SCole Faust {
158*c217d954SCole Faust if(input->data_layout() == DataLayout::NHWC && ofm <= 16)
159*c217d954SCole Faust {
160*c217d954SCole Faust return DeconvolutionMethod::DIRECT;
161*c217d954SCole Faust }
162*c217d954SCole Faust else
163*c217d954SCole Faust {
164*c217d954SCole Faust return DeconvolutionMethod::UPSCALE_CONV2D;
165*c217d954SCole Faust }
166*c217d954SCole Faust }
167*c217d954SCole Faust
168*c217d954SCole Faust return DeconvolutionMethod::GEMM;
169*c217d954SCole Faust }
170*c217d954SCole Faust
run()171*c217d954SCole Faust void CLDeconvolutionLayer::run()
172*c217d954SCole Faust {
173*c217d954SCole Faust prepare();
174*c217d954SCole Faust
175*c217d954SCole Faust if(_impl->op != nullptr)
176*c217d954SCole Faust {
177*c217d954SCole Faust // Optimized Operator will be used
178*c217d954SCole Faust ITensorPack pack;
179*c217d954SCole Faust
180*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC_0, _impl->src);
181*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC_1, _impl->weights);
182*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC_2, _impl->biases);
183*c217d954SCole Faust pack.add_tensor(TensorType::ACL_DST, _impl->dst);
184*c217d954SCole Faust
185*c217d954SCole Faust _impl->op->run(pack);
186*c217d954SCole Faust }
187*c217d954SCole Faust else
188*c217d954SCole Faust {
189*c217d954SCole Faust _function->run();
190*c217d954SCole Faust }
191*c217d954SCole Faust }
192*c217d954SCole Faust
prepare()193*c217d954SCole Faust void CLDeconvolutionLayer::prepare()
194*c217d954SCole Faust {
195*c217d954SCole Faust if(_impl->op == nullptr)
196*c217d954SCole Faust {
197*c217d954SCole Faust _function->prepare();
198*c217d954SCole Faust }
199*c217d954SCole Faust }
200