/*
 * Copyright (c) 2017-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24*c217d954SCole Faust #include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
25*c217d954SCole Faust
26*c217d954SCole Faust #include "arm_compute/core/utils/misc/InfoHelpers.h"
27*c217d954SCole Faust #include "arm_compute/core/utils/misc/ShapeCalculator.h"
28*c217d954SCole Faust #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
29*c217d954SCole Faust #include "arm_compute/runtime/NEON/NEScheduler.h"
30*c217d954SCole Faust #include "src/common/utils/Log.h"
31*c217d954SCole Faust #include "src/cpu/operators/CpuDepthwiseConv2d.h"
32*c217d954SCole Faust
33*c217d954SCole Faust using namespace arm_compute::misc;
34*c217d954SCole Faust using namespace arm_compute::misc::shape_calculator;
35*c217d954SCole Faust
36*c217d954SCole Faust namespace arm_compute
37*c217d954SCole Faust {
38*c217d954SCole Faust NEDepthwiseConvolutionLayer::~NEDepthwiseConvolutionLayer() = default;
39*c217d954SCole Faust
40*c217d954SCole Faust struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::Impl
41*c217d954SCole Faust {
42*c217d954SCole Faust ITensor *src{ nullptr }; // SRC_0
43*c217d954SCole Faust ITensor *dst{ nullptr }; // DST_0
44*c217d954SCole Faust const ITensor *weights
45*c217d954SCole Faust {
46*c217d954SCole Faust nullptr
47*c217d954SCole Faust }; // SRC_1
48*c217d954SCole Faust const ITensor *biases
49*c217d954SCole Faust {
50*c217d954SCole Faust nullptr
51*c217d954SCole Faust }; // SRC_2
52*c217d954SCole Faust Tensor permuted_input{}; // INT_0
53*c217d954SCole Faust Tensor permuted_weights{}; // INT_1
54*c217d954SCole Faust Tensor permuted_output{}; // INT_2
55*c217d954SCole Faust Tensor workspace{}; // INT_3
56*c217d954SCole Faust Tensor packed_weights{}; // INT_4
57*c217d954SCole Faust std::shared_ptr<cpu::CpuDepthwiseConv2d> op{ nullptr };
58*c217d954SCole Faust bool is_prepared{ false };
59*c217d954SCole Faust bool permute{ false };
60*c217d954SCole Faust };
61*c217d954SCole Faust
NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)62*c217d954SCole Faust NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)
63*c217d954SCole Faust : _memory_group(memory_manager), _impl(std::make_unique<Impl>())
64*c217d954SCole Faust {
65*c217d954SCole Faust }
66*c217d954SCole Faust
configure(ITensor * input,const ITensor * weights,const ITensor * biases,ITensor * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)67*c217d954SCole Faust void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure(ITensor *input,
68*c217d954SCole Faust const ITensor *weights,
69*c217d954SCole Faust const ITensor *biases,
70*c217d954SCole Faust ITensor *output, const PadStrideInfo &conv_info,
71*c217d954SCole Faust unsigned int depth_multiplier,
72*c217d954SCole Faust const ActivationLayerInfo &act_info,
73*c217d954SCole Faust const Size2D &dilation)
74*c217d954SCole Faust {
75*c217d954SCole Faust ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
76*c217d954SCole Faust
77*c217d954SCole Faust bool is_nhwc = input->info()->data_layout() == DataLayout::NCHW;
78*c217d954SCole Faust _impl->src = input;
79*c217d954SCole Faust _impl->weights = weights;
80*c217d954SCole Faust _impl->biases = biases;
81*c217d954SCole Faust _impl->dst = output;
82*c217d954SCole Faust _impl->permute = is_nhwc;
83*c217d954SCole Faust
84*c217d954SCole Faust _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();
85*c217d954SCole Faust ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
86*c217d954SCole Faust _impl->op->configure(_impl->src->info(), _impl->weights->info(), _impl->biases == nullptr ? nullptr : _impl->biases->info(),
87*c217d954SCole Faust _impl->dst->info(), info);
88*c217d954SCole Faust
89*c217d954SCole Faust // Configure pipeline
90*c217d954SCole Faust ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
91*c217d954SCole Faust const bool is_relu = arm_compute::utils::info_helpers::is_relu(act_info);
92*c217d954SCole Faust const bool is_relu6 = arm_compute::utils::info_helpers::is_relu6(act_info);
93*c217d954SCole Faust bool is_activationlayer_enabled = act_info.enabled() && !(is_relu || is_relu6);
94*c217d954SCole Faust
95*c217d954SCole Faust if(!is_activationlayer_enabled)
96*c217d954SCole Faust {
97*c217d954SCole Faust act_info_to_use = act_info;
98*c217d954SCole Faust }
99*c217d954SCole Faust info = ConvolutionInfo{ conv_info, depth_multiplier, act_info_to_use, dilation };
100*c217d954SCole Faust
101*c217d954SCole Faust auto dwc_optimized_func = std::make_unique<cpu::CpuDepthwiseConv2dAssemblyDispatch>();
102*c217d954SCole Faust
103*c217d954SCole Faust if(is_nhwc)
104*c217d954SCole Faust {
105*c217d954SCole Faust auto permute_input = std::make_unique<cpu::CpuPermute>();
106*c217d954SCole Faust auto permute_weights = std::make_unique<cpu::CpuPermute>();
107*c217d954SCole Faust auto permute_output = std::make_unique<cpu::CpuPermute>();
108*c217d954SCole Faust
109*c217d954SCole Faust _memory_group.manage(&_impl->permuted_input);
110*c217d954SCole Faust _memory_group.manage(&_impl->permuted_weights);
111*c217d954SCole Faust _memory_group.manage(&_impl->permuted_output);
112*c217d954SCole Faust
113*c217d954SCole Faust // Configure the function to transform the input tensor from NCHW -> NHWC
114*c217d954SCole Faust permute_input->configure(input->info(), _impl->permuted_input.info(), PermutationVector(2U, 0U, 1U));
115*c217d954SCole Faust _impl->permuted_input.info()->set_data_layout(DataLayout::NHWC);
116*c217d954SCole Faust
117*c217d954SCole Faust // Configure the function to transform the weights tensor from IHW -> HWI
118*c217d954SCole Faust permute_weights->configure(weights->info(), _impl->permuted_weights.info(), PermutationVector(2U, 0U, 1U));
119*c217d954SCole Faust _impl->permuted_weights.info()->set_data_layout(DataLayout::NHWC);
120*c217d954SCole Faust
121*c217d954SCole Faust _impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);
122*c217d954SCole Faust _impl->permuted_output.info()->set_quantization_info(output->info()->quantization_info());
123*c217d954SCole Faust
124*c217d954SCole Faust // Configure optimized depthwise
125*c217d954SCole Faust dwc_optimized_func->configure(_impl->permuted_input.info(), _impl->permuted_weights.info(), biases == nullptr ? nullptr : biases->info(), _impl->permuted_output.info(), info);
126*c217d954SCole Faust
127*c217d954SCole Faust // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
128*c217d954SCole Faust _impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);
129*c217d954SCole Faust permute_output->configure(_impl->permuted_output.info(), output->info(), PermutationVector(1U, 2U, 0U));
130*c217d954SCole Faust
131*c217d954SCole Faust _impl->permuted_input.allocator()->allocate();
132*c217d954SCole Faust _impl->permuted_output.allocator()->allocate();
133*c217d954SCole Faust }
134*c217d954SCole Faust else
135*c217d954SCole Faust {
136*c217d954SCole Faust dwc_optimized_func->configure(_impl->src->info(), _impl->weights->info(), biases == nullptr ? nullptr : biases->info(), _impl->dst->info(), info);
137*c217d954SCole Faust }
138*c217d954SCole Faust
139*c217d954SCole Faust // Allocate memory based on the internal memory requirements
140*c217d954SCole Faust experimental::MemoryRequirements mem_req = dwc_optimized_func->workspace();
141*c217d954SCole Faust _impl->workspace.allocator()->init(TensorInfo(TensorShape{ mem_req[0].size + mem_req[0].alignment }, 1, DataType::S8), mem_req[0].alignment);
142*c217d954SCole Faust _impl->packed_weights.allocator()->init(TensorInfo(TensorShape{ mem_req[1].size + mem_req[1].alignment }, 1, DataType::S8), mem_req[1].alignment);
143*c217d954SCole Faust _memory_group.manage(&_impl->workspace);
144*c217d954SCole Faust _memory_group.manage(&_impl->packed_weights);
145*c217d954SCole Faust _impl->workspace.allocator()->allocate();
146*c217d954SCole Faust _impl->packed_weights.allocator()->allocate();
147*c217d954SCole Faust }
148*c217d954SCole Faust
validate(const ITensorInfo * input,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)149*c217d954SCole Faust Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::validate(const ITensorInfo *input,
150*c217d954SCole Faust const ITensorInfo *weights,
151*c217d954SCole Faust const ITensorInfo *biases,
152*c217d954SCole Faust const ITensorInfo *output,
153*c217d954SCole Faust const PadStrideInfo &conv_info,
154*c217d954SCole Faust unsigned int depth_multiplier,
155*c217d954SCole Faust const ActivationLayerInfo &act_info,
156*c217d954SCole Faust const Size2D &dilation)
157*c217d954SCole Faust {
158*c217d954SCole Faust ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
159*c217d954SCole Faust return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info);
160*c217d954SCole Faust }
161*c217d954SCole Faust
run()162*c217d954SCole Faust void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run()
163*c217d954SCole Faust {
164*c217d954SCole Faust prepare();
165*c217d954SCole Faust MemoryGroupResourceScope scope_mg(_memory_group);
166*c217d954SCole Faust
167*c217d954SCole Faust ITensorPack pack;
168*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC_0, _impl->src);
169*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC_1, _impl->weights);
170*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC_2, _impl->biases);
171*c217d954SCole Faust pack.add_tensor(TensorType::ACL_INT_0, &_impl->permuted_input);
172*c217d954SCole Faust pack.add_tensor(TensorType::ACL_INT_1, &_impl->permuted_weights);
173*c217d954SCole Faust pack.add_tensor(TensorType::ACL_INT_2, &_impl->permuted_output);
174*c217d954SCole Faust pack.add_tensor(TensorType::ACL_INT_3, &_impl->workspace);
175*c217d954SCole Faust pack.add_tensor(TensorType::ACL_INT_4, &_impl->packed_weights);
176*c217d954SCole Faust pack.add_tensor(TensorType::ACL_DST_0, _impl->dst);
177*c217d954SCole Faust
178*c217d954SCole Faust _impl->op->run(pack);
179*c217d954SCole Faust }
180*c217d954SCole Faust
prepare()181*c217d954SCole Faust void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::prepare()
182*c217d954SCole Faust {
183*c217d954SCole Faust if(!_impl->is_prepared)
184*c217d954SCole Faust {
185*c217d954SCole Faust // Permute weights
186*c217d954SCole Faust if(_impl->permute)
187*c217d954SCole Faust {
188*c217d954SCole Faust _impl->permuted_weights.allocator()->allocate();
189*c217d954SCole Faust }
190*c217d954SCole Faust
191*c217d954SCole Faust if(!_impl->permuted_weights.is_used())
192*c217d954SCole Faust {
193*c217d954SCole Faust _impl->permuted_weights.allocator()->free();
194*c217d954SCole Faust }
195*c217d954SCole Faust
196*c217d954SCole Faust _impl->is_prepared = true;
197*c217d954SCole Faust }
198*c217d954SCole Faust }
199*c217d954SCole Faust
200*c217d954SCole Faust struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::Impl
201*c217d954SCole Faust {
202*c217d954SCole Faust Tensor permuted_input{};
203*c217d954SCole Faust Tensor permuted_weights{};
204*c217d954SCole Faust Tensor permuted_output{};
205*c217d954SCole Faust bool is_prepared{ false };
206*c217d954SCole Faust bool is_nchw{ false };
207*c217d954SCole Faust bool is_activationlayer_enabled{ false };
208*c217d954SCole Faust const ITensor *weights{ nullptr };
209*c217d954SCole Faust const ITensor *biases{ nullptr };
210*c217d954SCole Faust const ITensor *src{ nullptr };
211*c217d954SCole Faust ITensor *dst{ nullptr };
212*c217d954SCole Faust std::shared_ptr<cpu::CpuDepthwiseConv2d> op{ nullptr };
213*c217d954SCole Faust };
214*c217d954SCole Faust
NEDepthwiseConvolutionLayerGeneric()215*c217d954SCole Faust NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric()
216*c217d954SCole Faust : _impl(std::make_unique<Impl>())
217*c217d954SCole Faust {
218*c217d954SCole Faust }
219*c217d954SCole Faust
configure(ITensor * input,const ITensor * weights,const ITensor * biases,ITensor * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)220*c217d954SCole Faust void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
221*c217d954SCole Faust unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
222*c217d954SCole Faust {
223*c217d954SCole Faust ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
224*c217d954SCole Faust ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
225*c217d954SCole Faust output->info(), conv_info, depth_multiplier, act_info, dilation));
226*c217d954SCole Faust
227*c217d954SCole Faust const ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
228*c217d954SCole Faust _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();
229*c217d954SCole Faust _impl->op->configure(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(), info);
230*c217d954SCole Faust
231*c217d954SCole Faust _impl->src = input;
232*c217d954SCole Faust _impl->dst = output;
233*c217d954SCole Faust _impl->weights = weights;
234*c217d954SCole Faust _impl->biases = biases;
235*c217d954SCole Faust _impl->is_nchw = input->info()->data_layout() == DataLayout::NCHW;
236*c217d954SCole Faust _impl->is_prepared = !_impl->is_nchw;
237*c217d954SCole Faust
238*c217d954SCole Faust ITensor *input_to_use = input;
239*c217d954SCole Faust const ITensor *weights_to_use = weights;
240*c217d954SCole Faust ITensor *output_to_use = output;
241*c217d954SCole Faust if(_impl->is_nchw)
242*c217d954SCole Faust {
243*c217d954SCole Faust auto permute_input = std::make_unique<cpu::CpuPermute>();
244*c217d954SCole Faust auto permute_weights = std::make_unique<cpu::CpuPermute>();
245*c217d954SCole Faust
246*c217d954SCole Faust permute_input->configure(input->info(), _impl->permuted_input.info(), PermutationVector(2U, 0U, 1U));
247*c217d954SCole Faust _impl->permuted_input.info()->set_data_layout(DataLayout::NHWC);
248*c217d954SCole Faust input_to_use = &_impl->permuted_input;
249*c217d954SCole Faust
250*c217d954SCole Faust permute_weights->configure(weights->info(), _impl->permuted_weights.info(), PermutationVector(2U, 0U, 1U));
251*c217d954SCole Faust _impl->permuted_weights.info()->set_data_layout(DataLayout::NHWC);
252*c217d954SCole Faust weights_to_use = &_impl->permuted_weights;
253*c217d954SCole Faust
254*c217d954SCole Faust _impl->permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
255*c217d954SCole Faust output_to_use = &_impl->permuted_output;
256*c217d954SCole Faust }
257*c217d954SCole Faust
258*c217d954SCole Faust auto depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>();
259*c217d954SCole Faust depthwise_conv_kernel->configure(input_to_use->info(), weights_to_use->info(), biases == nullptr ? nullptr : biases->info(), output_to_use->info(), info);
260*c217d954SCole Faust
261*c217d954SCole Faust if(_impl->is_nchw)
262*c217d954SCole Faust {
263*c217d954SCole Faust auto permute_output = std::make_unique<cpu::CpuPermute>();
264*c217d954SCole Faust permute_output->configure(_impl->permuted_output.info(), output->info(), PermutationVector(1U, 2U, 0U));
265*c217d954SCole Faust _impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);
266*c217d954SCole Faust
267*c217d954SCole Faust _impl->permuted_input.allocator()->allocate();
268*c217d954SCole Faust _impl->permuted_weights.allocator()->allocate();
269*c217d954SCole Faust _impl->permuted_output.allocator()->allocate();
270*c217d954SCole Faust }
271*c217d954SCole Faust }
272*c217d954SCole Faust
validate(const ITensorInfo * input,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)273*c217d954SCole Faust Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
274*c217d954SCole Faust const PadStrideInfo &conv_info,
275*c217d954SCole Faust unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
276*c217d954SCole Faust {
277*c217d954SCole Faust ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
278*c217d954SCole Faust return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info);
279*c217d954SCole Faust }
280*c217d954SCole Faust
run()281*c217d954SCole Faust void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run()
282*c217d954SCole Faust {
283*c217d954SCole Faust ITensorPack pack;
284*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC_0, _impl->src);
285*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC_1, _impl->weights);
286*c217d954SCole Faust pack.add_tensor(TensorType::ACL_SRC_2, _impl->biases);
287*c217d954SCole Faust pack.add_tensor(TensorType::ACL_INT_0, &_impl->permuted_input);
288*c217d954SCole Faust pack.add_tensor(TensorType::ACL_INT_1, &_impl->permuted_weights);
289*c217d954SCole Faust pack.add_tensor(TensorType::ACL_INT_2, &_impl->permuted_output);
290*c217d954SCole Faust pack.add_tensor(TensorType::ACL_DST_0, _impl->dst);
291*c217d954SCole Faust
292*c217d954SCole Faust _impl->op->run(pack);
293*c217d954SCole Faust }
294*c217d954SCole Faust
NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)295*c217d954SCole Faust NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
296*c217d954SCole Faust : _memory_group(std::move(memory_manager)), _impl(std::make_unique<Impl>())
297*c217d954SCole Faust {
298*c217d954SCole Faust }
299*c217d954SCole Faust
300*c217d954SCole Faust #ifndef DOXYGEN_SKIP_THIS
301*c217d954SCole Faust struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer::Impl
302*c217d954SCole Faust {
303*c217d954SCole Faust DepthwiseConvolutionFunction depth_conv_func{ DepthwiseConvolutionFunction::OPTIMIZED };
304*c217d954SCole Faust NEDepthwiseConvolutionLayerOptimizedInternal func_optimized{ nullptr };
305*c217d954SCole Faust NEDepthwiseConvolutionLayerGeneric func_generic{};
306*c217d954SCole Faust std::shared_ptr<cpu::CpuDepthwiseConv2d> op{ nullptr };
307*c217d954SCole Faust };
308*c217d954SCole Faust #endif // DOXYGEN_SKIP_THIS
309*c217d954SCole Faust
configure(ITensor * input,const ITensor * weights,const ITensor * biases,ITensor * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)310*c217d954SCole Faust void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
311*c217d954SCole Faust const ActivationLayerInfo &act_info, const Size2D &dilation)
312*c217d954SCole Faust {
313*c217d954SCole Faust ARM_COMPUTE_LOG_PARAMS(input, weights, output, conv_info, depth_multiplier, biases, act_info, dilation);
314*c217d954SCole Faust
315*c217d954SCole Faust const ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
316*c217d954SCole Faust _impl->op = std::make_shared<cpu::CpuDepthwiseConv2d>();
317*c217d954SCole Faust _impl->depth_conv_func = _impl->op->get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(),
318*c217d954SCole Faust info);
319*c217d954SCole Faust switch(_impl->depth_conv_func)
320*c217d954SCole Faust {
321*c217d954SCole Faust case DepthwiseConvolutionFunction::OPTIMIZED:
322*c217d954SCole Faust _impl->func_optimized.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
323*c217d954SCole Faust break;
324*c217d954SCole Faust case DepthwiseConvolutionFunction::GENERIC:
325*c217d954SCole Faust _impl->func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
326*c217d954SCole Faust break;
327*c217d954SCole Faust default:
328*c217d954SCole Faust ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
329*c217d954SCole Faust }
330*c217d954SCole Faust }
331*c217d954SCole Faust
validate(const ITensorInfo * input,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)332*c217d954SCole Faust Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
333*c217d954SCole Faust unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
334*c217d954SCole Faust {
335*c217d954SCole Faust ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
336*c217d954SCole Faust return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info);
337*c217d954SCole Faust }
338*c217d954SCole Faust
run()339*c217d954SCole Faust void NEDepthwiseConvolutionLayer::run()
340*c217d954SCole Faust {
341*c217d954SCole Faust switch(_impl->depth_conv_func)
342*c217d954SCole Faust {
343*c217d954SCole Faust case DepthwiseConvolutionFunction::OPTIMIZED:
344*c217d954SCole Faust _impl->func_optimized.run();
345*c217d954SCole Faust break;
346*c217d954SCole Faust case DepthwiseConvolutionFunction::GENERIC:
347*c217d954SCole Faust _impl->func_generic.run();
348*c217d954SCole Faust break;
349*c217d954SCole Faust default:
350*c217d954SCole Faust ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
351*c217d954SCole Faust }
352*c217d954SCole Faust }
353*c217d954SCole Faust
prepare()354*c217d954SCole Faust void NEDepthwiseConvolutionLayer::prepare()
355*c217d954SCole Faust {
356*c217d954SCole Faust switch(_impl->depth_conv_func)
357*c217d954SCole Faust {
358*c217d954SCole Faust case DepthwiseConvolutionFunction::OPTIMIZED:
359*c217d954SCole Faust _impl->func_optimized.prepare();
360*c217d954SCole Faust break;
361*c217d954SCole Faust case DepthwiseConvolutionFunction::GENERIC:
362*c217d954SCole Faust _impl->func_generic.prepare();
363*c217d954SCole Faust break;
364*c217d954SCole Faust default:
365*c217d954SCole Faust ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
366*c217d954SCole Faust }
367*c217d954SCole Faust }
368*c217d954SCole Faust } // namespace arm_compute
369