xref: /aosp_15_r20/external/ComputeLibrary/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1*c217d954SCole Faust /*
2*c217d954SCole Faust  * Copyright (c) 2017-2021 Arm Limited.
3*c217d954SCole Faust  *
4*c217d954SCole Faust  * SPDX-License-Identifier: MIT
5*c217d954SCole Faust  *
6*c217d954SCole Faust  * Permission is hereby granted, free of charge, to any person obtaining a copy
7*c217d954SCole Faust  * of this software and associated documentation files (the "Software"), to
8*c217d954SCole Faust  * deal in the Software without restriction, including without limitation the
9*c217d954SCole Faust  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10*c217d954SCole Faust  * sell copies of the Software, and to permit persons to whom the Software is
11*c217d954SCole Faust  * furnished to do so, subject to the following conditions:
12*c217d954SCole Faust  *
13*c217d954SCole Faust  * The above copyright notice and this permission notice shall be included in all
14*c217d954SCole Faust  * copies or substantial portions of the Software.
15*c217d954SCole Faust  *
16*c217d954SCole Faust  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17*c217d954SCole Faust  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18*c217d954SCole Faust  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19*c217d954SCole Faust  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20*c217d954SCole Faust  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21*c217d954SCole Faust  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22*c217d954SCole Faust  * SOFTWARE.
23*c217d954SCole Faust  */
24*c217d954SCole Faust #include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
25*c217d954SCole Faust 
26*c217d954SCole Faust #include "arm_compute/core/utils/misc/InfoHelpers.h"
27*c217d954SCole Faust #include "arm_compute/core/utils/misc/ShapeCalculator.h"
28*c217d954SCole Faust #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
29*c217d954SCole Faust #include "arm_compute/runtime/NEON/NEScheduler.h"
30*c217d954SCole Faust #include "src/common/utils/Log.h"
31*c217d954SCole Faust #include "src/cpu/operators/CpuDepthwiseConv2d.h"
32*c217d954SCole Faust 
33*c217d954SCole Faust using namespace arm_compute::misc;
34*c217d954SCole Faust using namespace arm_compute::misc::shape_calculator;
35*c217d954SCole Faust 
36*c217d954SCole Faust namespace arm_compute
37*c217d954SCole Faust {
// Destructor defaulted out-of-line, in the translation unit that also defines the Impl types.
// NOTE(review): presumably needed because the header only forward-declares Impl - confirm.
NEDepthwiseConvolutionLayer::~NEDepthwiseConvolutionLayer() = default;
39*c217d954SCole Faust 
40*c217d954SCole Faust struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::Impl
41*c217d954SCole Faust {
42*c217d954SCole Faust     ITensor       *src{ nullptr }; // SRC_0
43*c217d954SCole Faust     ITensor       *dst{ nullptr }; // DST_0
44*c217d954SCole Faust     const ITensor *weights
45*c217d954SCole Faust     {
46*c217d954SCole Faust         nullptr
47*c217d954SCole Faust     }; // SRC_1
48*c217d954SCole Faust     const ITensor *biases
49*c217d954SCole Faust     {
50*c217d954SCole Faust         nullptr
51*c217d954SCole Faust     };                                                           // SRC_2
52*c217d954SCole Faust     Tensor                                   permuted_input{};   // INT_0
53*c217d954SCole Faust     Tensor                                   permuted_weights{}; // INT_1
54*c217d954SCole Faust     Tensor                                   permuted_output{};  // INT_2
55*c217d954SCole Faust     Tensor                                   workspace{};        // INT_3
56*c217d954SCole Faust     Tensor                                   packed_weights{};   // INT_4
57*c217d954SCole Faust     std::shared_ptr<cpu::CpuDepthwiseConv2d> op{ nullptr };
58*c217d954SCole Faust     bool                                     is_prepared{ false };
59*c217d954SCole Faust     bool                                     permute{ false };
60*c217d954SCole Faust };
61*c217d954SCole Faust 
NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)62*c217d954SCole Faust NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)
63*c217d954SCole Faust     : _memory_group(memory_manager), _impl(std::make_unique<Impl>())
64*c217d954SCole Faust {
65*c217d954SCole Faust }
66*c217d954SCole Faust 
configure(ITensor * input,const ITensor * weights,const ITensor * biases,ITensor * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)67*c217d954SCole Faust void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::configure(ITensor       *input,
68*c217d954SCole Faust                                                                                           const ITensor *weights,
69*c217d954SCole Faust                                                                                           const ITensor *biases,
70*c217d954SCole Faust                                                                                           ITensor *output, const PadStrideInfo &conv_info,
71*c217d954SCole Faust                                                                                           unsigned int               depth_multiplier,
72*c217d954SCole Faust                                                                                           const ActivationLayerInfo &act_info,
73*c217d954SCole Faust                                                                                           const Size2D              &dilation)
74*c217d954SCole Faust {
75*c217d954SCole Faust     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
76*c217d954SCole Faust 
77*c217d954SCole Faust     bool is_nhwc   = input->info()->data_layout() == DataLayout::NCHW;
78*c217d954SCole Faust     _impl->src     = input;
79*c217d954SCole Faust     _impl->weights = weights;
80*c217d954SCole Faust     _impl->biases  = biases;
81*c217d954SCole Faust     _impl->dst     = output;
82*c217d954SCole Faust     _impl->permute = is_nhwc;
83*c217d954SCole Faust 
84*c217d954SCole Faust     _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();
85*c217d954SCole Faust     ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
86*c217d954SCole Faust     _impl->op->configure(_impl->src->info(), _impl->weights->info(), _impl->biases == nullptr ? nullptr : _impl->biases->info(),
87*c217d954SCole Faust                          _impl->dst->info(), info);
88*c217d954SCole Faust 
89*c217d954SCole Faust     // Configure pipeline
90*c217d954SCole Faust     ActivationLayerInfo act_info_to_use            = ActivationLayerInfo();
91*c217d954SCole Faust     const bool          is_relu                    = arm_compute::utils::info_helpers::is_relu(act_info);
92*c217d954SCole Faust     const bool          is_relu6                   = arm_compute::utils::info_helpers::is_relu6(act_info);
93*c217d954SCole Faust     bool                is_activationlayer_enabled = act_info.enabled() && !(is_relu || is_relu6);
94*c217d954SCole Faust 
95*c217d954SCole Faust     if(!is_activationlayer_enabled)
96*c217d954SCole Faust     {
97*c217d954SCole Faust         act_info_to_use = act_info;
98*c217d954SCole Faust     }
99*c217d954SCole Faust     info = ConvolutionInfo{ conv_info, depth_multiplier, act_info_to_use, dilation };
100*c217d954SCole Faust 
101*c217d954SCole Faust     auto dwc_optimized_func = std::make_unique<cpu::CpuDepthwiseConv2dAssemblyDispatch>();
102*c217d954SCole Faust 
103*c217d954SCole Faust     if(is_nhwc)
104*c217d954SCole Faust     {
105*c217d954SCole Faust         auto permute_input   = std::make_unique<cpu::CpuPermute>();
106*c217d954SCole Faust         auto permute_weights = std::make_unique<cpu::CpuPermute>();
107*c217d954SCole Faust         auto permute_output  = std::make_unique<cpu::CpuPermute>();
108*c217d954SCole Faust 
109*c217d954SCole Faust         _memory_group.manage(&_impl->permuted_input);
110*c217d954SCole Faust         _memory_group.manage(&_impl->permuted_weights);
111*c217d954SCole Faust         _memory_group.manage(&_impl->permuted_output);
112*c217d954SCole Faust 
113*c217d954SCole Faust         // Configure the function to transform the input tensor from NCHW -> NHWC
114*c217d954SCole Faust         permute_input->configure(input->info(), _impl->permuted_input.info(), PermutationVector(2U, 0U, 1U));
115*c217d954SCole Faust         _impl->permuted_input.info()->set_data_layout(DataLayout::NHWC);
116*c217d954SCole Faust 
117*c217d954SCole Faust         // Configure the function to transform the weights tensor from IHW -> HWI
118*c217d954SCole Faust         permute_weights->configure(weights->info(), _impl->permuted_weights.info(), PermutationVector(2U, 0U, 1U));
119*c217d954SCole Faust         _impl->permuted_weights.info()->set_data_layout(DataLayout::NHWC);
120*c217d954SCole Faust 
121*c217d954SCole Faust         _impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);
122*c217d954SCole Faust         _impl->permuted_output.info()->set_quantization_info(output->info()->quantization_info());
123*c217d954SCole Faust 
124*c217d954SCole Faust         // Configure optimized depthwise
125*c217d954SCole Faust         dwc_optimized_func->configure(_impl->permuted_input.info(), _impl->permuted_weights.info(), biases == nullptr ? nullptr : biases->info(), _impl->permuted_output.info(), info);
126*c217d954SCole Faust 
127*c217d954SCole Faust         // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
128*c217d954SCole Faust         _impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);
129*c217d954SCole Faust         permute_output->configure(_impl->permuted_output.info(), output->info(), PermutationVector(1U, 2U, 0U));
130*c217d954SCole Faust 
131*c217d954SCole Faust         _impl->permuted_input.allocator()->allocate();
132*c217d954SCole Faust         _impl->permuted_output.allocator()->allocate();
133*c217d954SCole Faust     }
134*c217d954SCole Faust     else
135*c217d954SCole Faust     {
136*c217d954SCole Faust         dwc_optimized_func->configure(_impl->src->info(), _impl->weights->info(), biases == nullptr ? nullptr : biases->info(), _impl->dst->info(), info);
137*c217d954SCole Faust     }
138*c217d954SCole Faust 
139*c217d954SCole Faust     // Allocate memory based on the internal memory requirements
140*c217d954SCole Faust     experimental::MemoryRequirements mem_req = dwc_optimized_func->workspace();
141*c217d954SCole Faust     _impl->workspace.allocator()->init(TensorInfo(TensorShape{ mem_req[0].size + mem_req[0].alignment }, 1, DataType::S8), mem_req[0].alignment);
142*c217d954SCole Faust     _impl->packed_weights.allocator()->init(TensorInfo(TensorShape{ mem_req[1].size + mem_req[1].alignment }, 1, DataType::S8), mem_req[1].alignment);
143*c217d954SCole Faust     _memory_group.manage(&_impl->workspace);
144*c217d954SCole Faust     _memory_group.manage(&_impl->packed_weights);
145*c217d954SCole Faust     _impl->workspace.allocator()->allocate();
146*c217d954SCole Faust     _impl->packed_weights.allocator()->allocate();
147*c217d954SCole Faust }
148*c217d954SCole Faust 
validate(const ITensorInfo * input,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)149*c217d954SCole Faust Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::validate(const ITensorInfo         *input,
150*c217d954SCole Faust                                                                                            const ITensorInfo         *weights,
151*c217d954SCole Faust                                                                                            const ITensorInfo         *biases,
152*c217d954SCole Faust                                                                                            const ITensorInfo         *output,
153*c217d954SCole Faust                                                                                            const PadStrideInfo       &conv_info,
154*c217d954SCole Faust                                                                                            unsigned int               depth_multiplier,
155*c217d954SCole Faust                                                                                            const ActivationLayerInfo &act_info,
156*c217d954SCole Faust                                                                                            const Size2D              &dilation)
157*c217d954SCole Faust {
158*c217d954SCole Faust     ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
159*c217d954SCole Faust     return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info);
160*c217d954SCole Faust }
161*c217d954SCole Faust 
run()162*c217d954SCole Faust void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run()
163*c217d954SCole Faust {
164*c217d954SCole Faust     prepare();
165*c217d954SCole Faust     MemoryGroupResourceScope scope_mg(_memory_group);
166*c217d954SCole Faust 
167*c217d954SCole Faust     ITensorPack pack;
168*c217d954SCole Faust     pack.add_tensor(TensorType::ACL_SRC_0, _impl->src);
169*c217d954SCole Faust     pack.add_tensor(TensorType::ACL_SRC_1, _impl->weights);
170*c217d954SCole Faust     pack.add_tensor(TensorType::ACL_SRC_2, _impl->biases);
171*c217d954SCole Faust     pack.add_tensor(TensorType::ACL_INT_0, &_impl->permuted_input);
172*c217d954SCole Faust     pack.add_tensor(TensorType::ACL_INT_1, &_impl->permuted_weights);
173*c217d954SCole Faust     pack.add_tensor(TensorType::ACL_INT_2, &_impl->permuted_output);
174*c217d954SCole Faust     pack.add_tensor(TensorType::ACL_INT_3, &_impl->workspace);
175*c217d954SCole Faust     pack.add_tensor(TensorType::ACL_INT_4, &_impl->packed_weights);
176*c217d954SCole Faust     pack.add_tensor(TensorType::ACL_DST_0, _impl->dst);
177*c217d954SCole Faust 
178*c217d954SCole Faust     _impl->op->run(pack);
179*c217d954SCole Faust }
180*c217d954SCole Faust 
prepare()181*c217d954SCole Faust void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::prepare()
182*c217d954SCole Faust {
183*c217d954SCole Faust     if(!_impl->is_prepared)
184*c217d954SCole Faust     {
185*c217d954SCole Faust         // Permute weights
186*c217d954SCole Faust         if(_impl->permute)
187*c217d954SCole Faust         {
188*c217d954SCole Faust             _impl->permuted_weights.allocator()->allocate();
189*c217d954SCole Faust         }
190*c217d954SCole Faust 
191*c217d954SCole Faust         if(!_impl->permuted_weights.is_used())
192*c217d954SCole Faust         {
193*c217d954SCole Faust             _impl->permuted_weights.allocator()->free();
194*c217d954SCole Faust         }
195*c217d954SCole Faust 
196*c217d954SCole Faust         _impl->is_prepared = true;
197*c217d954SCole Faust     }
198*c217d954SCole Faust }
199*c217d954SCole Faust 
200*c217d954SCole Faust struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::Impl
201*c217d954SCole Faust {
202*c217d954SCole Faust     Tensor                                   permuted_input{};
203*c217d954SCole Faust     Tensor                                   permuted_weights{};
204*c217d954SCole Faust     Tensor                                   permuted_output{};
205*c217d954SCole Faust     bool                                     is_prepared{ false };
206*c217d954SCole Faust     bool                                     is_nchw{ false };
207*c217d954SCole Faust     bool                                     is_activationlayer_enabled{ false };
208*c217d954SCole Faust     const ITensor                           *weights{ nullptr };
209*c217d954SCole Faust     const ITensor                           *biases{ nullptr };
210*c217d954SCole Faust     const ITensor                           *src{ nullptr };
211*c217d954SCole Faust     ITensor                                 *dst{ nullptr };
212*c217d954SCole Faust     std::shared_ptr<cpu::CpuDepthwiseConv2d> op{ nullptr };
213*c217d954SCole Faust };
214*c217d954SCole Faust 
NEDepthwiseConvolutionLayerGeneric()215*c217d954SCole Faust NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric()
216*c217d954SCole Faust     : _impl(std::make_unique<Impl>())
217*c217d954SCole Faust {
218*c217d954SCole Faust }
219*c217d954SCole Faust 
configure(ITensor * input,const ITensor * weights,const ITensor * biases,ITensor * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)220*c217d954SCole Faust void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
221*c217d954SCole Faust                                                                                 unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
222*c217d954SCole Faust {
223*c217d954SCole Faust     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
224*c217d954SCole Faust     ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
225*c217d954SCole Faust                                                                      output->info(), conv_info, depth_multiplier, act_info, dilation));
226*c217d954SCole Faust 
227*c217d954SCole Faust     const ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
228*c217d954SCole Faust     _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();
229*c217d954SCole Faust     _impl->op->configure(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(), info);
230*c217d954SCole Faust 
231*c217d954SCole Faust     _impl->src         = input;
232*c217d954SCole Faust     _impl->dst         = output;
233*c217d954SCole Faust     _impl->weights     = weights;
234*c217d954SCole Faust     _impl->biases      = biases;
235*c217d954SCole Faust     _impl->is_nchw     = input->info()->data_layout() == DataLayout::NCHW;
236*c217d954SCole Faust     _impl->is_prepared = !_impl->is_nchw;
237*c217d954SCole Faust 
238*c217d954SCole Faust     ITensor       *input_to_use   = input;
239*c217d954SCole Faust     const ITensor *weights_to_use = weights;
240*c217d954SCole Faust     ITensor       *output_to_use  = output;
241*c217d954SCole Faust     if(_impl->is_nchw)
242*c217d954SCole Faust     {
243*c217d954SCole Faust         auto permute_input   = std::make_unique<cpu::CpuPermute>();
244*c217d954SCole Faust         auto permute_weights = std::make_unique<cpu::CpuPermute>();
245*c217d954SCole Faust 
246*c217d954SCole Faust         permute_input->configure(input->info(), _impl->permuted_input.info(), PermutationVector(2U, 0U, 1U));
247*c217d954SCole Faust         _impl->permuted_input.info()->set_data_layout(DataLayout::NHWC);
248*c217d954SCole Faust         input_to_use = &_impl->permuted_input;
249*c217d954SCole Faust 
250*c217d954SCole Faust         permute_weights->configure(weights->info(), _impl->permuted_weights.info(), PermutationVector(2U, 0U, 1U));
251*c217d954SCole Faust         _impl->permuted_weights.info()->set_data_layout(DataLayout::NHWC);
252*c217d954SCole Faust         weights_to_use = &_impl->permuted_weights;
253*c217d954SCole Faust 
254*c217d954SCole Faust         _impl->permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
255*c217d954SCole Faust         output_to_use = &_impl->permuted_output;
256*c217d954SCole Faust     }
257*c217d954SCole Faust 
258*c217d954SCole Faust     auto depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>();
259*c217d954SCole Faust     depthwise_conv_kernel->configure(input_to_use->info(), weights_to_use->info(), biases == nullptr ? nullptr : biases->info(), output_to_use->info(), info);
260*c217d954SCole Faust 
261*c217d954SCole Faust     if(_impl->is_nchw)
262*c217d954SCole Faust     {
263*c217d954SCole Faust         auto permute_output = std::make_unique<cpu::CpuPermute>();
264*c217d954SCole Faust         permute_output->configure(_impl->permuted_output.info(), output->info(), PermutationVector(1U, 2U, 0U));
265*c217d954SCole Faust         _impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);
266*c217d954SCole Faust 
267*c217d954SCole Faust         _impl->permuted_input.allocator()->allocate();
268*c217d954SCole Faust         _impl->permuted_weights.allocator()->allocate();
269*c217d954SCole Faust         _impl->permuted_output.allocator()->allocate();
270*c217d954SCole Faust     }
271*c217d954SCole Faust }
272*c217d954SCole Faust 
validate(const ITensorInfo * input,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)273*c217d954SCole Faust Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
274*c217d954SCole Faust                                                                                  const PadStrideInfo &conv_info,
275*c217d954SCole Faust                                                                                  unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
276*c217d954SCole Faust {
277*c217d954SCole Faust     ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
278*c217d954SCole Faust     return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info);
279*c217d954SCole Faust }
280*c217d954SCole Faust 
run()281*c217d954SCole Faust void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run()
282*c217d954SCole Faust {
283*c217d954SCole Faust     ITensorPack pack;
284*c217d954SCole Faust     pack.add_tensor(TensorType::ACL_SRC_0, _impl->src);
285*c217d954SCole Faust     pack.add_tensor(TensorType::ACL_SRC_1, _impl->weights);
286*c217d954SCole Faust     pack.add_tensor(TensorType::ACL_SRC_2, _impl->biases);
287*c217d954SCole Faust     pack.add_tensor(TensorType::ACL_INT_0, &_impl->permuted_input);
288*c217d954SCole Faust     pack.add_tensor(TensorType::ACL_INT_1, &_impl->permuted_weights);
289*c217d954SCole Faust     pack.add_tensor(TensorType::ACL_INT_2, &_impl->permuted_output);
290*c217d954SCole Faust     pack.add_tensor(TensorType::ACL_DST_0, _impl->dst);
291*c217d954SCole Faust 
292*c217d954SCole Faust     _impl->op->run(pack);
293*c217d954SCole Faust }
294*c217d954SCole Faust 
NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)295*c217d954SCole Faust NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
296*c217d954SCole Faust     : _memory_group(std::move(memory_manager)), _impl(std::make_unique<Impl>())
297*c217d954SCole Faust {
298*c217d954SCole Faust }
299*c217d954SCole Faust 
300*c217d954SCole Faust #ifndef DOXYGEN_SKIP_THIS
301*c217d954SCole Faust struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer::Impl
302*c217d954SCole Faust {
303*c217d954SCole Faust     DepthwiseConvolutionFunction                 depth_conv_func{ DepthwiseConvolutionFunction::OPTIMIZED };
304*c217d954SCole Faust     NEDepthwiseConvolutionLayerOptimizedInternal func_optimized{ nullptr };
305*c217d954SCole Faust     NEDepthwiseConvolutionLayerGeneric           func_generic{};
306*c217d954SCole Faust     std::shared_ptr<cpu::CpuDepthwiseConv2d>     op{ nullptr };
307*c217d954SCole Faust };
308*c217d954SCole Faust #endif // DOXYGEN_SKIP_THIS
309*c217d954SCole Faust 
configure(ITensor * input,const ITensor * weights,const ITensor * biases,ITensor * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)310*c217d954SCole Faust void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
311*c217d954SCole Faust                                             const ActivationLayerInfo &act_info, const Size2D &dilation)
312*c217d954SCole Faust {
313*c217d954SCole Faust     ARM_COMPUTE_LOG_PARAMS(input, weights, output, conv_info, depth_multiplier, biases, act_info, dilation);
314*c217d954SCole Faust 
315*c217d954SCole Faust     const ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
316*c217d954SCole Faust     _impl->op              = std::make_shared<cpu::CpuDepthwiseConv2d>();
317*c217d954SCole Faust     _impl->depth_conv_func = _impl->op->get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(),
318*c217d954SCole Faust                                                                           info);
319*c217d954SCole Faust     switch(_impl->depth_conv_func)
320*c217d954SCole Faust     {
321*c217d954SCole Faust         case DepthwiseConvolutionFunction::OPTIMIZED:
322*c217d954SCole Faust             _impl->func_optimized.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
323*c217d954SCole Faust             break;
324*c217d954SCole Faust         case DepthwiseConvolutionFunction::GENERIC:
325*c217d954SCole Faust             _impl->func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
326*c217d954SCole Faust             break;
327*c217d954SCole Faust         default:
328*c217d954SCole Faust             ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
329*c217d954SCole Faust     }
330*c217d954SCole Faust }
331*c217d954SCole Faust 
validate(const ITensorInfo * input,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * output,const PadStrideInfo & conv_info,unsigned int depth_multiplier,const ActivationLayerInfo & act_info,const Size2D & dilation)332*c217d954SCole Faust Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
333*c217d954SCole Faust                                              unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
334*c217d954SCole Faust {
335*c217d954SCole Faust     ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
336*c217d954SCole Faust     return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info);
337*c217d954SCole Faust }
338*c217d954SCole Faust 
run()339*c217d954SCole Faust void NEDepthwiseConvolutionLayer::run()
340*c217d954SCole Faust {
341*c217d954SCole Faust     switch(_impl->depth_conv_func)
342*c217d954SCole Faust     {
343*c217d954SCole Faust         case DepthwiseConvolutionFunction::OPTIMIZED:
344*c217d954SCole Faust             _impl->func_optimized.run();
345*c217d954SCole Faust             break;
346*c217d954SCole Faust         case DepthwiseConvolutionFunction::GENERIC:
347*c217d954SCole Faust             _impl->func_generic.run();
348*c217d954SCole Faust             break;
349*c217d954SCole Faust         default:
350*c217d954SCole Faust             ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
351*c217d954SCole Faust     }
352*c217d954SCole Faust }
353*c217d954SCole Faust 
prepare()354*c217d954SCole Faust void NEDepthwiseConvolutionLayer::prepare()
355*c217d954SCole Faust {
356*c217d954SCole Faust     switch(_impl->depth_conv_func)
357*c217d954SCole Faust     {
358*c217d954SCole Faust         case DepthwiseConvolutionFunction::OPTIMIZED:
359*c217d954SCole Faust             _impl->func_optimized.prepare();
360*c217d954SCole Faust             break;
361*c217d954SCole Faust         case DepthwiseConvolutionFunction::GENERIC:
362*c217d954SCole Faust             _impl->func_generic.prepare();
363*c217d954SCole Faust             break;
364*c217d954SCole Faust         default:
365*c217d954SCole Faust             ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
366*c217d954SCole Faust     }
367*c217d954SCole Faust }
368*c217d954SCole Faust } // namespace arm_compute
369