xref: /aosp_15_r20/external/ComputeLibrary/src/cpu/operators/CpuDepthwiseConv2d.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2021-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/cpu/operators/CpuDepthwiseConv2d.h"
25 
26 #include "arm_compute/core/TensorInfo.h"
27 #include "arm_compute/core/Validate.h"
28 #include "arm_compute/core/utils/misc/InfoHelpers.h"
29 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
30 #include "arm_compute/runtime/NEON/NEScheduler.h"
31 #include "src/common/utils/Log.h"
32 #include "src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h"
33 
34 namespace arm_compute
35 {
36 namespace cpu
37 {
38 namespace
39 {
// Validates all arguments for the optimized (assembly-dispatch) depthwise path.
// Returns an error Status on the first failing check, Status{} on success.
Status validate_arguments_optimized(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
    // Per-channel quantized weights are allowed to differ in data type from the input
    if(!is_data_type_quantized_per_channel(weights->data_type()))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights);
    }
    ARM_COMPUTE_RETURN_ERROR_ON(src->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(info.dilation.x() < 1 || info.dilation.y() < 1);
    const size_t idx_w = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);
    // The dilated kernel extent (kernel + inter-tap gaps) must fit inside the padded input, per dimension
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (info.dilation.x() - 1) > src->dimension(idx_w) + info.pad_stride_info.pad_left() +
                                info.pad_stride_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (info.dilation.y() - 1) > src->dimension(idx_h) + info.pad_stride_info.pad_top() +
                                info.pad_stride_info.pad_bottom());

    // Optional bias must be a 1-D vector with one entry per output channel
    if(biases != nullptr)
    {
        const unsigned int channel_idx = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
    }

    // Ensure the assembly backend actually supports this configuration
    ARM_COMPUTE_RETURN_ON_ERROR(CpuDepthwiseConv2dAssemblyDispatch::validate(src, weights, biases, dst, info));

    // Validate Activation Layer: a separate activation pass is only run when the
    // assembly dispatch cannot fuse the requested activation itself
    if(info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info))
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(dst, nullptr, info.act_info));
    }
    return Status{};
}
73 } // namespace
74 
configure(ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,ITensorInfo * dst,const ConvolutionInfo & info)75 void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::configure(ITensorInfo           *src,
76                                                                         const ITensorInfo     *weights,
77                                                                         const ITensorInfo     *biases,
78                                                                         ITensorInfo           *dst,
79                                                                         const ConvolutionInfo &info)
80 {
81     ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
82     // Perform validation step
83     ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, (biases == nullptr) ? nullptr : biases,
84                                                                              dst, info));
85 
86     _is_quantized = is_data_type_quantized_asymmetric(src->data_type());
87     _has_bias     = biases != nullptr;
88     _is_nchw      = src->data_layout() == DataLayout::NCHW;
89     _permute      = _is_nchw;
90     _is_prepared  = false;
91     _are_weights_const = weights->are_values_constant();
92 
93     // Configure pipeline
94     _is_activationlayer_enabled = info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info);
95 
96     _dwc_optimized_func = std::make_unique<CpuDepthwiseConv2dAssemblyDispatch>();
97     if(_is_nchw)
98     {
99         _permute_input   = std::make_unique<cpu::CpuPermute>();
100         _permute_weights = std::make_unique<cpu::CpuPermute>();
101         _permute_output  = std::make_unique<cpu::CpuPermute>();
102 
103         auto input_perm   = std::make_unique<TensorInfo>();
104         auto weights_perm = std::make_unique<TensorInfo>();
105         auto output_perm  = std::make_unique<TensorInfo>();
106 
107         // Configure the function to transform the input tensor from NCHW -> NHWC
108         _permute_input->configure(src, input_perm.get(), PermutationVector(2U, 0U, 1U));
109         input_perm->set_data_layout(DataLayout::NHWC);
110 
111         // Configure the function to transform the weights tensor from IHW -> HWI
112         _permute_weights->configure(weights, weights_perm.get(), PermutationVector(2U, 0U, 1U));
113         weights_perm->set_data_layout(DataLayout::NHWC);
114 
115         output_perm->set_data_layout(DataLayout::NHWC);
116         output_perm->set_quantization_info(dst->quantization_info());
117 
118         // Configure optimized depthwise
119         _dwc_optimized_func->configure(input_perm.get(), weights_perm.get(), biases, output_perm.get(), info);
120 
121         // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
122         output_perm->set_data_layout(DataLayout::NHWC);
123         _permute_output->configure(output_perm.get(), dst, PermutationVector(1U, 2U, 0U));
124     }
125     else
126     {
127         _dwc_optimized_func->configure(src, weights, biases, dst, info);
128     }
129 
130     // Configure activation
131     if(_is_activationlayer_enabled)
132     {
133         _activationlayer_function = std::make_unique<cpu::CpuActivation>();
134         _activationlayer_function->configure(dst, nullptr, info.act_info);
135     }
136 }
137 
validate(const ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * dst,const ConvolutionInfo & info)138 Status CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::validate(const ITensorInfo     *src,
139                                                                          const ITensorInfo     *weights,
140                                                                          const ITensorInfo     *biases,
141                                                                          const ITensorInfo     *dst,
142                                                                          const ConvolutionInfo &info)
143 {
144     return validate_arguments_optimized(src, weights, biases, dst, info);
145 }
146 
/** Execute the optimized depthwise pipeline:
 *  (optional NCHW->NHWC input permute) -> assembly depthwise -> (optional
 *  NHWC->NCHW output permute) -> (optional activation).
 */
void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::run(ITensorPack &tensors)
{
    ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
    // Ensure weights are permuted/packed (re-done each run for non-constant weights)
    prepare(tensors);

    auto bias           = tensors.get_const_tensor(TensorType::ACL_SRC_2);
    auto dst            = tensors.get_tensor(TensorType::ACL_DST_0);
    auto workspace      = tensors.get_tensor(TensorType::ACL_INT_3);
    auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);

    // Permute input (NCHW -> NHWC) into the ACL_INT_0 intermediate
    if(_permute)
    {
        ITensorPack pack;
        auto        src      = tensors.get_const_tensor(TensorType::ACL_SRC_0);
        auto        src_perm = tensors.get_tensor(TensorType::ACL_INT_0);
        pack.add_tensor(TensorType::ACL_SRC, src);
        pack.add_tensor(TensorType::ACL_DST, src_perm);
        _permute_input->run(pack);
    }

    // Run assembly function
    if(_is_nchw)
    {
        // NCHW: feed the permuted intermediates and write into the permuted dst buffer
        auto src_perm     = tensors.get_tensor(TensorType::ACL_INT_0);
        auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
        auto dst_perm     = tensors.get_tensor(TensorType::ACL_INT_2);

        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC_0, src_perm);
        pack.add_tensor(TensorType::ACL_SRC_1, weights_perm);
        pack.add_tensor(TensorType::ACL_SRC_2, bias);
        pack.add_tensor(TensorType::ACL_INT_0, workspace);
        pack.add_tensor(TensorType::ACL_INT_1, packed_weights);
        pack.add_tensor(TensorType::ACL_DST, dst_perm);
        _dwc_optimized_func->run(pack);
    }
    else
    {
        // NHWC: run directly on the user-provided tensors
        auto src     = tensors.get_tensor(TensorType::ACL_SRC_0);
        auto weights = tensors.get_tensor(TensorType::ACL_SRC_1);
        auto dst     = tensors.get_tensor(TensorType::ACL_DST);

        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC_0, src);
        pack.add_tensor(TensorType::ACL_SRC_1, weights);
        pack.add_tensor(TensorType::ACL_SRC_2, bias);
        pack.add_tensor(TensorType::ACL_INT_0, workspace);
        pack.add_tensor(TensorType::ACL_INT_1, packed_weights);
        pack.add_tensor(TensorType::ACL_DST, dst);
        _dwc_optimized_func->run(pack);
    }

    // Permute output (NHWC intermediate -> user's NCHW dst)
    if(_is_nchw)
    {
        ITensorPack pack;
        auto        dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
        pack.add_tensor(TensorType::ACL_SRC, dst_perm);
        pack.add_tensor(TensorType::ACL_DST, dst);
        _permute_output->run(pack);
    }

    // Run activation in place on dst (only configured when the assembly path can't fuse it)
    if(_is_activationlayer_enabled)
    {
        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC, dst);
        pack.add_tensor(TensorType::ACL_DST, dst);
        _activationlayer_function->run(pack);
    }
}
219 
/** Prepare (permute + pack) the weights for the assembly function.
 *
 * Non-constant weights are repacked on every call (note: _is_prepared is
 * deliberately never set on that path, so repacking happens each run).
 * Constant weights are prepared exactly once.
 */
void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::prepare(ITensorPack &tensors)
{
    // if weights are not constant then we need to repack so that weights
    // can be updated in-place
    if(!_are_weights_const)
    {
        auto weights        = tensors.get_const_tensor(TensorType::ACL_SRC_1);
        auto bias           = tensors.get_const_tensor(TensorType::ACL_SRC_2);
        auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);

        ITensorPack pack_opt;
        pack_opt.add_tensor(TensorType::ACL_SRC_1, weights);
        pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
        pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);

        // Prepare optimized function
        _dwc_optimized_func->prepare(pack_opt);

        return;
    }

    // Constant weights: one-shot preparation
    if(!_is_prepared)
    {
        auto weights        = tensors.get_const_tensor(TensorType::ACL_SRC_1);
        auto bias           = tensors.get_const_tensor(TensorType::ACL_SRC_2);
        auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);

        // Permute weights (IHW -> HWI) first when the operator was configured for NCHW
        if(_permute)
        {
            auto permuted_weights = tensors.get_tensor(TensorType::ACL_INT_1);

            ITensorPack pack;
            pack.add_tensor(TensorType::ACL_SRC, weights);
            pack.add_tensor(TensorType::ACL_DST, permuted_weights);
            _permute_weights->run(pack);

            // Original weights are no longer needed once the permuted copy exists
            weights->mark_as_unused();

            ITensorPack pack_opt;
            pack_opt.add_const_tensor(TensorType::ACL_SRC_1, permuted_weights);
            pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
            pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);

            // Prepare optimized function
            _dwc_optimized_func->prepare(pack_opt);
        }
        else
        {
            ITensorPack pack_opt;
            pack_opt.add_tensor(TensorType::ACL_SRC_1, weights);
            pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
            pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);

            // Prepare optimized function
            _dwc_optimized_func->prepare(pack_opt);
        }

        _is_prepared = true;
    }
}
281 
configure(ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,ITensorInfo * dst,const ConvolutionInfo & info)282 void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info)
283 {
284     ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
285     ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2d::validate(src, weights, (biases == nullptr) ? nullptr : biases,
286                                                             dst, info));
287 
288     _is_nchw     = src->data_layout() == DataLayout::NCHW;
289     _is_prepared = !_is_nchw;
290 
291     ITensorInfo       *input_to_use   = src;
292     const ITensorInfo *weights_to_use = weights;
293     ITensorInfo       *output_to_use  = dst;
294 
295     auto input_perm   = std::make_unique<TensorInfo>();
296     auto weights_perm = std::make_unique<TensorInfo>();
297     auto output_perm  = std::make_unique<TensorInfo>(dst->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
298 
299     if(_is_nchw)
300     {
301         _permute_input   = std::make_unique<cpu::CpuPermute>();
302         _permute_weights = std::make_unique<cpu::CpuPermute>();
303 
304         _permute_input->configure(src, input_perm.get(), PermutationVector(2U, 0U, 1U));
305         input_perm->set_data_layout(DataLayout::NHWC);
306         input_to_use = input_perm.get();
307 
308         _permute_weights->configure(weights, weights_perm.get(), PermutationVector(2U, 0U, 1U));
309         weights_perm->set_data_layout(DataLayout::NHWC);
310         weights_to_use = weights_perm.get();
311 
312         output_to_use = output_perm.get();
313     }
314 
315     _depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>();
316     _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, info);
317 
318     if(_is_nchw)
319     {
320         _permute_output = std::make_unique<cpu::CpuPermute>();
321         _permute_output->configure(output_perm.get(), dst, PermutationVector(1U, 2U, 0U));
322         output_perm->set_data_layout(DataLayout::NHWC);
323     }
324 
325     //Configure Activation Layer
326     _is_activationlayer_enabled = info.act_info.enabled();
327     if(_is_activationlayer_enabled)
328     {
329         _activationlayer_function = std::make_unique<cpu::CpuActivation>();
330         _activationlayer_function->configure(dst, nullptr, info.act_info);
331     }
332 }
333 
/** Static validation for the generic depthwise path.
 *
 * NCHW configurations are validated as the permute/kernel/permute chain that
 * configure() would build; NHWC configurations validate the native kernel directly.
 */
Status CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
                                                               const ConvolutionInfo &info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
    if(src->data_layout() == DataLayout::NCHW)
    {
        // Build the shapes the tensors would have after the NCHW -> NHWC permutes
        TensorShape permuted_input_shape   = src->tensor_shape();
        TensorShape permuted_weights_shape = weights->tensor_shape();
        TensorShape permuted_output_shape  = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *weights, info);
        permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
        permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
        permute(permuted_output_shape, PermutationVector(2U, 0U, 1U));

        // NOTE(review): permuted_output is tagged NCHW while the other permuted infos
        // are tagged NHWC — looks inconsistent; confirm against the native kernel's
        // layout expectations before changing.
        const TensorInfo permuted_input   = TensorInfo(src->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC));
        const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC));
        const TensorInfo permuted_output  = TensorInfo(dst->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW));

        // Validate the two input permutes and the output permute back to NCHW
        ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(src, &permuted_input, PermutationVector(2U, 0U, 1U)));
        ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
        ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(&permuted_output, dst, PermutationVector(1U, 2U, 0U)));

        ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConv2dNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, info));
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConv2dNativeKernel::validate(src, weights, biases, dst, info));
    }

    // Validate Activation Layer
    if(info.act_info.enabled() && !CpuDepthwiseConv2dAssemblyDispatch::is_activation_supported(info.act_info))
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(dst, nullptr, info.act_info));
    }

    return Status{};
}
370 
/** Execute the generic depthwise pipeline:
 *  (optional NCHW->NHWC permutes) -> native kernel -> (optional NHWC->NCHW
 *  output permute) -> (optional activation).
 */
void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::run(ITensorPack &tensors)
{
    auto src     = tensors.get_const_tensor(TensorType::ACL_SRC_0);
    auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
    auto biases  = tensors.get_const_tensor(TensorType::ACL_SRC_2);
    auto dst     = tensors.get_tensor(TensorType::ACL_DST_0);

    if(_is_nchw)
    {
        // Permute the weights once (prepare), the input every run, then run the kernel NHWC
        prepare(tensors);
        auto src_perm     = tensors.get_tensor(TensorType::ACL_INT_0);
        auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
        auto dst_perm     = tensors.get_tensor(TensorType::ACL_INT_2);

        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC, src);
        pack.add_tensor(TensorType::ACL_DST, src_perm);
        _permute_input->run(pack);

        ITensorPack pack_depth;
        pack_depth.add_const_tensor(TensorType::ACL_SRC_0, src_perm);
        pack_depth.add_const_tensor(TensorType::ACL_SRC_1, weights_perm);
        pack_depth.add_tensor(TensorType::ACL_SRC_2, biases);
        pack_depth.add_tensor(TensorType::ACL_DST, dst_perm);
        NEScheduler::get().schedule_op(_depthwise_conv_kernel.get(), Window::DimY, _depthwise_conv_kernel->window(), pack_depth);
    }
    else
    {
        // NHWC: the kernel runs directly on the user tensors
        ITensorPack pack_depth;
        pack_depth.add_tensor(TensorType::ACL_SRC_0, src);
        pack_depth.add_tensor(TensorType::ACL_SRC_1, weights);
        pack_depth.add_tensor(TensorType::ACL_SRC_2, biases);
        pack_depth.add_tensor(TensorType::ACL_DST, dst);
        NEScheduler::get().schedule_op(_depthwise_conv_kernel.get(), Window::DimY, _depthwise_conv_kernel->window(), pack_depth);
    }

    // Permute the NHWC intermediate back into the user's NCHW dst
    if(_is_nchw)
    {
        ITensorPack pack;
        auto        dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
        pack.add_tensor(TensorType::ACL_SRC, dst_perm);
        pack.add_tensor(TensorType::ACL_DST, dst);
        _permute_output->run(pack);
    }

    // In-place activation on dst
    if(_is_activationlayer_enabled)
    {
        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC, dst);
        pack.add_tensor(TensorType::ACL_DST, dst);
        _activationlayer_function->run(pack);
    }
}
424 
prepare(ITensorPack & tensors)425 void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::prepare(ITensorPack &tensors)
426 {
427     if(!_is_prepared)
428     {
429         auto weights      = tensors.get_const_tensor(TensorType::ACL_SRC_1);
430         auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
431 
432         ARM_COMPUTE_ERROR_ON(!weights->is_used());
433 
434         ITensorPack pack;
435         pack.add_tensor(TensorType::ACL_SRC, weights);
436         pack.add_tensor(TensorType::ACL_DST, weights_perm);
437 
438         _permute_weights->run(pack);
439         weights->mark_as_unused();
440         _is_prepared = true;
441     }
442 }
443 
configure(ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,ITensorInfo * dst,const ConvolutionInfo & info)444 void CpuDepthwiseConv2d::configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info)
445 {
446     ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, info);
447 
448     _depth_conv_func = get_depthwiseconvolution_function(src, weights, (biases != nullptr) ? biases : nullptr, dst, info);
449     switch(_depth_conv_func)
450     {
451         case DepthwiseConvolutionFunction::OPTIMIZED:
452             _func_optimized.configure(src, weights, biases, dst, info);
453             break;
454         case DepthwiseConvolutionFunction::GENERIC:
455             _func_generic.configure(src, weights, biases, dst, info);
456             break;
457         default:
458             ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
459     }
460 }
461 
validate(const ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * dst,const ConvolutionInfo & info)462 Status CpuDepthwiseConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
463 {
464     DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(src, weights, biases, dst, info);
465     switch(depth_conv_func)
466     {
467         case DepthwiseConvolutionFunction::OPTIMIZED:
468             return CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, biases, dst, info);
469             break;
470         case DepthwiseConvolutionFunction::GENERIC:
471             return CpuDepthwiseConv2dGeneric::validate(src, weights, biases, dst, info);
472             break;
473         default:
474             ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
475     }
476 }
477 
get_depthwiseconvolution_function(const ITensorInfo * src,const ITensorInfo * weights,const ITensorInfo * biases,const ITensorInfo * dst,const ConvolutionInfo & info)478 DepthwiseConvolutionFunction CpuDepthwiseConv2d::get_depthwiseconvolution_function(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
479                                                                                    const ConvolutionInfo &info)
480 {
481     if(bool(CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, biases, dst, info)))
482     {
483         return DepthwiseConvolutionFunction::OPTIMIZED;
484     }
485     else
486     {
487         return DepthwiseConvolutionFunction::GENERIC;
488     }
489 }
490 
run(ITensorPack & tensors)491 void CpuDepthwiseConv2d::run(ITensorPack &tensors)
492 {
493     switch(_depth_conv_func)
494     {
495         case DepthwiseConvolutionFunction::OPTIMIZED:
496             _func_optimized.run(tensors);
497             break;
498         case DepthwiseConvolutionFunction::GENERIC:
499             _func_generic.run(tensors);
500             break;
501         default:
502             ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
503     }
504 }
505 
prepare(ITensorPack & tensors)506 void CpuDepthwiseConv2d::prepare(ITensorPack &tensors)
507 {
508     switch(_depth_conv_func)
509     {
510         case DepthwiseConvolutionFunction::OPTIMIZED:
511             _func_optimized.prepare(tensors);
512             break;
513         case DepthwiseConvolutionFunction::GENERIC:
514             _func_generic.prepare(tensors);
515             break;
516         default:
517             ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
518     }
519 }
520 } // namespace cpu
521 } // namespace arm_compute
522