1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
25 #define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
26 
27 #include "arm_compute/runtime/IMemoryManager.h"
28 #include "arm_compute/runtime/MemoryGroup.h"
29 #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
30 #include "arm_compute/runtime/NEON/functions/NEPermute.h"
31 #include <memory>
32 
33 namespace arm_compute
34 {
35 // Forward declarations
36 class ITensor;
37 class NEDepthwiseConvolutionLayerNativeKernel;
38 
/** Function to execute a depthwise convolution.
 *
 * NOTE(review): this function owns two internal paths — an optimized one
 * (@ref NEDepthwiseConvolutionLayerOptimizedInternal) and a generic one
 * (@ref NEDepthwiseConvolutionLayerGeneric). The choice between them is made
 * in the implementation (hidden behind the pimpl), not visible in this header.
 */
class NEDepthwiseConvolutionLayer : public IFunction
{
public:
    /** Default constructor */
    NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move constructor */
    NEDepthwiseConvolutionLayer(NEDepthwiseConvolutionLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete;
    /** Default move assignment operator */
    NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default;
    /** Default destructor */
    ~NEDepthwiseConvolutionLayer();
    /** Initialize the function's source, destination, weights and convolution information.
     *
     * Valid data layouts:
     * - NHWC
     * - NCHW
     *
     * Valid data type configurations (src0 = input, src1 = weights, src2 = biases, dst = output):
     * |src0           |src1               |src2   |dst            |
     * |:--------------|:------------------|:------|:--------------|
     * |F16            |F16                |F16    |F16            |
     * |F32            |F32                |F32    |F32            |
     * |QASYMM8        |QASYMM8            |S32    |QASYMM8        |
     * |QASYMM8        |QSYMM8_PER_CHANNEL |S32    |QASYMM8        |
     * |QASYMM8_SIGNED |QASYMM8_SIGNED     |S32    |QASYMM8_SIGNED |
     * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32    |QASYMM8_SIGNED |
     *
     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     *                                  Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
     * @param[in]      conv_info        Padding and stride information to use for the convolution.
     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     */
    void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                   unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
     *
     * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
     * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
     *                             Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
     *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
     * @param[in] output           Destination tensor. Data type supported: same as @p input.
     * @param[in] conv_info        Padding and stride information to use for the convolution.
     * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
     * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
     * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                           unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    /** Basic function to execute optimized depthwise convolution routines. This function calls the following kernels:
    *
    * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
    *
    * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
    * -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present
    * -# @ref cpu::CpuDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
    * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
    * -# @ref NEActivationLayer if fused activation is required
    *
    */
    class NEDepthwiseConvolutionLayerOptimizedInternal : public IFunction
    {
    public:
        /** Default constructor */
        NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerOptimizedInternal(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
        /** Default move constructor */
        NEDepthwiseConvolutionLayerOptimizedInternal(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
        /** Default move assignment operator */
        NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
        /** Default destructor */
        ~NEDepthwiseConvolutionLayerOptimizedInternal() = default;
        /** Initialize the function's source, destination, kernels and border_size.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerOptimizedInternal
         *
         * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
         * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] output           Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info        Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overridden:
        void run() override;
        void prepare() override;

    private:
        MemoryGroup _memory_group;   /**< Memory group managing the lifetime of internally allocated tensors */
        struct Impl;                 /**< Opaque implementation (pimpl idiom; defined in the .cpp file) */
        std::unique_ptr<Impl> _impl; /**< Owned pointer to the hidden implementation */
    };

    /** Basic function to execute a generic depthwise convolution. This function calls the following kernel:
     *
     * -# @ref NEDepthwiseConvolutionLayerNativeKernel
     *
     */
    class NEDepthwiseConvolutionLayerGeneric : public IFunction
    {
    public:
        /** Default constructor */
        NEDepthwiseConvolutionLayerGeneric();
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerGeneric(const NEDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move constructor */
        NEDepthwiseConvolutionLayerGeneric(NEDepthwiseConvolutionLayerGeneric &&) = default;
        /** Prevent instances of this class from being copied (As this class contains pointers) */
        NEDepthwiseConvolutionLayerGeneric &operator=(const NEDepthwiseConvolutionLayerGeneric &) = delete;
        /** Default move assignment operator */
        NEDepthwiseConvolutionLayerGeneric &operator=(NEDepthwiseConvolutionLayerGeneric &&) = default;
        /** Default destructor */
        ~NEDepthwiseConvolutionLayerGeneric() = default;
        /** Initialize the function's source, destination, weights and convolution information.
         *
         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         *                                  Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
         * @param[in]      conv_info        Padding and stride information to use for the convolution.
         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         */
        void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerGeneric
         *
         * @param[in] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
         * @param[in] weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
         *                             Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
         *                             Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
         * @param[in] output           Destination tensor. Data type supported: same as @p input.
         * @param[in] conv_info        Padding and stride information to use for the convolution.
         * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
         * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
         * @param[in] dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
         *
         * @return a status
         */
        static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
                               unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));

        // Inherited methods overridden:
        void run() override;

    private:
        struct Impl;                 /**< Opaque implementation (pimpl idiom; defined in the .cpp file) */
        std::unique_ptr<Impl> _impl; /**< Owned pointer to the hidden implementation */
    };
    MemoryGroup _memory_group;   /**< Memory group managing the lifetime of internally allocated tensors */
    struct Impl;                 /**< Opaque implementation (pimpl idiom; defined in the .cpp file) */
    std::unique_ptr<Impl> _impl; /**< Owned pointer to the hidden implementation */
};
242 } // namespace arm_compute
243 #endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */
244