1 /*
2  * Copyright (c) 2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "ClComponentDepthwiseConv2d.h"
25 
26 #include "arm_compute/core/CL/CLHelpers.h"
27 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
28 #include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h"
29 #include "src/core/CL/CLValidate.h"
30 #include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h"
31 
32 namespace arm_compute
33 {
34 namespace experimental
35 {
36 namespace dynamic_fusion
37 {
38 using Settings = ClComponentDepthwiseConv2dSettings;
39 
export_input_to_cl_image(bool cl_image)40 Settings &Settings::export_input_to_cl_image(bool cl_image)
41 {
42     _export_input_to_cl_image = cl_image;
43     return *this;
44 }
45 
export_input_to_cl_image() const46 bool Settings::export_input_to_cl_image() const
47 {
48     return _export_input_to_cl_image;
49 }
50 
export_weights_to_cl_image(bool cl_image)51 Settings &Settings::export_weights_to_cl_image(bool cl_image)
52 {
53     _export_weights_to_cl_image = cl_image;
54     return *this;
55 }
56 
export_weights_to_cl_image() const57 bool Settings::export_weights_to_cl_image() const
58 {
59     return _export_weights_to_cl_image;
60 }
61 
fast_relaxed_math(bool fast_relaxed_math)62 Settings &Settings::fast_relaxed_math(bool fast_relaxed_math)
63 {
64     _fast_relaxed_math = fast_relaxed_math;
65     return *this;
66 }
67 
fast_relaxed_math() const68 bool Settings::fast_relaxed_math() const
69 {
70     return _fast_relaxed_math;
71 }
72 
is_fma_available(bool is_fma_available)73 Settings &Settings::is_fma_available(bool is_fma_available)
74 {
75     _is_fma_available = is_fma_available;
76     return *this;
77 }
78 
is_fma_available() const79 bool Settings::is_fma_available() const
80 {
81     return _is_fma_available;
82 }
83 
n0(unsigned int n0)84 Settings &Settings::n0(unsigned int n0)
85 {
86     _n0 = n0;
87     return *this;
88 }
89 
n0() const90 unsigned int Settings::n0() const
91 {
92     return _n0;
93 }
94 
m0(unsigned int m0)95 Settings &Settings::m0(unsigned int m0)
96 {
97     _m0 = m0;
98     return *this;
99 }
100 
m0() const101 unsigned int Settings::m0() const
102 {
103     return _m0;
104 }
105 
validate(const Properties & properties,const ArgumentPack<ITensorInfo> & tensors,const Attributes & attributes,const Settings & settings)106 Status ClComponentDepthwiseConv2d::validate(
107     const Properties                &properties,
108     const ArgumentPack<ITensorInfo> &tensors,
109     const Attributes                &attributes,
110     const Settings                  &settings)
111 {
112     ARM_COMPUTE_UNUSED(properties, settings);
113     const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
114     const auto wei = tensors.get_const_tensor(TensorType::ACL_SRC_1);
115     const auto bia = tensors.get_const_tensor(TensorType::ACL_SRC_2);
116     const auto dst = tensors.get_const_tensor(TensorType::ACL_DST_0);
117 
118     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei, dst);
119 
120     // 1. Check validity
121     // Matching data type
122     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, wei);
123     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
124     if(bia != nullptr)
125     {
126         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, bia);
127     }
128 
129     // Matching data layout
130     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, wei);
131     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
132     if(bia != nullptr)
133     {
134         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, bia);
135     }
136 
137     // All tensor infos are initialized
138     ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
139     ARM_COMPUTE_RETURN_ERROR_ON(wei->tensor_shape().total_size() == 0);
140     ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
141     if(bia != nullptr)
142     {
143         ARM_COMPUTE_RETURN_ERROR_ON(bia->tensor_shape().total_size() == 0);
144     }
145     // Device requirements are met
146     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
147     // wei shape is correct
148     const DataLayout data_layout = src->data_layout();
149     const size_t     channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
150 
151     ARM_COMPUTE_RETURN_ERROR_ON(wei->dimension(channel_idx) != (src->dimension(channel_idx) * attributes.depth_multiplier()));
152     ARM_COMPUTE_RETURN_ERROR_ON_MSG(wei->num_dimensions() > 3, "Weights can be at most 3 dimensional");
153 
154     // dst shape is correct
155     const PadStrideInfo pad_stride_info = PadStrideInfo(attributes.stride().x(), attributes.stride().y(),
156                                                         attributes.pad().left, attributes.pad().right,
157                                                         attributes.pad().top, attributes.pad().bottom,
158                                                         attributes.dimension_rounding_type());
159     const ConvolutionInfo conv_info{ pad_stride_info, attributes.depth_multiplier(), ActivationLayerInfo(), attributes.dilation() };
160     const TensorShape     output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *wei, conv_info);
161 
162     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), output_shape);
163 
164     // Check strides and dilation
165     ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_stride_info.stride().first < 1);
166     ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_stride_info.stride().second < 1);
167     ARM_COMPUTE_RETURN_ERROR_ON((conv_info.dilation.x() < 1) || (conv_info.dilation.y() < 1));
168     ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_stride_info.stride().first > 1 && settings.m0() != 1);
169     ARM_COMPUTE_RETURN_ERROR_ON(conv_info.dilation.x() > 1 && settings.m0() != 1);
170 
171     if(conv_info.depth_multiplier > 1 && settings.n0() > 1)
172     {
173         ARM_COMPUTE_RETURN_ERROR_ON((conv_info.depth_multiplier % settings.n0()) != 0);
174     }
175 
176     // Check export weights to cl image
177     ARM_COMPUTE_RETURN_ERROR_ON_MSG((settings.export_weights_to_cl_image() == true) && (export_to_cl_image(wei) == false), "Weights cannot be exported to cl_image!");
178     ARM_COMPUTE_RETURN_ERROR_ON((settings.export_weights_to_cl_image() == true) && ((settings.n0() % 4) != 0));
179 
180     ARM_COMPUTE_RETURN_ERROR_ON(wei->dimension(channel_idx) != (src->dimension(channel_idx) * conv_info.depth_multiplier));
181 
182     // bia shape is correct
183     if(bia != nullptr)
184     {
185         ARM_COMPUTE_RETURN_ERROR_ON_MSG(bia->dimension(0) != output_shape[channel_idx],
186                                         "Biases size and number of dst feature maps should match");
187         ARM_COMPUTE_RETURN_ERROR_ON_MSG(bia->num_dimensions() > 1, "Biases should be one dimensional");
188     }
189 
190     // 2. Check support level
191     // Data type
192     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
193     // Data layout
194     ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
195     // Texture in the input tensor
196     ARM_COMPUTE_RETURN_ERROR_ON((settings.export_input_to_cl_image() == true));
197 
198     return Status{};
199 }
200 
ClComponentDepthwiseConv2d(ComponentId id,const Properties & properties,const ArgumentPack<ITensorInfo> & tensors,const Attributes & attributes,const Settings & settings)201 ClComponentDepthwiseConv2d::ClComponentDepthwiseConv2d(
202     ComponentId                      id,
203     const Properties                &properties,
204     const ArgumentPack<ITensorInfo> &tensors,
205     const Attributes                &attributes,
206     const Settings                  &settings)
207     : IGpuKernelComponent{ id, properties, tensors },
208       _component_writer{ std::make_unique<ClTemplateDepthwiseConv2d>(id, tensors, attributes, settings) }
209 {
210 }
~ClComponentDepthwiseConv2d()211 ClComponentDepthwiseConv2d::~ClComponentDepthwiseConv2d()
212 {
213 }
template_writer() const214 const IGpuTemplateComponentWriter *ClComponentDepthwiseConv2d::template_writer() const
215 {
216     return _component_writer.get();
217 }
218 } // namespace dynamic_fusion
219 } // namespace experimental
220 } // namespace arm_compute
221