/*
 * Copyright (c) 2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.h"
#include "src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.h"

#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/GPUTarget.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"

namespace arm_compute
{
namespace cl_dwc
{
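// Default kernel configuration heuristics for the native depthwise convolution (DWC) kernel on Valhall GPUs.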
ClDWCNativeDefaultConfigValhall::ClDWCNativeDefaultConfigValhall(GPUTarget gpu)
    : IClDWCNativeKernelConfig(gpu)
{
}

DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                unsigned int depth_multiplier)
{
    using ConfigurationFunctionExecutorPtr = DWCComputeKernelInfo (ClDWCNativeDefaultConfigValhall::*)(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                   unsigned int depth_multiplier);

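    // Map data types (FP32 / FP16 / quantized) to the configuration member functions for each GPU.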
    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(&ClDWCNativeDefaultConfigValhall::configure_G78_f32,
                                                                         &ClDWCNativeDefaultConfigValhall::configure_G78_f16,
                                                                         &ClDWCNativeDefaultConfigValhall::configure_G78_u8);

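    // G77 reuses the G78 heuristics for FP32 and quantized types and only overrides the FP16 configuration.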
    ClDWCNativeConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(&ClDWCNativeDefaultConfigValhall::configure_G78_f32,
                                                                         &ClDWCNativeDefaultConfigValhall::configure_G77_f16,
                                                                         &ClDWCNativeDefaultConfigValhall::configure_G78_u8);

    ConfigurationFunctionExecutorPtr func = nullptr;
    switch(_target)
    {
        case GPUTarget::G77:
            func = configs_G77.get_function(src->data_type());
            break;
        case GPUTarget::G78:
        default:
            func = configs_G78.get_function(src->data_type());
            break;
    }

    ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for depthwise convolution");
    return (this->*func)(src, wei, conv_info, dilation, depth_multiplier);
}

DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                        unsigned int depth_multiplier)
{
    DWCComputeKernelInfo desc;

    if(src->data_layout() == DataLayout::NHWC)
    {
        const size_t idx_c          = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
        const size_t idx_w          = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
        const TensorShape wei_shape = wei->tensor_shape();
        const size_t      kernel_c  = wei_shape[idx_c];
        const size_t      kernel_w  = wei_shape[idx_w];

        desc.export_input_to_cl_image   = false;
        desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);

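        // Pick n0 (vector size along the channel dimension) according to the depth multiplier.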
        if(depth_multiplier == 1)
        {
            desc.n0 = 4;
        }
        else
        {
            if((depth_multiplier % 4) == 0)
            {
                desc.n0 = 4;
            }
            else if((depth_multiplier % 2) == 0)
            {
                desc.n0 = 2;
            }
            else
            {
                desc.n0 = 1;
            }
        }

        // Note: If we reduce n0, export to cl_image must be false
        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true));

        desc.n0 = adjust_vec_size(desc.n0, kernel_c);

        // Set m0 only if stride_x == 1 and dilation_x == 1
        if(conv_info.stride().first == 1 && dilation.x() == 1)
        {
            if((kernel_w >= 9) || (kernel_w == 1))
            {
                desc.m0 = 1;
            }
            else
            {
                desc.m0 = 2;
            }
        }
        else
        {
            desc.m0 = 1;
        }
    }

    return desc;
}

DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                        unsigned int depth_multiplier)
{
    DWCComputeKernelInfo desc;

    if(src->data_layout() == DataLayout::NHWC)
    {
        // Src and weights have the same dimension indices
        const size_t idx_c          = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
        const size_t idx_w          = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
        const TensorShape src_shape = src->tensor_shape();
        const TensorShape wei_shape = wei->tensor_shape();
        const size_t      src_w     = src_shape[idx_w];
        const size_t      kernel_c  = wei_shape[idx_c];
        const size_t      kernel_w  = wei_shape[idx_w];

        desc.export_input_to_cl_image   = false;
        desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);

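        // Pick n0 according to the depth multiplier; FP16 can use a wider vector (8) when the weights are not exported to cl_image.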
        if(depth_multiplier == 1)
        {
            if(desc.export_weights_to_cl_image == false)
            {
                desc.n0 = 8;
            }
            else
            {
                desc.n0 = 4;
            }
        }
        else
        {
            if((depth_multiplier % 4) == 0)
            {
                desc.n0 = 4;
            }
            else if((depth_multiplier % 2) == 0)
            {
                desc.n0 = 2;
            }
            else
            {
                desc.n0 = 1;
            }
        }

        // Note: If we reduce n0, export to cl_image must be false
        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true));

        desc.n0 = adjust_vec_size(desc.n0, kernel_c);

        // Set m0 only if stride_x == 1 and dilation_x == 1
        if(conv_info.stride().first == 1 && dilation.x() == 1)
        {
            if((kernel_w >= 9) || (kernel_w == 1))
            {
                desc.m0 = 1;
            }
            else
            {
                if((src_w % 5) == 0)
                {
                    desc.m0 = 5;
                }
                else
                {
                    desc.m0 = 4;
                }
            }
        }
        else
        {
            desc.m0 = 1;
        }
    }

    return desc;
}

DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G78_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                       unsigned int depth_multiplier)
{
    ARM_COMPUTE_UNUSED(wei);

    DWCComputeKernelInfo desc;

    if(src->data_layout() == DataLayout::NHWC)
    {
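        // Quantized path: cl_image export is not used; larger n0/m0 are only picked when depth_multiplier == 1.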
        desc.export_input_to_cl_image   = false;
        desc.export_weights_to_cl_image = false;
        desc.n0                         = (depth_multiplier == 1) ? 4 : 1;
        if(conv_info.stride().first == 1 && dilation.x() == 1 && depth_multiplier == 1)
        {
            desc.m0 = 2;
        }
        else
        {
            desc.m0 = 1;
        }
    }

    return desc;
}

DWCComputeKernelInfo ClDWCNativeDefaultConfigValhall::configure_G77_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info, const Size2D &dilation,
                                                                        unsigned int depth_multiplier)
{
    DWCComputeKernelInfo desc;

    if(src->data_layout() == DataLayout::NHWC)
    {
        const size_t idx_c          = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::CHANNEL);
        const size_t idx_w          = get_data_layout_dimension_index(wei->data_layout(), DataLayoutDimension::WIDTH);
        const TensorShape wei_shape = wei->tensor_shape();
        const size_t      kernel_c  = wei_shape[idx_c];
        const size_t      kernel_w  = wei_shape[idx_w];

        desc.export_input_to_cl_image   = false;
        desc.export_weights_to_cl_image = use_cl_image_for_weights(wei, depth_multiplier);

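        // Same n0 heuristic as the G78 FP16 configuration; only the m0 choice below differs (2 instead of 4/5).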
        if(depth_multiplier == 1)
        {
            if(desc.export_weights_to_cl_image == false)
            {
                desc.n0 = 8;
            }
            else
            {
                desc.n0 = 4;
            }
        }
        else
        {
            if((depth_multiplier % 4) == 0)
            {
                desc.n0 = 4;
            }
            else if((depth_multiplier % 2) == 0)
            {
                desc.n0 = 2;
            }
            else
            {
                desc.n0 = 1;
            }
        }

        // Note: If we reduce n0, export to cl_image must be false
        ARM_COMPUTE_ERROR_ON((adjust_vec_size(desc.n0, kernel_c) != desc.n0) && (desc.export_weights_to_cl_image == true));

        desc.n0 = adjust_vec_size(desc.n0, kernel_c);

        // Set m0 only if stride_x == 1 and dilation_x == 1
        if(conv_info.stride().first == 1 && dilation.x() == 1)
        {
            if((kernel_w >= 9) || (kernel_w == 1))
            {
                desc.m0 = 1;
            }
            else
            {
                desc.m0 = 2;
            }
        }
        else
        {
            desc.m0 = 1;
        }
    }

    return desc;
}
} // namespace cl_dwc
} // namespace arm_compute