1 /*
2  * Copyright (c) 2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.h"
25 
26 #include "arm_compute/core/CL/CLHelpers.h"
27 #include "arm_compute/core/CL/CLKernelLibrary.h"
28 #include "arm_compute/core/GPUTarget.h"
29 #include "arm_compute/core/TensorInfo.h"
30 #include "arm_compute/core/TensorShape.h"
31 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
32 #include <utility>
33 
34 namespace arm_compute
35 {
36 namespace cl_direct_conv
37 {
38 using namespace arm_compute::misc::shape_calculator;
39 
// Constructor: forwards the GPU target to the base direct-conv kernel-config interface,
// which stores it in _target for use by configure().
ClDirectConvDefaultConfigValhall::ClDirectConvDefaultConfigValhall(GPUTarget gpu)
    : IClDirectConvKernelConfig(gpu)
{
}
44 
configure(const ITensorInfo * src,const ITensorInfo * wei,const PadStrideInfo & conv_info)45 DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
46 {
47     using ConfigurationFunctionExecutorPtr = DirectConvComputeKernelInfo (ClDirectConvDefaultConfigValhall::*)(const ITensorInfo * src, const ITensorInfo * wei, const PadStrideInfo & conv_info);
48 
49     ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(&ClDirectConvDefaultConfigValhall::configure_G78_f32,
50                                                                           &ClDirectConvDefaultConfigValhall::configure_G78_f16,
51                                                                           &ClDirectConvDefaultConfigValhall::configure_G78_u8);
52 
53     ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_G57(&ClDirectConvDefaultConfigValhall::configure_G57_f32,
54                                                                           &ClDirectConvDefaultConfigValhall::configure_G57_f16,
55                                                                           &ClDirectConvDefaultConfigValhall::configure_G78_u8);
56 
57     ConfigurationFunctionExecutorPtr func = nullptr;
58     switch(_target)
59     {
60         case GPUTarget::G57:
61             func = configs_G57.get_function(src->data_type());
62             break;
63         case GPUTarget::G78:
64         default:
65             func = configs_G78.get_function(src->data_type());
66             break;
67     }
68 
69     ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for direct convolution");
70     return (this->*func)(src, wei, conv_info);
71 }
72 
configure_G78_f32(const ITensorInfo * src,const ITensorInfo * wei,const PadStrideInfo & conv_info)73 DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
74 {
75     DirectConvComputeKernelInfo desc;
76 
77     if(src->data_layout() == DataLayout::NHWC)
78     {
79         // Get the output shape
80         const TensorShape wei_shape                  = wei->tensor_shape();
81         const TensorShape dst_shape                  = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
82         const bool        export_weights_to_cl_image = export_to_cl_image(wei);
83 
84         const int32_t ofm          = dst_shape[0];
85         const int32_t m            = dst_shape[1] * dst_shape[2];
86         const bool    is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
87 
88         desc.export_weights_to_cl_image = export_weights_to_cl_image;
89 
90         if(dst_shape[0] <= 4)
91         {
92             if(is_pointwise)
93             {
94                 if(ofm == 4)
95                 {
96                     desc.m0 = 1;
97                     desc.n0 = 4;
98                     desc.k0 = 16;
99                 }
100                 else
101                 {
102                     desc.m0 = 1;
103                     desc.n0 = 1;
104                     desc.k0 = 16;
105                 }
106             }
107             else
108             {
109                 desc.m0 = 1;
110                 desc.n0 = 2;
111                 desc.k0 = 16;
112             }
113         }
114         else
115         {
116             if(m < 64)
117             {
118                 desc.m0 = 1;
119                 desc.n0 = 1;
120                 desc.k0 = 16;
121             }
122             else
123             {
124                 desc.m0 = 4;
125                 desc.n0 = 4;
126                 desc.k0 = 4;
127             }
128         }
129     }
130 
131     return desc;
132 }
133 
configure_G78_f16(const ITensorInfo * src,const ITensorInfo * wei,const PadStrideInfo & conv_info)134 DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
135 {
136     DirectConvComputeKernelInfo desc;
137 
138     if(src->data_layout() == DataLayout::NHWC)
139     {
140         // Get the output shape
141         const TensorShape wei_shape                  = wei->tensor_shape();
142         const TensorShape dst_shape                  = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
143         const bool        export_weights_to_cl_image = export_to_cl_image(wei);
144 
145         const int32_t ofm          = dst_shape[0];
146         const int32_t m            = dst_shape[1] * dst_shape[2];
147         const int32_t k            = wei_shape[0];
148         const bool    is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
149 
150         desc.export_weights_to_cl_image = export_weights_to_cl_image;
151 
152         if(dst_shape[0] <= 4)
153         {
154             // k0 should be as larger as possible. However, we should avoid
155             // having left-over for loops that make the implementation slower.
156             if((k % 16) == 0)
157             {
158                 desc.k0 = 16;
159             }
160             else if((k % 8) == 0)
161             {
162                 desc.k0 = 8;
163             }
164             else
165             {
166                 desc.k0 = 4;
167             }
168 
169             if(is_pointwise)
170             {
171                 if(ofm == 4)
172                 {
173                     desc.m0 = 1;
174                     desc.n0 = 4;
175                 }
176                 else
177                 {
178                     desc.m0 = 1;
179                     desc.n0 = 1;
180                 }
181             }
182             else
183             {
184                 desc.m0 = 1;
185                 desc.n0 = dst_shape[0];
186             }
187         }
188         else
189         {
190             if(m < 64)
191             {
192                 desc.m0 = 1;
193                 desc.n0 = 1;
194                 if((k % 16) == 0)
195                 {
196                     desc.k0 = 16;
197                 }
198                 else if((k % 8) == 0)
199                 {
200                     desc.k0 = 8;
201                 }
202                 else
203                 {
204                     desc.k0 = 4;
205                 }
206             }
207             else
208             {
209                 if(ofm >= 16)
210                 {
211                     if(m / 6 > 24000)
212                     {
213                         desc.m0 = 6;
214                     }
215                     else
216                     {
217                         desc.m0 = 5;
218                     }
219                     desc.n0 = 8;
220                     desc.k0 = 4;
221                 }
222                 else
223                 {
224                     desc.m0 = 2;
225                     desc.n0 = 8;
226                     if((k % 16) == 0)
227                     {
228                         desc.k0 = 16;
229                     }
230                     else if((k % 8) == 0)
231                     {
232                         desc.k0 = 8;
233                     }
234                     else
235                     {
236                         desc.k0 = 4;
237                     }
238                 }
239             }
240         }
241     }
242 
243     return desc;
244 }
245 
configure_G78_u8(const ITensorInfo * src,const ITensorInfo * wei,const PadStrideInfo & conv_info)246 DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G78_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
247 {
248     DirectConvComputeKernelInfo desc;
249 
250     if(src->data_layout() == DataLayout::NHWC)
251     {
252         // Get the output shape
253         TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
254 
255         desc.n0 = 4;
256 
257         if(output_shape[0] > 16)
258         {
259             desc.m0 = 4;
260         }
261 
262         desc.k0 = 16;
263 
264         desc.export_weights_to_cl_image = false;
265     }
266 
267     return desc;
268 }
269 
configure_G57_f32(const ITensorInfo * src,const ITensorInfo * wei,const PadStrideInfo & conv_info)270 DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G57_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
271 {
272     DirectConvComputeKernelInfo desc;
273 
274     if(src->data_layout() == DataLayout::NHWC)
275     {
276         // Get the output shape
277         const TensorShape wei_shape                  = wei->tensor_shape();
278         const TensorShape dst_shape                  = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
279         const bool        export_weights_to_cl_image = export_to_cl_image(wei);
280 
281         const int32_t m            = dst_shape[1] * dst_shape[2];
282         const bool    is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
283 
284         desc.export_weights_to_cl_image = export_weights_to_cl_image;
285 
286         if(dst_shape[0] <= 4)
287         {
288             if(is_pointwise)
289             {
290                 desc.m0 = 1;
291                 desc.n0 = 1;
292                 desc.k0 = 16;
293             }
294             else
295             {
296                 desc.m0 = 1;
297                 desc.n0 = dst_shape[0];
298                 desc.k0 = 16;
299             }
300         }
301         else
302         {
303             if(m < 64)
304             {
305                 if(m == 1)
306                 {
307                     desc.m0 = 1;
308                     desc.n0 = 1;
309                     desc.k0 = 16;
310                 }
311                 else
312                 {
313                     desc.m0 = 4;
314                     desc.n0 = 2;
315                     desc.k0 = 8;
316                 }
317             }
318             else
319             {
320                 desc.m0 = 4;
321                 desc.n0 = 4;
322                 desc.k0 = 4;
323             }
324         }
325     }
326 
327     return desc;
328 }
329 
configure_G57_f16(const ITensorInfo * src,const ITensorInfo * wei,const PadStrideInfo & conv_info)330 DirectConvComputeKernelInfo ClDirectConvDefaultConfigValhall::configure_G57_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
331 {
332     DirectConvComputeKernelInfo desc;
333 
334     if(src->data_layout() == DataLayout::NHWC)
335     {
336         // Get the output shape
337         const TensorShape wei_shape                  = wei->tensor_shape();
338         const TensorShape dst_shape                  = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
339         const bool        export_weights_to_cl_image = export_to_cl_image(wei);
340 
341         const int32_t ofm          = dst_shape[0];
342         const int32_t m            = dst_shape[1] * dst_shape[2];
343         const bool    is_pointwise = (wei_shape[1] == wei_shape[2]) && wei_shape[1] == 1;
344 
345         desc.export_weights_to_cl_image = export_weights_to_cl_image;
346 
347         if(dst_shape[0] <= 4)
348         {
349             if(is_pointwise)
350             {
351                 desc.m0 = 2;
352                 desc.n0 = 1;
353                 desc.k0 = 16;
354             }
355             else
356             {
357                 desc.m0 = 1;
358                 desc.n0 = dst_shape[0];
359                 desc.k0 = 16;
360             }
361         }
362         else
363         {
364             if(m < 64)
365             {
366                 if(m == 1)
367                 {
368                     desc.m0 = 1;
369                     desc.n0 = 1;
370                     desc.k0 = 16;
371                 }
372                 else
373                 {
374                     desc.m0 = 4;
375                     desc.n0 = 2;
376                     desc.k0 = 8;
377                 }
378             }
379             else
380             {
381                 if(ofm > 16)
382                 {
383                     desc.m0 = 4;
384                     desc.n0 = 8;
385                     desc.k0 = 8;
386                 }
387                 else
388                 {
389                     desc.m0 = 8;
390                     desc.n0 = 4;
391                     desc.k0 = 4;
392                 }
393             }
394         }
395     }
396 
397     return desc;
398 }
} // namespace cl_direct_conv
400 } // namespace arm_compute
401