1 /*
2  * Copyright (c) 2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.h"
25 
26 #include "arm_compute/core/CL/CLHelpers.h"
27 #include "arm_compute/core/CL/CLKernelLibrary.h"
28 #include "arm_compute/core/GPUTarget.h"
29 #include "arm_compute/core/TensorInfo.h"
30 #include "arm_compute/core/TensorShape.h"
31 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
32 #include <utility>
33 
34 namespace arm_compute
35 {
36 namespace cl_direct_conv
37 {
38 using namespace arm_compute::misc::shape_calculator;
39 
ClDirectConvDefaultConfigBifrost(GPUTarget gpu)40 ClDirectConvDefaultConfigBifrost::ClDirectConvDefaultConfigBifrost(GPUTarget gpu)
41     : IClDirectConvKernelConfig(gpu)
42 {
43 }
44 
configure(const ITensorInfo * src,const ITensorInfo * wei,const PadStrideInfo & conv_info)45 DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
46 {
47     using ConfigurationFunctionExecutorPtr = DirectConvComputeKernelInfo (ClDirectConvDefaultConfigBifrost::*)(const ITensorInfo * src, const ITensorInfo * wei, const PadStrideInfo & conv_info);
48 
49     ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_G71(&ClDirectConvDefaultConfigBifrost::configure_G71_f32,
50                                                                           &ClDirectConvDefaultConfigBifrost::configure_G71_f16,
51                                                                           &ClDirectConvDefaultConfigBifrost::configure_G71_u8);
52 
53     ClDirectConvConfigArray<ConfigurationFunctionExecutorPtr> configs_default(&ClDirectConvDefaultConfigBifrost::configure_default_f32,
54                                                                               &ClDirectConvDefaultConfigBifrost::configure_default_f16,
55                                                                               &ClDirectConvDefaultConfigBifrost::configure_G71_u8);
56 
57     ConfigurationFunctionExecutorPtr func = nullptr;
58     switch(_target)
59     {
60         case GPUTarget::G71:
61             func = configs_G71.get_function(src->data_type());
62             break;
63         default:
64             func = configs_default.get_function(src->data_type());
65             break;
66     }
67 
68     ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not supported for direct convolution");
69     return (this->*func)(src, wei, conv_info);
70 }
71 
configure_G71_f32(const ITensorInfo * src,const ITensorInfo * wei,const PadStrideInfo & conv_info)72 DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_G71_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
73 {
74     DirectConvComputeKernelInfo desc;
75 
76     if(src->data_layout() == DataLayout::NHWC)
77     {
78         // Get the output shape
79         TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
80 
81         desc.n0 = 4;
82 
83         if(output_shape[0] > 16)
84         {
85             desc.m0 = 2;
86         }
87 
88         desc.k0 = 8;
89 
90         desc.export_weights_to_cl_image = false;
91     }
92 
93     return desc;
94 }
95 
configure_G71_f16(const ITensorInfo * src,const ITensorInfo * wei,const PadStrideInfo & conv_info)96 DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_G71_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
97 {
98     DirectConvComputeKernelInfo desc;
99 
100     if(src->data_layout() == DataLayout::NHWC)
101     {
102         // Get the output shape
103         TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
104 
105         desc.n0 = 4;
106 
107         if(output_shape[0] > 16)
108         {
109             desc.m0 = 4;
110         }
111 
112         desc.k0 = 8;
113 
114         desc.export_weights_to_cl_image = false;
115     }
116 
117     return desc;
118 }
119 
configure_G71_u8(const ITensorInfo * src,const ITensorInfo * wei,const PadStrideInfo & conv_info)120 DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_G71_u8(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
121 {
122     DirectConvComputeKernelInfo desc;
123 
124     if(src->data_layout() == DataLayout::NHWC)
125     {
126         // Get the output shape
127         TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
128 
129         desc.n0 = 4;
130 
131         if(output_shape[0] > 16)
132         {
133             desc.m0 = 4;
134         }
135 
136         desc.k0 = 16;
137 
138         desc.export_weights_to_cl_image = false;
139     }
140 
141     return desc;
142 }
143 
configure_default_f32(const ITensorInfo * src,const ITensorInfo * wei,const PadStrideInfo & conv_info)144 DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_default_f32(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
145 {
146     DirectConvComputeKernelInfo desc;
147 
148     if(src->data_layout() == DataLayout::NHWC)
149     {
150         // Get the output shape
151         TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
152 
153         desc.n0 = 4;
154 
155         if(output_shape[0] > 16)
156         {
157             desc.m0 = 2;
158         }
159 
160         desc.k0 = 8;
161 
162         desc.export_weights_to_cl_image = export_to_cl_image(wei);
163     }
164 
165     return desc;
166 }
167 
configure_default_f16(const ITensorInfo * src,const ITensorInfo * wei,const PadStrideInfo & conv_info)168 DirectConvComputeKernelInfo ClDirectConvDefaultConfigBifrost::configure_default_f16(const ITensorInfo *src, const ITensorInfo *wei, const PadStrideInfo &conv_info)
169 {
170     DirectConvComputeKernelInfo desc;
171 
172     if(src->data_layout() == DataLayout::NHWC)
173     {
174         // Get the output shape
175         TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, conv_info);
176 
177         desc.n0 = 4;
178 
179         if(output_shape[0] > 16)
180         {
181             desc.m0 = 4;
182         }
183 
184         desc.k0 = 8;
185 
186         desc.export_weights_to_cl_image = export_to_cl_image(wei);
187     }
188 
189     return desc;
190 }
} // namespace cl_direct_conv
192 } // namespace arm_compute
193