xref: /aosp_15_r20/external/ComputeLibrary/src/gpu/cl/ClKernelLibrary.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1*c217d954SCole Faust /*
2*c217d954SCole Faust  * Copyright (c) 2016-2022 Arm Limited.
3*c217d954SCole Faust  *
4*c217d954SCole Faust  * SPDX-License-Identifier: MIT
5*c217d954SCole Faust  *
6*c217d954SCole Faust  * Permission is hereby granted, free of charge, to any person obtaining a copy
7*c217d954SCole Faust  * of this software and associated documentation files (the "Software"), to
8*c217d954SCole Faust  * deal in the Software without restriction, including without limitation the
9*c217d954SCole Faust  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10*c217d954SCole Faust  * sell copies of the Software, and to permit persons to whom the Software is
11*c217d954SCole Faust  * furnished to do so, subject to the following conditions:
12*c217d954SCole Faust  *
13*c217d954SCole Faust  * The above copyright notice and this permission notice shall be included in all
14*c217d954SCole Faust  * copies or substantial portions of the Software.
15*c217d954SCole Faust  *
16*c217d954SCole Faust  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17*c217d954SCole Faust  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18*c217d954SCole Faust  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19*c217d954SCole Faust  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20*c217d954SCole Faust  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21*c217d954SCole Faust  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22*c217d954SCole Faust  * SOFTWARE.
23*c217d954SCole Faust  */
24*c217d954SCole Faust #include "src/gpu/cl/ClKernelLibrary.h"
25*c217d954SCole Faust 
26*c217d954SCole Faust #include "arm_compute/core/Error.h"
27*c217d954SCole Faust #include "arm_compute/core/Utils.h"
28*c217d954SCole Faust 
29*c217d954SCole Faust #include <algorithm>
30*c217d954SCole Faust #include <array>
31*c217d954SCole Faust #include <fstream>
32*c217d954SCole Faust #include <utility>
33*c217d954SCole Faust 
34*c217d954SCole Faust #ifdef ARM_COMPUTE_COMPRESSED_KERNELS
35*c217d954SCole Faust #include <zlib.h>
36*c217d954SCole Faust 
37*c217d954SCole Faust namespace
38*c217d954SCole Faust {
/** Base64 decoding table
 *
 * Indexed by the unsigned value of an input byte. Characters of the standard
 * base64 alphabet ('A'-'Z', 'a'-'z', '0'-'9', '+', '/') map to their 6-bit
 * value (0..63); every other byte maps to 255 so that malformed input can be
 * told apart from 'A', whose value is 0.
 */
constexpr std::array<uint8_t, 256> b64_invtab =
{
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 255, 255, 255,
    255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255,
    255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
};

/** Decode a base64 encoded string
 *
 * The input must use the standard alphabet, have a length that is a multiple
 * of four and carry '=' padding only as its last one or two characters.
 *
 * @param[in] str Base64 encoded string to decode
 *
 * @return The decoded string in case of a valid, non-empty input, otherwise an empty string
 */
std::string decode_base64(const std::string &str)
{
    constexpr char         pad_char   = '=';
    constexpr unsigned int max_sextet = 63U;

    // Reject empty input and anything that is not a whole number of 4-character groups
    const std::size_t str_len = str.size();
    if(str_len == 0 || (str_len % 4) != 0)
    {
        return {};
    }

    // Padding is only recognised at the very end of the string; a stray '=' anywhere
    // else is caught by the alphabet check below
    std::size_t padding = 0;
    if(str[str_len - 1] == pad_char)
    {
        padding = (str[str_len - 2] == pad_char) ? 2 : 1;
    }

    // Look up the 6-bit value of the character at the given position.
    // The cast to unsigned char keeps bytes >= 0x80 from becoming negative indices.
    const auto sextet = [&str](std::size_t pos) -> unsigned int
    {
        return b64_invtab[static_cast<unsigned char>(str[pos])];
    };

    // Reserve memory for the decoded string
    // Note each 4 consecutive elements of 6-bit encode 3 bytes
    std::string dec_b64;
    dec_b64.reserve((str_len / 4) * 3);

    // Decode every group that holds no padding symbol
    const std::size_t full_len = (padding == 0) ? str_len : str_len - 4;
    std::size_t       c        = 0;
    for(; c < full_len; c += 4)
    {
        const unsigned int s0 = sextet(c);
        const unsigned int s1 = sextet(c + 1);
        const unsigned int s2 = sextet(c + 2);
        const unsigned int s3 = sextet(c + 3);

        // Valid values fit in 6 bits and the invalid marker is 255, so the OR exceeds 63
        // as soon as one character is outside the alphabet
        if((s0 | s1 | s2 | s3) > max_sextet)
        {
            return {};
        }

        dec_b64.push_back(static_cast<char>(((s0 << 2) | (s1 >> 4)) & 0xFFU));
        dec_b64.push_back(static_cast<char>(((s1 << 4) | (s2 >> 2)) & 0xFFU));
        dec_b64.push_back(static_cast<char>(((s2 << 6) | s3) & 0xFFU));
    }

    // Last group, which carries one or two padding symbols
    if(padding == 1)
    {
        const unsigned int s0 = sextet(c);
        const unsigned int s1 = sextet(c + 1);
        const unsigned int s2 = sextet(c + 2);

        if((s0 | s1 | s2) > max_sextet)
        {
            return {};
        }

        dec_b64.push_back(static_cast<char>(((s0 << 2) | (s1 >> 4)) & 0xFFU));
        dec_b64.push_back(static_cast<char>(((s1 << 4) | (s2 >> 2)) & 0xFFU));
    }
    else if(padding == 2)
    {
        const unsigned int s0 = sextet(c);
        const unsigned int s1 = sextet(c + 1);

        if((s0 | s1) > max_sextet)
        {
            return {};
        }

        dec_b64.push_back(static_cast<char>(((s0 << 2) | (s1 >> 4)) & 0xFFU));
    }

    return dec_b64;
}
127*c217d954SCole Faust 
128*c217d954SCole Faust /** Decompress a zlib compressed string
129*c217d954SCole Faust  *
130*c217d954SCole Faust  * @param[in] str ZLib compressed string
131*c217d954SCole Faust  *
132*c217d954SCole Faust  * @return The decompressed string if successful, otherwise false.
133*c217d954SCole Faust  */
decompress_zlib(const std::string & str)134*c217d954SCole Faust std::string decompress_zlib(const std::string &str)
135*c217d954SCole Faust {
136*c217d954SCole Faust     // Create and initialize decompression stream
137*c217d954SCole Faust     z_stream ds{};
138*c217d954SCole Faust     if(inflateInit(&ds) != Z_OK)
139*c217d954SCole Faust     {
140*c217d954SCole Faust         return std::string();
141*c217d954SCole Faust     }
142*c217d954SCole Faust     ds.avail_in = str.size();
143*c217d954SCole Faust     ds.next_in  = (Bytef *)str.data();
144*c217d954SCole Faust 
145*c217d954SCole Faust     // Roll-over the string using a buffer and decompress
146*c217d954SCole Faust     int         status = Z_OK;
147*c217d954SCole Faust     char        roll_buff[16384];
148*c217d954SCole Faust     std::string inflated_str;
149*c217d954SCole Faust     do
150*c217d954SCole Faust     {
151*c217d954SCole Faust         ds.avail_out = sizeof(roll_buff);
152*c217d954SCole Faust         ds.next_out  = reinterpret_cast<Bytef *>(roll_buff);
153*c217d954SCole Faust 
154*c217d954SCole Faust         status = inflate(&ds, 0);
155*c217d954SCole Faust         if(inflated_str.size() < ds.total_out)
156*c217d954SCole Faust         {
157*c217d954SCole Faust             inflated_str.append(roll_buff, ds.total_out - inflated_str.size());
158*c217d954SCole Faust         }
159*c217d954SCole Faust     }
160*c217d954SCole Faust     while(status == Z_OK);
161*c217d954SCole Faust 
162*c217d954SCole Faust     // Finalize decompression stream
163*c217d954SCole Faust     inflateEnd(&ds);
164*c217d954SCole Faust     if(status != Z_STREAM_END)
165*c217d954SCole Faust     {
166*c217d954SCole Faust         return std::string();
167*c217d954SCole Faust     }
168*c217d954SCole Faust 
169*c217d954SCole Faust     return inflated_str;
170*c217d954SCole Faust }
171*c217d954SCole Faust } // namespace
172*c217d954SCole Faust #endif /* ARM_COMPUTE_COMPRESSED_KERNELS */
173*c217d954SCole Faust 
174*c217d954SCole Faust namespace arm_compute
175*c217d954SCole Faust {
176*c217d954SCole Faust namespace opencl
177*c217d954SCole Faust {
178*c217d954SCole Faust const std::map<std::string, std::string> ClKernelLibrary::_kernel_program_map =
179*c217d954SCole Faust {
180*c217d954SCole Faust     // Common Kernels
181*c217d954SCole Faust     { "activation_layer", "common/activation_layer.cl" },
182*c217d954SCole Faust     { "activation_layer_quant", "common/activation_layer_quant.cl" },
183*c217d954SCole Faust     { "activation_layer_quant_f32", "common/activation_layer_quant.cl" },
184*c217d954SCole Faust     { "arg_min_max_x", "common/arg_min_max.cl" },
185*c217d954SCole Faust     { "arg_min_max_y", "common/arg_min_max.cl" },
186*c217d954SCole Faust     { "arg_min_max_z", "common/arg_min_max.cl" },
187*c217d954SCole Faust     { "arg_min_max_w", "common/arg_min_max.cl" },
188*c217d954SCole Faust     { "bitwise_or", "common/bitwise_op.cl" },
189*c217d954SCole Faust     { "bitwise_and", "common/bitwise_op.cl" },
190*c217d954SCole Faust     { "bitwise_xor", "common/bitwise_op.cl" },
191*c217d954SCole Faust     { "bitwise_not", "common/bitwise_op.cl" },
192*c217d954SCole Faust     { "bounding_box_transform", "common/bounding_box_transform.cl" },
193*c217d954SCole Faust     { "bounding_box_transform_quantized", "common/bounding_box_transform_quantized.cl" },
194*c217d954SCole Faust     { "compare_equal", "common/comparisons.cl" },
195*c217d954SCole Faust     { "compare_equal_quantized", "common/comparisons.cl" },
196*c217d954SCole Faust     { "compare_notequal", "common/comparisons.cl" },
197*c217d954SCole Faust     { "compare_notequal_quantized", "common/comparisons.cl" },
198*c217d954SCole Faust     { "compare_greater", "common/comparisons.cl" },
199*c217d954SCole Faust     { "compare_greater_quantized", "common/comparisons.cl" },
200*c217d954SCole Faust     { "compare_greaterequal", "common/comparisons.cl" },
201*c217d954SCole Faust     { "compare_greaterequal_quantized", "common/comparisons.cl" },
202*c217d954SCole Faust     { "compare_less", "common/comparisons.cl" },
203*c217d954SCole Faust     { "compare_less_quantized", "common/comparisons.cl" },
204*c217d954SCole Faust     { "compare_lessequal", "common/comparisons.cl" },
205*c217d954SCole Faust     { "compare_lessequal_quantized", "common/comparisons.cl" },
206*c217d954SCole Faust     { "concatenate", "common/concatenate.cl" },
207*c217d954SCole Faust     { "concatenate_width", "common/concatenate.cl" },
208*c217d954SCole Faust     { "concatenate_height", "common/concatenate.cl" },
209*c217d954SCole Faust     { "concatenate_width_x2", "common/concatenate.cl" },
210*c217d954SCole Faust     { "concatenate_width_x4", "common/concatenate.cl" },
211*c217d954SCole Faust     { "col2im", "common/col2im.cl" },
212*c217d954SCole Faust     { "cast_down", "common/cast.cl" },
213*c217d954SCole Faust     { "cast_up", "common/cast.cl" },
214*c217d954SCole Faust     { "convert_fc_weights", "common/convert_fc_weights.cl" },
215*c217d954SCole Faust     { "copy_tensor", "common/copy_tensor.cl" },
216*c217d954SCole Faust     { "crop_tensor", "common/crop_tensor.cl" },
217*c217d954SCole Faust     { "deconvolution_reshape", "common/deconvolution_layer.cl" },
218*c217d954SCole Faust     { "deconvolution_upsample", "common/deconvolution_layer.cl" },
219*c217d954SCole Faust     { "dequantization_layer", "common/dequantization_layer.cl" },
220*c217d954SCole Faust     { "elementwise_operation_ADD", "common/elementwise_operation.cl" },
221*c217d954SCole Faust     { "elementwise_operation_SUB", "common/elementwise_operation.cl" },
222*c217d954SCole Faust     { "elementwise_operation_MAX", "common/elementwise_operation.cl" },
223*c217d954SCole Faust     { "elementwise_operation_MIN", "common/elementwise_operation.cl" },
224*c217d954SCole Faust     { "elementwise_operation_DIV", "common/elementwise_operation.cl" },
225*c217d954SCole Faust     { "elementwise_operation_SQUARED_DIFF", "common/elementwise_operation.cl" },
226*c217d954SCole Faust     { "elementwise_operation_POWER", "common/elementwise_operation.cl" },
227*c217d954SCole Faust     { "elementwise_operation_PRELU", "common/elementwise_operation.cl" },
228*c217d954SCole Faust     { "elementwise_operation_AND", "common/elementwise_operation.cl" },
229*c217d954SCole Faust     { "elementwise_operation_OR", "common/elementwise_operation.cl" },
230*c217d954SCole Faust     { "elementwise_operation_ADD_quantized", "common/elementwise_operation_quantized.cl" },
231*c217d954SCole Faust     { "elementwise_operation_SUB_quantized", "common/elementwise_operation_quantized.cl" },
232*c217d954SCole Faust     { "elementwise_operation_MAX_quantized", "common/elementwise_operation_quantized.cl" },
233*c217d954SCole Faust     { "elementwise_operation_MIN_quantized", "common/elementwise_operation_quantized.cl" },
234*c217d954SCole Faust     { "elementwise_operation_DIV_quantized", "common/elementwise_operation_quantized.cl" },
235*c217d954SCole Faust     { "elementwise_operation_SQUARED_DIFF_quantized", "common/elementwise_operation_quantized.cl" },
236*c217d954SCole Faust     { "elementwise_operation_PRELU_quantized", "common/elementwise_operation_quantized.cl" },
237*c217d954SCole Faust     { "elementwise_unary", "common/elementwise_unary.cl" },
238*c217d954SCole Faust     { "fft_digit_reverse_axis_0", "common/fft_digit_reverse.cl" },
239*c217d954SCole Faust     { "fft_digit_reverse_axis_1", "common/fft_digit_reverse.cl" },
240*c217d954SCole Faust     { "fft_radix_2_first_stage_axis_0", "common/fft.cl" },
241*c217d954SCole Faust     { "fft_radix_2_first_stage_axis_1", "common/fft.cl" },
242*c217d954SCole Faust     { "fft_radix_2_axis_0", "common/fft.cl" },
243*c217d954SCole Faust     { "fft_radix_2_axis_1", "common/fft.cl" },
244*c217d954SCole Faust     { "fft_radix_3_first_stage_axis_0", "common/fft.cl" },
245*c217d954SCole Faust     { "fft_radix_3_first_stage_axis_1", "common/fft.cl" },
246*c217d954SCole Faust     { "fft_radix_3_axis_0", "common/fft.cl" },
247*c217d954SCole Faust     { "fft_radix_3_axis_1", "common/fft.cl" },
248*c217d954SCole Faust     { "fft_radix_4_first_stage_axis_0", "common/fft.cl" },
249*c217d954SCole Faust     { "fft_radix_4_first_stage_axis_1", "common/fft.cl" },
250*c217d954SCole Faust     { "fft_radix_4_axis_0", "common/fft.cl" },
251*c217d954SCole Faust     { "fft_radix_4_axis_1", "common/fft.cl" },
252*c217d954SCole Faust     { "fft_radix_5_first_stage_axis_0", "common/fft.cl" },
253*c217d954SCole Faust     { "fft_radix_5_first_stage_axis_1", "common/fft.cl" },
254*c217d954SCole Faust     { "fft_radix_5_axis_0", "common/fft.cl" },
255*c217d954SCole Faust     { "fft_radix_5_axis_1", "common/fft.cl" },
256*c217d954SCole Faust     { "fft_radix_7_first_stage_axis_0", "common/fft.cl" },
257*c217d954SCole Faust     { "fft_radix_7_first_stage_axis_1", "common/fft.cl" },
258*c217d954SCole Faust     { "fft_radix_7_axis_0", "common/fft.cl" },
259*c217d954SCole Faust     { "fft_radix_7_axis_1", "common/fft.cl" },
260*c217d954SCole Faust     { "fft_radix_8_first_stage_axis_0", "common/fft.cl" },
261*c217d954SCole Faust     { "fft_radix_8_first_stage_axis_1", "common/fft.cl" },
262*c217d954SCole Faust     { "fft_radix_8_axis_0", "common/fft.cl" },
263*c217d954SCole Faust     { "fft_radix_8_axis_1", "common/fft.cl" },
264*c217d954SCole Faust     { "fft_scale_conj", "common/fft_scale.cl" },
265*c217d954SCole Faust     { "fill_image_borders_constant", "common/fill_border.cl" },
266*c217d954SCole Faust     { "fill_image_borders_replicate", "common/fill_border.cl" },
267*c217d954SCole Faust     { "floor_layer", "common/floor.cl" },
268*c217d954SCole Faust     { "fuse_batchnormalization_layer", "common/batchnormalization_layer.cl" },
269*c217d954SCole Faust     { "gather", "common/gather.cl" },
270*c217d954SCole Faust     { "gemm_ma_f16", "common/gemm.cl" },
271*c217d954SCole Faust     { "gemm_ma_f32", "common/gemm.cl" },
272*c217d954SCole Faust     { "gemm_mv", "common/gemv.cl" },
273*c217d954SCole Faust     { "gemm_mv_quantized", "common/gemv.cl" },
274*c217d954SCole Faust     { "gemm_mm_native", "common/gemm.cl" },
275*c217d954SCole Faust     { "gemm_mm_reshaped_only_rhs_nt_mmul", "common/gemm_reshaped_only_rhs_mmul.cl" },
276*c217d954SCole Faust     { "gemm_mm_reshaped_only_rhs_nt_mmul_texture", "common/gemm_reshaped_only_rhs_mmul.cl" },
277*c217d954SCole Faust     { "gemm_mm_native_post_act_eltwise_op_act", "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl" },
278*c217d954SCole Faust     { "gemm_mm_reshaped_lhs_nt_rhs_t", "common/gemm.cl" },
279*c217d954SCole Faust     { "gemm_mm_reshaped_lhs_nt_rhs_t_texture", "common/gemm.cl" },
280*c217d954SCole Faust     { "gemm_mm_reshaped_lhs_t_rhs_nt", "common/gemm.cl" },
281*c217d954SCole Faust     { "gemm_mm_reshaped_lhs_t_rhs_nt_texture", "common/gemm.cl" },
282*c217d954SCole Faust     { "gemm_mm_reshaped_lhs_nt_rhs_t_post_act_eltwise_op_act", "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped.cl" },
283*c217d954SCole Faust     { "gemm_mm_reshaped_lhs_nt_rhs_t_texture_post_act_eltwise_op_act", "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped.cl" },
284*c217d954SCole Faust     { "gemm_mm_reshaped_lhs_t_rhs_nt_post_act_eltwise_op_act", "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped.cl" },
285*c217d954SCole Faust     { "gemm_mm_reshaped_lhs_t_rhs_nt_texture_post_act_eltwise_op_act", "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped.cl" },
286*c217d954SCole Faust     { "gemm_mm_reshaped_only_rhs_nt", "common/gemm.cl" },
287*c217d954SCole Faust     { "gemm_mm_reshaped_only_rhs_nt_texture", "common/gemm.cl" },
288*c217d954SCole Faust     { "gemm_mm_reshaped_only_rhs_t", "common/gemm.cl" },
289*c217d954SCole Faust     { "gemm_mm_reshaped_only_rhs_t_texture", "common/gemm.cl" },
290*c217d954SCole Faust     { "gemm_mm_reshaped_only_rhs_nt_post_act_eltwise_op_act", "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped_only_rhs.cl" },
291*c217d954SCole Faust     { "gemm_mm_reshaped_only_rhs_nt_texture_post_act_eltwise_op_act", "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped_only_rhs.cl" },
292*c217d954SCole Faust     { "gemm_mm_reshaped_only_rhs_t_post_act_eltwise_op_act", "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped_only_rhs.cl" },
293*c217d954SCole Faust     { "gemm_mm_reshaped_only_rhs_t_texture_post_act_eltwise_op_act", "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped_only_rhs.cl" },
294*c217d954SCole Faust     { "gemm_lc_vm_f32", "common/gemm.cl" },
295*c217d954SCole Faust     { "gemm_reshape_lhs_matrix_nt", "common/gemm_utils.cl" },
296*c217d954SCole Faust     { "gemm_reshape_lhs_matrix_t", "common/gemm_utils.cl" },
297*c217d954SCole Faust     { "gemm_reshape_rhs_matrix_nt", "common/gemm_utils.cl" },
298*c217d954SCole Faust     { "gemm_reshape_rhs_matrix_t", "common/gemm_utils.cl" },
299*c217d954SCole Faust     { "gemmlowp_matrix_a_reduction", "common/gemmlowp.cl" },
300*c217d954SCole Faust     { "gemmlowp_matrix_a_reduction_dot8", "common/gemmlowp.cl" },
301*c217d954SCole Faust     { "gemmlowp_matrix_b_reduction", "common/gemmlowp.cl" },
302*c217d954SCole Faust     { "gemmlowp_mm_native", "common/gemmlowp.cl" },
303*c217d954SCole Faust     { "gemmlowp_mm_reshaped_lhs_nt_rhs_t", "common/gemmlowp.cl" },
304*c217d954SCole Faust     { "gemmlowp_mm_reshaped_only_rhs_t", "common/gemmlowp.cl" },
305*c217d954SCole Faust     { "gemmlowp_mm_reshaped_only_rhs_t_fused_output_stage_fixedpoint", "common/gemmlowp.cl" },
306*c217d954SCole Faust     { "gemmlowp_mm_reshaped_only_rhs_mmul", "common/gemmlowp_reshaped_only_rhs_mmul.cl" },
307*c217d954SCole Faust     { "gemmlowp_offset_contribution", "common/gemmlowp.cl" },
308*c217d954SCole Faust     { "gemmlowp_offset_contribution_quantize_down", "common/gemmlowp.cl" },
309*c217d954SCole Faust     { "gemmlowp_offset_contribution_quantize_down_fixedpoint", "common/gemmlowp.cl" },
310*c217d954SCole Faust     { "gemmlowp_output_stage_quantize_down", "common/gemmlowp.cl" },
311*c217d954SCole Faust     { "gemmlowp_output_stage_quantize_down_fixedpoint", "common/gemmlowp.cl" },
312*c217d954SCole Faust     { "gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16", "common/gemmlowp.cl" },
313*c217d954SCole Faust     { "gemmlowp_output_stage_quantize_down_float", "common/gemmlowp.cl" },
314*c217d954SCole Faust     { "generate_proposals_compute_all_anchors", "common/generate_proposals.cl" },
315*c217d954SCole Faust     { "generate_proposals_compute_all_anchors_quantized", "common/generate_proposals_quantized.cl" },
316*c217d954SCole Faust     { "instance_normalization", "common/instance_normalization.cl" },
317*c217d954SCole Faust     { "compute_mean_var", "common/instance_normalization.cl" },
318*c217d954SCole Faust     { "l2_normalize_x", "common/l2_normalize.cl" },
319*c217d954SCole Faust     { "l2_normalize_y", "common/l2_normalize.cl" },
320*c217d954SCole Faust     { "l2_normalize_z", "common/l2_normalize.cl" },
321*c217d954SCole Faust     { "max_unpooling_layer_2", "common/unpooling_layer.cl" },
322*c217d954SCole Faust     { "mean_stddev_normalization", "common/mean_stddev_normalization.cl" },
323*c217d954SCole Faust     { "memset", "common/memset.cl" },
324*c217d954SCole Faust     { "minmax_layer", "common/minmax_layer.cl" },
325*c217d954SCole Faust     { "non_max_suppression", "common/nonmax.cl" },
326*c217d954SCole Faust     { "pad_layer_constant", "common/pad_layer.cl" },
327*c217d954SCole Faust     { "pad_layer_symmetric_reflect", "common/pad_layer.cl" },
328*c217d954SCole Faust     { "permute", "common/permute.cl" },
329*c217d954SCole Faust     { "pixelwise_mul_complex", "common/pixelwise_mul_float.cl" },
330*c217d954SCole Faust     { "pixelwise_mul_float", "common/pixelwise_mul_float.cl" },
331*c217d954SCole Faust     { "pixelwise_mul_int", "common/pixelwise_mul_int.cl" },
332*c217d954SCole Faust     { "pixelwise_mul_quantized", "common/pixelwise_mul_int.cl" },
333*c217d954SCole Faust     { "qlstm_layer_normalization", "common/qlstm_layer_normalization.cl" },
334*c217d954SCole Faust     { "quantization_layer", "common/quantization_layer.cl" },
335*c217d954SCole Faust     { "range", "common/range.cl" },
336*c217d954SCole Faust     { "range_quantized", "common/range.cl" },
337*c217d954SCole Faust     { "reduction_operation_x", "common/reduction_operation.cl" },
338*c217d954SCole Faust     { "reduction_operation_non_parallel_x", "common/reduction_operation.cl" },
339*c217d954SCole Faust     { "reduction_operation_y", "common/reduction_operation.cl" },
340*c217d954SCole Faust     { "reduction_operation_z", "common/reduction_operation.cl" },
341*c217d954SCole Faust     { "reduction_operation_w", "common/reduction_operation.cl" },
342*c217d954SCole Faust     { "reshape_layer", "common/reshape_layer.cl" },
343*c217d954SCole Faust     { "reshape_to_columns", "common/convolution_layer.cl" },
344*c217d954SCole Faust     { "reverse", "common/reverse.cl" },
345*c217d954SCole Faust     { "roi_align_layer", "common/roi_align_layer.cl" },
346*c217d954SCole Faust     { "roi_align_layer_quantized", "common/roi_align_layer_quantized.cl" },
347*c217d954SCole Faust     { "roi_pooling_layer", "common/roi_pooling_layer.cl" },
348*c217d954SCole Faust     { "select_same_rank", "common/select.cl" },
349*c217d954SCole Faust     { "select_different_rank_2", "common/select.cl" },
350*c217d954SCole Faust     { "select_different_rank_n", "common/select.cl" },
351*c217d954SCole Faust     { "softmax_layer_norm", "common/softmax_layer.cl" },
352*c217d954SCole Faust     { "softmax_layer_norm_quantized", "common/softmax_layer_quantized.cl" },
353*c217d954SCole Faust     { "softmax_layer_max_shift_exp_sum_quantized_serial", "common/softmax_layer_quantized.cl" },
354*c217d954SCole Faust     { "softmax_layer_max_shift_exp_sum_quantized_parallel", "common/softmax_layer_quantized.cl" },
355*c217d954SCole Faust     { "softmax_layer_max_shift_exp_sum_serial", "common/softmax_layer.cl" },
356*c217d954SCole Faust     { "softmax_layer_max_shift_exp_sum_parallel", "common/softmax_layer.cl" },
357*c217d954SCole Faust     { "stack_layer", "common/stack_layer.cl" },
358*c217d954SCole Faust     { "strided_slice", "common/slice_ops.cl" },
359*c217d954SCole Faust     { "tile", "common/tile.cl" },
360*c217d954SCole Faust     { "transpose", "common/transpose.cl" },
361*c217d954SCole Faust #ifdef ENABLE_NCHW_KERNELS
362*c217d954SCole Faust     { "batch_to_space_nchw", "nchw/batch_to_space.cl" },
363*c217d954SCole Faust     { "batch_to_space_static_nchw", "nchw/batch_to_space.cl" },
364*c217d954SCole Faust     { "batchnormalization_layer_nchw", "nchw/batchnormalization_layer.cl" },
365*c217d954SCole Faust     { "channel_shuffle_nchw", "nchw/channel_shuffle.cl" },
366*c217d954SCole Faust     { "depth_to_space_nchw", "nchw/depth_to_space.cl" },
367*c217d954SCole Faust     { "dequantization_layer_per_channel_nchw", "nchw/dequantization_layer.cl" },
368*c217d954SCole Faust     { "direct_convolution1x1", "nchw/direct_convolution1x1.cl" },
369*c217d954SCole Faust     { "direct_convolution_nchw", "nchw/direct_convolution.cl" },
370*c217d954SCole Faust 
371*c217d954SCole Faust     { "im2col1x1_stridex1_nchw", "nchw/im2col.cl" },
372*c217d954SCole Faust     { "im2col3x3_nchw", "nchw/im2col.cl" },
373*c217d954SCole Faust     { "im2col5x5_nchw", "nchw/im2col.cl" },
374*c217d954SCole Faust     { "im2col11x11_padx0_pady0_nchw", "nchw/im2col.cl" },
375*c217d954SCole Faust     { "im2col_generic_nchw", "nchw/im2col.cl" },
376*c217d954SCole Faust     { "im2col_generic_padx0_pady0_nchw", "nchw/im2col.cl" },
377*c217d954SCole Faust     { "normalization_layer_cross_map_nchw", "nchw/normalization_layer.cl" },
378*c217d954SCole Faust     { "normalization_layer_in_map_nchw", "nchw/normalization_layer.cl" },
379*c217d954SCole Faust     { "normalize_planar_yuv_layer_nchw", "nchw/normalize_planar_yuv_layer.cl" },
380*c217d954SCole Faust     { "normalize_planar_yuv_layer_q8_nchw", "nchw/normalize_planar_yuv_layer_quantized.cl" },
381*c217d954SCole Faust     { "pooling_layer_MxN_nchw", "nchw/pooling_layer.cl" },
382*c217d954SCole Faust     { "pooling_layer_2_nchw_indices", "nchw/pooling_layer.cl" },
383*c217d954SCole Faust     { "prior_box_layer_nchw", "nchw/prior_box_layer.cl" },
384*c217d954SCole Faust     { "reorg_layer_nchw", "nchw/reorg_layer.cl" },
385*c217d954SCole Faust     { "scale_nearest_neighbour_nchw", "nchw/scale.cl" },
386*c217d954SCole Faust     { "scale_bilinear_nchw", "nchw/scale.cl" },
387*c217d954SCole Faust     { "space_to_batch_nchw", "nchw/space_to_batch.cl" },
388*c217d954SCole Faust     { "space_to_batch_static_nchw", "nchw/space_to_batch.cl" },
389*c217d954SCole Faust     { "space_to_depth_nchw", "nchw/space_to_depth.cl" },
390*c217d954SCole Faust     { "upsample_layer_nchw", "nchw/upsample_layer.cl" },
391*c217d954SCole Faust     { "winograd_filter_transform_2x2_3x3_nchw", "nchw/winograd_filter_transform.cl" },
392*c217d954SCole Faust     { "winograd_filter_transform_2x1_3x1_nchw", "nchw/winograd_filter_transform.cl" },
393*c217d954SCole Faust     { "winograd_filter_transform_1x2_1x3_nchw", "nchw/winograd_filter_transform.cl" },
394*c217d954SCole Faust     { "winograd_filter_transform_4x4_3x3_nchw", "nchw/winograd_filter_transform.cl" },
395*c217d954SCole Faust     { "winograd_filter_transform_4x1_3x1_nchw", "nchw/winograd_filter_transform.cl" },
396*c217d954SCole Faust     { "winograd_filter_transform_1x4_1x3_nchw", "nchw/winograd_filter_transform.cl" },
397*c217d954SCole Faust     { "winograd_filter_transform_4x4_5x5_nchw", "nchw/winograd_filter_transform.cl" },
398*c217d954SCole Faust     { "winograd_filter_transform_4x1_5x1_nchw", "nchw/winograd_filter_transform.cl" },
399*c217d954SCole Faust     { "winograd_filter_transform_1x4_1x5_nchw", "nchw/winograd_filter_transform.cl" },
400*c217d954SCole Faust     { "winograd_input_transform_2x2_3x3_stepz1_nchw", "nchw/winograd_input_transform.cl" },
401*c217d954SCole Faust     { "winograd_input_transform_2x2_3x3_stepz2_nchw", "nchw/winograd_input_transform.cl" },
402*c217d954SCole Faust     { "winograd_input_transform_2x1_3x1_stepz1_nchw", "nchw/winograd_input_transform.cl" },
403*c217d954SCole Faust     { "winograd_input_transform_2x1_3x1_stepz2_nchw", "nchw/winograd_input_transform.cl" },
404*c217d954SCole Faust     { "winograd_input_transform_1x2_1x3_stepz1_nchw", "nchw/winograd_input_transform.cl" },
405*c217d954SCole Faust     { "winograd_input_transform_1x2_1x3_stepz2_nchw", "nchw/winograd_input_transform.cl" },
406*c217d954SCole Faust     { "winograd_input_transform_4x4_3x3_stepz1_nchw", "nchw/winograd_input_transform.cl" },
407*c217d954SCole Faust     { "winograd_input_transform_4x1_3x1_stepz1_nchw", "nchw/winograd_input_transform.cl" },
408*c217d954SCole Faust     { "winograd_input_transform_1x4_1x3_stepz1_nchw", "nchw/winograd_input_transform.cl" },
409*c217d954SCole Faust     { "winograd_input_transform_4x4_5x5_stepz1_nchw", "nchw/winograd_input_transform.cl" },
410*c217d954SCole Faust     { "winograd_input_transform_4x1_5x1_stepz1_nchw", "nchw/winograd_input_transform.cl" },
411*c217d954SCole Faust     { "winograd_input_transform_1x4_1x5_stepz1_nchw", "nchw/winograd_input_transform.cl" },
412*c217d954SCole Faust     { "winograd_output_transform_2x2_3x3_nchw", "nchw/winograd_output_transform.cl" },
413*c217d954SCole Faust     { "winograd_output_transform_2x1_3x1_nchw", "nchw/winograd_output_transform.cl" },
414*c217d954SCole Faust     { "winograd_output_transform_1x2_1x3_nchw", "nchw/winograd_output_transform.cl" },
415*c217d954SCole Faust     { "winograd_output_transform_4x4_3x3_nchw", "nchw/winograd_output_transform.cl" },
416*c217d954SCole Faust     { "winograd_output_transform_4x1_3x1_nchw", "nchw/winograd_output_transform.cl" },
417*c217d954SCole Faust     { "winograd_output_transform_1x4_1x3_nchw", "nchw/winograd_output_transform.cl" },
418*c217d954SCole Faust     { "winograd_output_transform_4x4_5x5_nchw", "nchw/winograd_output_transform.cl" },
419*c217d954SCole Faust     { "winograd_output_transform_4x1_5x1_nchw", "nchw/winograd_output_transform.cl" },
420*c217d954SCole Faust     { "winograd_output_transform_1x4_1x5_nchw", "nchw/winograd_output_transform.cl" },
421*c217d954SCole Faust #endif /* ENABLE_NCHW_KERNELS */
422*c217d954SCole Faust #ifdef ENABLE_NHWC_KERNELS
423*c217d954SCole Faust     { "batch_to_space_nhwc", "nhwc/batch_to_space.cl" },
424*c217d954SCole Faust     { "batch_to_space_static_nhwc", "nhwc/batch_to_space.cl" },
425*c217d954SCole Faust     { "batchnormalization_layer_nhwc", "nhwc/batchnormalization_layer.cl" },
426*c217d954SCole Faust     { "channel_shuffle_nhwc", "nhwc/channel_shuffle.cl" },
427*c217d954SCole Faust     { "depth_to_space_nhwc", "nhwc/depth_to_space.cl" },
428*c217d954SCole Faust     { "dequantization_layer_per_channel_nhwc", "nhwc/dequantization_layer.cl" },
429*c217d954SCole Faust     { "dwc_native_fp_nhwc", "nhwc/dwc_native_fp_nhwc.cl" },
430*c217d954SCole Faust     { "dwc_native_quantized_nhwc", "nhwc/dwc_native_quantized_nhwc.cl" },
431*c217d954SCole Faust     { "direct_convolution_nhwc", "nhwc/direct_convolution.cl" },
432*c217d954SCole Faust     { "direct_convolution3d_ndhwc", "nhwc/direct_convolution3d.cl" },
433*c217d954SCole Faust     { "im2col3x3_nhwc", "nhwc/im2col.cl" },
434*c217d954SCole Faust     { "im2col9x9_nhwc", "nhwc/im2col.cl" },
435*c217d954SCole Faust     { "im2col_generic_nhwc", "nhwc/im2col.cl" },
436*c217d954SCole Faust     { "indirect_convolution_nhwc", "nhwc/indirect_convolution.cl" },
437*c217d954SCole Faust     { "indirect_convolution_address_precalculation", "nhwc/indirect_convolution.cl" },
438*c217d954SCole Faust     { "normalization_layer_cross_map_nhwc", "nhwc/normalization_layer.cl" },
439*c217d954SCole Faust     { "normalization_layer_in_map_nhwc", "nhwc/normalization_layer.cl" },
440*c217d954SCole Faust     { "normalize_planar_yuv_layer_nhwc", "nhwc/normalize_planar_yuv_layer.cl" },
441*c217d954SCole Faust     { "normalize_planar_yuv_layer_q8_nhwc", "nhwc/normalize_planar_yuv_layer_quantized.cl" },
442*c217d954SCole Faust     { "pooling_layer_MxN_nhwc", "nhwc/pooling_layer.cl" },
443*c217d954SCole Faust     { "pooling_layer_2x2_nhwc", "nhwc/pooling_layer.cl" },
444*c217d954SCole Faust     { "pooling_layer_MxN_quantized_nhwc", "nhwc/pooling_layer_quantized.cl" },
445*c217d954SCole Faust     { "pooling_3d_layer_MxN_ndhwc", "nhwc/pooling_3d_layer.cl" },
446*c217d954SCole Faust     { "pooling_3d_layer_MxN_ndhwc_quantized", "nhwc/pooling_3d_layer_quantized.cl" },
447*c217d954SCole Faust     { "reorg_layer_nhwc", "nhwc/reorg_layer.cl" },
448*c217d954SCole Faust     { "scale_nearest_neighbour_nhwc", "nhwc/scale.cl" },
449*c217d954SCole Faust     { "scale_bilinear_nhwc", "nhwc/scale.cl" },
450*c217d954SCole Faust     { "space_to_batch_nhwc", "nhwc/space_to_batch.cl" },
451*c217d954SCole Faust     { "space_to_batch_static_nhwc", "nhwc/space_to_batch.cl" },
452*c217d954SCole Faust     { "space_to_depth_nhwc", "nhwc/space_to_depth.cl" },
453*c217d954SCole Faust     { "transposed_convolution_nhwc", "nhwc/transposed_convolution.cl" },
454*c217d954SCole Faust     { "upsample_layer_nhwc", "nhwc/upsample_layer.cl" },
455*c217d954SCole Faust     { "winograd_filter_transform_4x1_3x1_nhwc", "nhwc/winograd_filter_transform.cl" },
456*c217d954SCole Faust     { "winograd_filter_transform_1x4_1x3_nhwc", "nhwc/winograd_filter_transform.cl" },
457*c217d954SCole Faust     { "winograd_filter_transform_4x4_3x3_nhwc", "nhwc/winograd_filter_transform.cl" },
458*c217d954SCole Faust     { "winograd_filter_transform_4x4_5x5_nhwc", "nhwc/winograd_filter_transform.cl" },
459*c217d954SCole Faust     { "winograd_filter_transform_4x1_5x1_nhwc", "nhwc/winograd_filter_transform.cl" },
460*c217d954SCole Faust     { "winograd_filter_transform_1x4_1x5_nhwc", "nhwc/winograd_filter_transform.cl" },
461*c217d954SCole Faust     { "winograd_filter_transform_2x2_7x7_nhwc", "nhwc/winograd_filter_transform.cl" },
462*c217d954SCole Faust     { "winograd_filter_transform_2x1_7x1_nhwc", "nhwc/winograd_filter_transform.cl" },
463*c217d954SCole Faust     { "winograd_filter_transform_1x2_1x7_nhwc", "nhwc/winograd_filter_transform.cl" },
464*c217d954SCole Faust     { "winograd_input_transform_4x1_3x1_stepz1_nhwc", "nhwc/winograd_input_transform.cl" },
465*c217d954SCole Faust     { "winograd_input_transform_1x4_1x3_stepz1_nhwc", "nhwc/winograd_input_transform.cl" },
466*c217d954SCole Faust     { "winograd_input_transform_4x4_3x3_stepz1_nhwc", "nhwc/winograd_input_transform.cl" },
467*c217d954SCole Faust     { "winograd_input_transform_4x4_5x5_stepz1_nhwc", "nhwc/winograd_input_transform.cl" },
468*c217d954SCole Faust     { "winograd_input_transform_4x1_5x1_stepz1_nhwc", "nhwc/winograd_input_transform.cl" },
469*c217d954SCole Faust     { "winograd_input_transform_1x4_1x5_stepz1_nhwc", "nhwc/winograd_input_transform.cl" },
470*c217d954SCole Faust     { "winograd_input_transform_2x2_7x7_stepz1_nhwc", "nhwc/winograd_input_transform.cl" },
471*c217d954SCole Faust     { "winograd_input_transform_2x1_7x1_stepz1_nhwc", "nhwc/winograd_input_transform.cl" },
472*c217d954SCole Faust     { "winograd_input_transform_1x2_1x7_stepz1_nhwc", "nhwc/winograd_input_transform.cl" },
473*c217d954SCole Faust     { "winograd_output_transform_4x1_3x1_nhwc", "nhwc/winograd_output_transform.cl" },
474*c217d954SCole Faust     { "winograd_output_transform_1x4_1x3_nhwc", "nhwc/winograd_output_transform.cl" },
475*c217d954SCole Faust     { "winograd_output_transform_4x4_3x3_nhwc", "nhwc/winograd_output_transform.cl" },
476*c217d954SCole Faust     { "winograd_output_transform_4x4_5x5_nhwc", "nhwc/winograd_output_transform.cl" },
477*c217d954SCole Faust     { "winograd_output_transform_4x1_5x1_nhwc", "nhwc/winograd_output_transform.cl" },
478*c217d954SCole Faust     { "winograd_output_transform_1x4_1x5_nhwc", "nhwc/winograd_output_transform.cl" },
479*c217d954SCole Faust     { "winograd_output_transform_2x2_7x7_nhwc", "nhwc/winograd_output_transform.cl" },
480*c217d954SCole Faust     { "winograd_output_transform_2x1_7x1_nhwc", "nhwc/winograd_output_transform.cl" },
481*c217d954SCole Faust     { "winograd_output_transform_1x2_1x7_nhwc", "nhwc/winograd_output_transform.cl" },
482*c217d954SCole Faust #endif /* ENABLE_NHWC_KERNELS */
483*c217d954SCole Faust };
484*c217d954SCole Faust 
// Lookup table from an OpenCL program file name (for example "common/gemm.cl" or "helpers.h")
// to the content pulled in from the matching generated "*.clembed" / "*.hembed" file.
//  - The table is only populated when EMBEDDED_KERNELS is defined; otherwise it is an empty map.
//  - Entries under "nchw/" and "nhwc/" are additionally guarded by ENABLE_NCHW_KERNELS and
//    ENABLE_NHWC_KERNELS respectively.
//  - The ".cl" keys use the same naming as the program names stored as values in
//    _kernel_program_map; program() searches this table by program name.
// NOTE(review): each #include is expected to expand to a single string expression holding the
// program source (possibly compressed when ARM_COMPUTE_COMPRESSED_KERNELS is set) - confirm
// against the build step that generates the embed files.
485*c217d954SCole Faust const std::map<std::string, std::string> ClKernelLibrary::_program_source_map =
486*c217d954SCole Faust {
487*c217d954SCole Faust #ifdef EMBEDDED_KERNELS
488*c217d954SCole Faust     {
489*c217d954SCole Faust         "activation_float_helpers.h",
490*c217d954SCole Faust #include "./cl_kernels/activation_float_helpers.hembed"
491*c217d954SCole Faust     },
492*c217d954SCole Faust     {
493*c217d954SCole Faust         "activation_quant_helpers.h",
494*c217d954SCole Faust #include "./cl_kernels/activation_quant_helpers.hembed"
495*c217d954SCole Faust     },
496*c217d954SCole Faust     {
497*c217d954SCole Faust         "common/activation_layer.cl",
498*c217d954SCole Faust #include "./cl_kernels/common/activation_layer.clembed"
499*c217d954SCole Faust     },
500*c217d954SCole Faust     {
501*c217d954SCole Faust         "common/activation_layer_quant.cl",
502*c217d954SCole Faust #include "./cl_kernels/common/activation_layer_quant.clembed"
503*c217d954SCole Faust     },
504*c217d954SCole Faust     {
505*c217d954SCole Faust         "common/arg_min_max.cl",
506*c217d954SCole Faust #include "./cl_kernels/common/arg_min_max.clembed"
507*c217d954SCole Faust     },
508*c217d954SCole Faust     {
509*c217d954SCole Faust         "common/bitwise_op.cl",
510*c217d954SCole Faust #include "./cl_kernels/common/bitwise_op.clembed"
511*c217d954SCole Faust     },
512*c217d954SCole Faust     {
513*c217d954SCole Faust         "common/bounding_box_transform.cl",
514*c217d954SCole Faust #include "./cl_kernels/common/bounding_box_transform.clembed"
515*c217d954SCole Faust     },
516*c217d954SCole Faust     {
517*c217d954SCole Faust         "common/bounding_box_transform_quantized.cl",
518*c217d954SCole Faust #include "./cl_kernels/common/bounding_box_transform_quantized.clembed"
519*c217d954SCole Faust     },
520*c217d954SCole Faust     {
521*c217d954SCole Faust         "common/col2im.cl",
522*c217d954SCole Faust #include "./cl_kernels/common/col2im.clembed"
523*c217d954SCole Faust     },
524*c217d954SCole Faust     {
525*c217d954SCole Faust         "common/comparisons.cl",
526*c217d954SCole Faust #include "./cl_kernels/common/comparisons.clembed"
527*c217d954SCole Faust     },
528*c217d954SCole Faust     {
529*c217d954SCole Faust         "common/concatenate.cl",
530*c217d954SCole Faust #include "./cl_kernels/common/concatenate.clembed"
531*c217d954SCole Faust     },
532*c217d954SCole Faust     {
533*c217d954SCole Faust         "common/convert_fc_weights.cl",
534*c217d954SCole Faust #include "./cl_kernels/common/convert_fc_weights.clembed"
535*c217d954SCole Faust     },
536*c217d954SCole Faust     {
537*c217d954SCole Faust         "common/convolution_layer.cl",
538*c217d954SCole Faust #include "./cl_kernels/common/convolution_layer.clembed"
539*c217d954SCole Faust     },
540*c217d954SCole Faust     {
541*c217d954SCole Faust         "common/copy_tensor.cl",
542*c217d954SCole Faust #include "./cl_kernels/common/copy_tensor.clembed"
543*c217d954SCole Faust     },
544*c217d954SCole Faust     {
545*c217d954SCole Faust         "common/crop_tensor.cl",
546*c217d954SCole Faust #include "./cl_kernels/common/crop_tensor.clembed"
547*c217d954SCole Faust     },
548*c217d954SCole Faust     {
549*c217d954SCole Faust         "common/deconvolution_layer.cl",
550*c217d954SCole Faust #include "./cl_kernels/common/deconvolution_layer.clembed"
551*c217d954SCole Faust     },
552*c217d954SCole Faust     {
553*c217d954SCole Faust         "common/cast.cl",
554*c217d954SCole Faust #include "./cl_kernels/common/cast.clembed"
555*c217d954SCole Faust     },
556*c217d954SCole Faust     {
557*c217d954SCole Faust         "common/dequantization_layer.cl",
558*c217d954SCole Faust #include "./cl_kernels/common/dequantization_layer.clembed"
559*c217d954SCole Faust     },
560*c217d954SCole Faust     {
561*c217d954SCole Faust         "common/elementwise_operation.cl",
562*c217d954SCole Faust #include "./cl_kernels/common/elementwise_operation.clembed"
563*c217d954SCole Faust     },
564*c217d954SCole Faust     {
565*c217d954SCole Faust         "common/elementwise_operation_quantized.cl",
566*c217d954SCole Faust #include "./cl_kernels/common/elementwise_operation_quantized.clembed"
567*c217d954SCole Faust     },
568*c217d954SCole Faust     {
569*c217d954SCole Faust         "common/elementwise_unary.cl",
570*c217d954SCole Faust #include "./cl_kernels/common/elementwise_unary.clembed"
571*c217d954SCole Faust     },
572*c217d954SCole Faust     {
573*c217d954SCole Faust         "common/fft.cl",
574*c217d954SCole Faust #include "./cl_kernels/common/fft.clembed"
575*c217d954SCole Faust     },
576*c217d954SCole Faust     {
577*c217d954SCole Faust         "common/fft_digit_reverse.cl",
578*c217d954SCole Faust #include "./cl_kernels/common/fft_digit_reverse.clembed"
579*c217d954SCole Faust     },
580*c217d954SCole Faust     {
581*c217d954SCole Faust         "common/fft_scale.cl",
582*c217d954SCole Faust #include "./cl_kernels/common/fft_scale.clembed"
583*c217d954SCole Faust     },
584*c217d954SCole Faust     {
585*c217d954SCole Faust         "common/fill_border.cl",
586*c217d954SCole Faust #include "./cl_kernels/common/fill_border.clembed"
587*c217d954SCole Faust     },
588*c217d954SCole Faust     {
589*c217d954SCole Faust         "common/floor.cl",
590*c217d954SCole Faust #include "./cl_kernels/common/floor.clembed"
591*c217d954SCole Faust     },
592*c217d954SCole Faust     {
593*c217d954SCole Faust         "common/gather.cl",
594*c217d954SCole Faust #include "./cl_kernels/common/gather.clembed"
595*c217d954SCole Faust     },
596*c217d954SCole Faust     {
597*c217d954SCole Faust         "common/gemm.cl",
598*c217d954SCole Faust #include "./cl_kernels/common/gemm.clembed"
599*c217d954SCole Faust     },
600*c217d954SCole Faust     {
601*c217d954SCole Faust         "common/gemm_reshaped_only_rhs_mmul.cl",
602*c217d954SCole Faust #include "./cl_kernels/common/gemm_reshaped_only_rhs_mmul.clembed"
603*c217d954SCole Faust     },
604*c217d954SCole Faust     {
605*c217d954SCole Faust         "common/gemm_utils.cl",
606*c217d954SCole Faust #include "./cl_kernels/common/gemm_utils.clembed"
607*c217d954SCole Faust     },
608*c217d954SCole Faust     {
609*c217d954SCole Faust         "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.h",
610*c217d954SCole Faust #include "./cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.hembed"
611*c217d954SCole Faust     },
612*c217d954SCole Faust     {
613*c217d954SCole Faust         "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h",
614*c217d954SCole Faust #include "./cl_kernels/common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.hembed"
615*c217d954SCole Faust     },
616*c217d954SCole Faust     {
617*c217d954SCole Faust         "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl",
618*c217d954SCole Faust #include "./cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.clembed"
619*c217d954SCole Faust     },
620*c217d954SCole Faust     {
621*c217d954SCole Faust         "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped.cl",
622*c217d954SCole Faust #include "./cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped.clembed"
623*c217d954SCole Faust     },
624*c217d954SCole Faust     {
625*c217d954SCole Faust         "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped_only_rhs.cl",
626*c217d954SCole Faust #include "./cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped_only_rhs.clembed"
627*c217d954SCole Faust     },
628*c217d954SCole Faust     {
629*c217d954SCole Faust         "common/gemmlowp.cl",
630*c217d954SCole Faust #include "./cl_kernels/common/gemmlowp.clembed"
631*c217d954SCole Faust     },
632*c217d954SCole Faust     {
633*c217d954SCole Faust         "common/gemmlowp_reshaped_only_rhs_mmul.cl",
634*c217d954SCole Faust #include "./cl_kernels/common/gemmlowp_reshaped_only_rhs_mmul.clembed"
635*c217d954SCole Faust     },
636*c217d954SCole Faust     {
637*c217d954SCole Faust         "common/gemv.cl",
638*c217d954SCole Faust #include "./cl_kernels/common/gemv.clembed"
639*c217d954SCole Faust     },
640*c217d954SCole Faust     {
641*c217d954SCole Faust         "common/generate_proposals.cl",
642*c217d954SCole Faust #include "./cl_kernels/common/generate_proposals.clembed"
643*c217d954SCole Faust     },
644*c217d954SCole Faust     {
645*c217d954SCole Faust         "common/generate_proposals_quantized.cl",
646*c217d954SCole Faust #include "./cl_kernels/common/generate_proposals_quantized.clembed"
647*c217d954SCole Faust     },
648*c217d954SCole Faust     {
649*c217d954SCole Faust         "gemm_helpers.h",
650*c217d954SCole Faust #include "./cl_kernels/gemm_helpers.hembed"
651*c217d954SCole Faust     },
652*c217d954SCole Faust     {
653*c217d954SCole Faust         "helpers.h",
654*c217d954SCole Faust #include "./cl_kernels/helpers.hembed"
655*c217d954SCole Faust     },
656*c217d954SCole Faust     {
657*c217d954SCole Faust         "helpers_asymm.h",
658*c217d954SCole Faust #include "./cl_kernels/helpers_asymm.hembed"
659*c217d954SCole Faust     },
660*c217d954SCole Faust     {
661*c217d954SCole Faust         "repeat.h",
662*c217d954SCole Faust #include "./cl_kernels/repeat.hembed"
663*c217d954SCole Faust     },
664*c217d954SCole Faust     {
665*c217d954SCole Faust         "tile_helpers.h",
666*c217d954SCole Faust #include "./cl_kernels/tile_helpers.hembed"
667*c217d954SCole Faust     },
668*c217d954SCole Faust     {
669*c217d954SCole Faust         "common/instance_normalization.cl",
670*c217d954SCole Faust #include "./cl_kernels/common/instance_normalization.clembed"
671*c217d954SCole Faust     },
672*c217d954SCole Faust     {
673*c217d954SCole Faust         "common/l2_normalize.cl",
674*c217d954SCole Faust #include "./cl_kernels/common/l2_normalize.clembed"
675*c217d954SCole Faust     },
676*c217d954SCole Faust     {
677*c217d954SCole Faust         "common/mean_stddev_normalization.cl",
678*c217d954SCole Faust #include "./cl_kernels/common/mean_stddev_normalization.clembed"
679*c217d954SCole Faust     },
680*c217d954SCole Faust     {
681*c217d954SCole Faust         "common/memset.cl",
682*c217d954SCole Faust #include "./cl_kernels/common/memset.clembed"
683*c217d954SCole Faust     },
684*c217d954SCole Faust     {
685*c217d954SCole Faust         "common/minmax_layer.cl",
686*c217d954SCole Faust #include "./cl_kernels/common/minmax_layer.clembed"
687*c217d954SCole Faust     },
688*c217d954SCole Faust     {
689*c217d954SCole Faust         "common/nonmax.cl",
690*c217d954SCole Faust #include "./cl_kernels/common/nonmax.clembed"
691*c217d954SCole Faust     },
692*c217d954SCole Faust     {
693*c217d954SCole Faust         "common/batchnormalization_layer.cl",
694*c217d954SCole Faust #include "./cl_kernels/common/batchnormalization_layer.clembed"
695*c217d954SCole Faust     },
696*c217d954SCole Faust     {
697*c217d954SCole Faust         "common/pad_layer.cl",
698*c217d954SCole Faust #include "./cl_kernels/common/pad_layer.clembed"
699*c217d954SCole Faust     },
700*c217d954SCole Faust     {
701*c217d954SCole Faust         "common/permute.cl",
702*c217d954SCole Faust #include "./cl_kernels/common/permute.clembed"
703*c217d954SCole Faust     },
704*c217d954SCole Faust     {
705*c217d954SCole Faust         "common/pixelwise_mul_float.cl",
706*c217d954SCole Faust #include "./cl_kernels/common/pixelwise_mul_float.clembed"
707*c217d954SCole Faust     },
708*c217d954SCole Faust     {
709*c217d954SCole Faust         "common/pixelwise_mul_int.cl",
710*c217d954SCole Faust #include "./cl_kernels/common/pixelwise_mul_int.clembed"
711*c217d954SCole Faust     },
712*c217d954SCole Faust     {
713*c217d954SCole Faust         "common/qlstm_layer_normalization.cl",
714*c217d954SCole Faust #include "./cl_kernels/common/qlstm_layer_normalization.clembed"
715*c217d954SCole Faust     },
716*c217d954SCole Faust     {
717*c217d954SCole Faust         "common/quantization_layer.cl",
718*c217d954SCole Faust #include "./cl_kernels/common/quantization_layer.clembed"
719*c217d954SCole Faust     },
720*c217d954SCole Faust     {
721*c217d954SCole Faust         "common/range.cl",
722*c217d954SCole Faust #include "./cl_kernels/common/range.clembed"
723*c217d954SCole Faust     },
724*c217d954SCole Faust     {
725*c217d954SCole Faust         "common/reduction_operation.cl",
726*c217d954SCole Faust #include "./cl_kernels/common/reduction_operation.clembed"
727*c217d954SCole Faust     },
728*c217d954SCole Faust     {
729*c217d954SCole Faust         "common/reshape_layer.cl",
730*c217d954SCole Faust #include "./cl_kernels/common/reshape_layer.clembed"
731*c217d954SCole Faust     },
732*c217d954SCole Faust     {
733*c217d954SCole Faust         "common/reverse.cl",
734*c217d954SCole Faust #include "./cl_kernels/common/reverse.clembed"
735*c217d954SCole Faust     },
736*c217d954SCole Faust     {
737*c217d954SCole Faust         "common/roi_align_layer.cl",
738*c217d954SCole Faust #include "./cl_kernels/common/roi_align_layer.clembed"
739*c217d954SCole Faust     },
740*c217d954SCole Faust     {
741*c217d954SCole Faust         "common/roi_align_layer_quantized.cl",
742*c217d954SCole Faust #include "./cl_kernels/common/roi_align_layer_quantized.clembed"
743*c217d954SCole Faust     },
744*c217d954SCole Faust     {
745*c217d954SCole Faust         "common/roi_pooling_layer.cl",
746*c217d954SCole Faust #include "./cl_kernels/common/roi_pooling_layer.clembed"
747*c217d954SCole Faust     },
748*c217d954SCole Faust     {
749*c217d954SCole Faust         "common/select.cl",
750*c217d954SCole Faust #include "./cl_kernels/common/select.clembed"
751*c217d954SCole Faust     },
752*c217d954SCole Faust     {
753*c217d954SCole Faust         "common/softmax_layer.cl",
754*c217d954SCole Faust #include "./cl_kernels/common/softmax_layer.clembed"
755*c217d954SCole Faust     },
756*c217d954SCole Faust     {
757*c217d954SCole Faust         "common/softmax_layer_quantized.cl",
758*c217d954SCole Faust #include "./cl_kernels/common/softmax_layer_quantized.clembed"
759*c217d954SCole Faust     },
760*c217d954SCole Faust     {
761*c217d954SCole Faust         "common/slice_ops.cl",
762*c217d954SCole Faust #include "./cl_kernels/common/slice_ops.clembed"
763*c217d954SCole Faust     },
764*c217d954SCole Faust     {
765*c217d954SCole Faust         "common/stack_layer.cl",
766*c217d954SCole Faust #include "./cl_kernels/common/stack_layer.clembed"
767*c217d954SCole Faust     },
768*c217d954SCole Faust     {
769*c217d954SCole Faust         "common/tile.cl",
770*c217d954SCole Faust #include "./cl_kernels/common/tile.clembed"
771*c217d954SCole Faust     },
772*c217d954SCole Faust     {
773*c217d954SCole Faust         "common/transpose.cl",
774*c217d954SCole Faust #include "./cl_kernels/common/transpose.clembed"
775*c217d954SCole Faust     },
776*c217d954SCole Faust     {
777*c217d954SCole Faust         "types.h",
778*c217d954SCole Faust #include "./cl_kernels/types.hembed"
779*c217d954SCole Faust     },
780*c217d954SCole Faust     {
781*c217d954SCole Faust         "common/unpooling_layer.cl",
782*c217d954SCole Faust #include "./cl_kernels/common/unpooling_layer.clembed"
783*c217d954SCole Faust     },
784*c217d954SCole Faust #ifdef ENABLE_NCHW_KERNELS
785*c217d954SCole Faust     {
786*c217d954SCole Faust         "nchw/batch_to_space.cl",
787*c217d954SCole Faust #include "./cl_kernels/nchw/batch_to_space.clembed"
788*c217d954SCole Faust     },
789*c217d954SCole Faust     {
790*c217d954SCole Faust         "nchw/channel_shuffle.cl",
791*c217d954SCole Faust #include "./cl_kernels/nchw/channel_shuffle.clembed"
792*c217d954SCole Faust     },
793*c217d954SCole Faust     {
794*c217d954SCole Faust         "nchw/upsample_layer.cl",
795*c217d954SCole Faust #include "./cl_kernels/nchw/upsample_layer.clembed"
796*c217d954SCole Faust     },
797*c217d954SCole Faust     {
798*c217d954SCole Faust         "nchw/depth_to_space.cl",
799*c217d954SCole Faust #include "./cl_kernels/nchw/depth_to_space.clembed"
800*c217d954SCole Faust     },
801*c217d954SCole Faust     {
802*c217d954SCole Faust         "nchw/dequantization_layer.cl",
803*c217d954SCole Faust #include "./cl_kernels/nchw/dequantization_layer.clembed"
804*c217d954SCole Faust     },
805*c217d954SCole Faust     {
806*c217d954SCole Faust         "nchw/direct_convolution.cl",
807*c217d954SCole Faust #include "./cl_kernels/nchw/direct_convolution.clembed"
808*c217d954SCole Faust     },
809*c217d954SCole Faust     {
810*c217d954SCole Faust         "nchw/im2col.cl",
811*c217d954SCole Faust #include "./cl_kernels/nchw/im2col.clembed"
812*c217d954SCole Faust     },
813*c217d954SCole Faust     {
814*c217d954SCole Faust         "nchw/normalization_layer.cl",
815*c217d954SCole Faust #include "./cl_kernels/nchw/normalization_layer.clembed"
816*c217d954SCole Faust     },
817*c217d954SCole Faust     {
818*c217d954SCole Faust         "nchw/normalize_planar_yuv_layer.cl",
819*c217d954SCole Faust #include "./cl_kernels/nchw/normalize_planar_yuv_layer.clembed"
820*c217d954SCole Faust     },
821*c217d954SCole Faust     {
822*c217d954SCole Faust         "nchw/normalize_planar_yuv_layer_quantized.cl",
823*c217d954SCole Faust #include "./cl_kernels/nchw/normalize_planar_yuv_layer_quantized.clembed"
824*c217d954SCole Faust     },
825*c217d954SCole Faust     {
826*c217d954SCole Faust         "nchw/batchnormalization_layer.cl",
827*c217d954SCole Faust #include "./cl_kernels/nchw/batchnormalization_layer.clembed"
828*c217d954SCole Faust     },
829*c217d954SCole Faust     {
830*c217d954SCole Faust         "nchw/pooling_layer.cl",
831*c217d954SCole Faust #include "./cl_kernels/nchw/pooling_layer.clembed"
832*c217d954SCole Faust     },
833*c217d954SCole Faust     {
834*c217d954SCole Faust         "nchw/prior_box_layer.cl",
835*c217d954SCole Faust #include "./cl_kernels/nchw/prior_box_layer.clembed"
836*c217d954SCole Faust     },
837*c217d954SCole Faust     {
838*c217d954SCole Faust         "nchw/reorg_layer.cl",
839*c217d954SCole Faust #include "./cl_kernels/nchw/reorg_layer.clembed"
840*c217d954SCole Faust     },
841*c217d954SCole Faust     {
842*c217d954SCole Faust         "nchw/scale.cl",
843*c217d954SCole Faust #include "./cl_kernels/nchw/scale.clembed"
844*c217d954SCole Faust     },
845*c217d954SCole Faust     {
846*c217d954SCole Faust         "nchw/space_to_batch.cl",
847*c217d954SCole Faust #include "./cl_kernels/nchw/space_to_batch.clembed"
848*c217d954SCole Faust     },
849*c217d954SCole Faust     {
850*c217d954SCole Faust         "nchw/space_to_depth.cl",
851*c217d954SCole Faust #include "./cl_kernels/nchw/space_to_depth.clembed"
852*c217d954SCole Faust     },
853*c217d954SCole Faust     {
854*c217d954SCole Faust         "nchw/winograd_filter_transform.cl",
855*c217d954SCole Faust #include "./cl_kernels/nchw/winograd_filter_transform.clembed"
856*c217d954SCole Faust     },
857*c217d954SCole Faust     {
858*c217d954SCole Faust         "nchw/winograd_input_transform.cl",
859*c217d954SCole Faust #include "./cl_kernels/nchw/winograd_input_transform.clembed"
860*c217d954SCole Faust     },
861*c217d954SCole Faust     {
862*c217d954SCole Faust         "nchw/winograd_output_transform.cl",
863*c217d954SCole Faust #include "./cl_kernels/nchw/winograd_output_transform.clembed"
864*c217d954SCole Faust     },
865*c217d954SCole Faust #endif /* ENABLE_NCHW_KERNELS */
866*c217d954SCole Faust 
867*c217d954SCole Faust #ifdef ENABLE_NHWC_KERNELS
868*c217d954SCole Faust     {
869*c217d954SCole Faust         "nhwc/batch_to_space.cl",
870*c217d954SCole Faust #include "./cl_kernels/nhwc/batch_to_space.clembed"
871*c217d954SCole Faust     },
872*c217d954SCole Faust     {
873*c217d954SCole Faust         "nhwc/channel_shuffle.cl",
874*c217d954SCole Faust #include "./cl_kernels/nhwc/channel_shuffle.clembed"
875*c217d954SCole Faust     },
876*c217d954SCole Faust     {
877*c217d954SCole Faust         "nhwc/upsample_layer.cl",
878*c217d954SCole Faust #include "./cl_kernels/nhwc/upsample_layer.clembed"
879*c217d954SCole Faust     },
880*c217d954SCole Faust     {
881*c217d954SCole Faust         "nhwc/depth_to_space.cl",
882*c217d954SCole Faust #include "./cl_kernels/nhwc/depth_to_space.clembed"
883*c217d954SCole Faust     },
884*c217d954SCole Faust     {
885*c217d954SCole Faust         "nhwc/dequantization_layer.cl",
886*c217d954SCole Faust #include "./cl_kernels/nhwc/dequantization_layer.clembed"
887*c217d954SCole Faust     },
888*c217d954SCole Faust     {
889*c217d954SCole Faust         "nhwc/direct_convolution.cl",
890*c217d954SCole Faust #include "./cl_kernels/nhwc/direct_convolution.clembed"
891*c217d954SCole Faust     },
892*c217d954SCole Faust     {
893*c217d954SCole Faust         "nhwc/direct_convolution3d.cl",
894*c217d954SCole Faust #include "./cl_kernels/nhwc/direct_convolution3d.clembed"
895*c217d954SCole Faust     },
896*c217d954SCole Faust     {
897*c217d954SCole Faust         "nhwc/dwc_native_fp_nhwc.cl",
898*c217d954SCole Faust #include "./cl_kernels/nhwc/dwc_native_fp_nhwc.clembed"
899*c217d954SCole Faust     },
900*c217d954SCole Faust     {
901*c217d954SCole Faust         "nhwc/dwc_native_quantized_nhwc.cl",
902*c217d954SCole Faust #include "./cl_kernels/nhwc/dwc_native_quantized_nhwc.clembed"
903*c217d954SCole Faust     },
904*c217d954SCole Faust     {
905*c217d954SCole Faust         "nhwc/normalization_layer.cl",
906*c217d954SCole Faust #include "./cl_kernels/nhwc/normalization_layer.clembed"
907*c217d954SCole Faust     },
908*c217d954SCole Faust     {
909*c217d954SCole Faust         "nhwc/normalize_planar_yuv_layer.cl",
910*c217d954SCole Faust #include "./cl_kernels/nhwc/normalize_planar_yuv_layer.clembed"
911*c217d954SCole Faust     },
912*c217d954SCole Faust     {
913*c217d954SCole Faust         "nhwc/normalize_planar_yuv_layer_quantized.cl",
914*c217d954SCole Faust #include "./cl_kernels/nhwc/normalize_planar_yuv_layer_quantized.clembed"
915*c217d954SCole Faust     },
916*c217d954SCole Faust     {
917*c217d954SCole Faust         "nhwc/im2col.cl",
918*c217d954SCole Faust #include "./cl_kernels/nhwc/im2col.clembed"
919*c217d954SCole Faust     },
920*c217d954SCole Faust     {
921*c217d954SCole Faust         "nhwc/indirect_convolution.cl",
922*c217d954SCole Faust #include "./cl_kernels/nhwc/indirect_convolution.clembed"
923*c217d954SCole Faust     },
924*c217d954SCole Faust     {
925*c217d954SCole Faust         "nhwc/batchnormalization_layer.cl",
926*c217d954SCole Faust #include "./cl_kernels/nhwc/batchnormalization_layer.clembed"
927*c217d954SCole Faust     },
928*c217d954SCole Faust     {
929*c217d954SCole Faust         "nhwc/pooling_layer.cl",
930*c217d954SCole Faust #include "./cl_kernels/nhwc/pooling_layer.clembed"
931*c217d954SCole Faust     },
932*c217d954SCole Faust     {
933*c217d954SCole Faust         "nhwc/pooling_3d_layer.cl",
934*c217d954SCole Faust #include "./cl_kernels/nhwc/pooling_3d_layer.clembed"
935*c217d954SCole Faust     },
936*c217d954SCole Faust     {
937*c217d954SCole Faust         "nhwc/pooling_3d_layer_quantized.cl",
938*c217d954SCole Faust #include "./cl_kernels/nhwc/pooling_3d_layer_quantized.clembed"
939*c217d954SCole Faust     },
940*c217d954SCole Faust     {
941*c217d954SCole Faust         "nhwc/pooling_layer_quantized.cl",
942*c217d954SCole Faust #include "./cl_kernels/nhwc/pooling_layer_quantized.clembed"
943*c217d954SCole Faust     },
944*c217d954SCole Faust     {
945*c217d954SCole Faust         "nhwc/reorg_layer.cl",
946*c217d954SCole Faust #include "./cl_kernels/nhwc/reorg_layer.clembed"
947*c217d954SCole Faust     },
948*c217d954SCole Faust     {
949*c217d954SCole Faust         "nhwc/scale.cl",
950*c217d954SCole Faust #include "./cl_kernels/nhwc/scale.clembed"
951*c217d954SCole Faust     },
952*c217d954SCole Faust     {
953*c217d954SCole Faust         "nhwc/space_to_batch.cl",
954*c217d954SCole Faust #include "./cl_kernels/nhwc/space_to_batch.clembed"
955*c217d954SCole Faust     },
956*c217d954SCole Faust     {
957*c217d954SCole Faust         "nhwc/space_to_depth.cl",
958*c217d954SCole Faust #include "./cl_kernels/nhwc/space_to_depth.clembed"
959*c217d954SCole Faust     },
960*c217d954SCole Faust     {
961*c217d954SCole Faust         "nhwc/transposed_convolution.cl",
962*c217d954SCole Faust #include "./cl_kernels/nhwc/transposed_convolution.clembed"
963*c217d954SCole Faust     },
964*c217d954SCole Faust     {
965*c217d954SCole Faust         "nhwc/winograd_filter_transform.cl",
966*c217d954SCole Faust #include "./cl_kernels/nhwc/winograd_filter_transform.clembed"
967*c217d954SCole Faust     },
968*c217d954SCole Faust     {
969*c217d954SCole Faust         "nhwc/winograd_input_transform.cl",
970*c217d954SCole Faust #include "./cl_kernels/nhwc/winograd_input_transform.clembed"
971*c217d954SCole Faust     },
972*c217d954SCole Faust     {
973*c217d954SCole Faust         "nhwc/winograd_output_transform.cl",
974*c217d954SCole Faust #include "./cl_kernels/nhwc/winograd_output_transform.clembed"
975*c217d954SCole Faust     },
976*c217d954SCole Faust #endif /* ENABLE_NHWC_KERNELS */
977*c217d954SCole Faust #endif /* EMBEDDED_KERNELS */
978*c217d954SCole Faust };
979*c217d954SCole Faust 
get()980*c217d954SCole Faust ClKernelLibrary &ClKernelLibrary::get()
981*c217d954SCole Faust {
982*c217d954SCole Faust     static ClKernelLibrary _kernel_library;
983*c217d954SCole Faust     return _kernel_library;
984*c217d954SCole Faust }
985*c217d954SCole Faust 
program_name(const std::string & kernel_name) const986*c217d954SCole Faust std::string ClKernelLibrary::program_name(const std::string &kernel_name) const
987*c217d954SCole Faust {
988*c217d954SCole Faust     // Find which program contains the kernel
989*c217d954SCole Faust     auto kernel_program_it = _kernel_program_map.find(kernel_name);
990*c217d954SCole Faust 
991*c217d954SCole Faust     if(_kernel_program_map.end() == kernel_program_it)
992*c217d954SCole Faust     {
993*c217d954SCole Faust         ARM_COMPUTE_ERROR_VAR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
994*c217d954SCole Faust     }
995*c217d954SCole Faust 
996*c217d954SCole Faust     const std::string program_name = kernel_program_it->second;
997*c217d954SCole Faust 
998*c217d954SCole Faust     return program_name;
999*c217d954SCole Faust }
1000*c217d954SCole Faust 
set_kernel_path(std::string kernel_path)1001*c217d954SCole Faust void ClKernelLibrary::set_kernel_path(std::string kernel_path)
1002*c217d954SCole Faust {
1003*c217d954SCole Faust     _kernel_path = std::move(kernel_path);
1004*c217d954SCole Faust     _kernel_path += "/";
1005*c217d954SCole Faust }
1006*c217d954SCole Faust 
kernel_path() const1007*c217d954SCole Faust const std::string &ClKernelLibrary::kernel_path() const
1008*c217d954SCole Faust {
1009*c217d954SCole Faust     return _kernel_path;
1010*c217d954SCole Faust }
1011*c217d954SCole Faust 
program(const std::string & program_name) const1012*c217d954SCole Faust ClKernelLibrary::ClProgramInfo ClKernelLibrary::program(const std::string &program_name) const
1013*c217d954SCole Faust {
1014*c217d954SCole Faust #ifdef EMBEDDED_KERNELS
1015*c217d954SCole Faust #ifdef ARM_COMPUTE_COMPRESSED_KERNELS
1016*c217d954SCole Faust     const auto inflatted_program_source_it = _decompressed_source_map.find(program_name);
1017*c217d954SCole Faust     if(inflatted_program_source_it != _decompressed_source_map.end())
1018*c217d954SCole Faust     {
1019*c217d954SCole Faust         return ClProgramInfo{ inflatted_program_source_it->second, false };
1020*c217d954SCole Faust     }
1021*c217d954SCole Faust #endif /* ARM_COMPUTE_COMPRESSED_KERNELS */
1022*c217d954SCole Faust 
1023*c217d954SCole Faust     const auto program_source_it = _program_source_map.find(program_name);
1024*c217d954SCole Faust     if(program_source_it == _program_source_map.end())
1025*c217d954SCole Faust     {
1026*c217d954SCole Faust         ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
1027*c217d954SCole Faust     }
1028*c217d954SCole Faust     std::string program_source = program_source_it->second;
1029*c217d954SCole Faust 
1030*c217d954SCole Faust #ifdef ARM_COMPUTE_COMPRESSED_KERNELS
1031*c217d954SCole Faust     std::string decompressed_program_source = decompress_zlib(decode_base64(program_source_it->second));
1032*c217d954SCole Faust     ARM_COMPUTE_ERROR_ON_MSG(decompressed_program_source.empty(), "Cannot de-compress requested program");
1033*c217d954SCole Faust     _decompressed_source_map.insert(std::make_pair(program_name, decompressed_program_source));
1034*c217d954SCole Faust     program_source = std::move(decompressed_program_source);
1035*c217d954SCole Faust #endif /* ARM_COMPUTE_COMPRESSED_KERNELS */
1036*c217d954SCole Faust 
1037*c217d954SCole Faust     return ClProgramInfo{ program_source, false };
1038*c217d954SCole Faust #else  /* EMBEDDED_KERNELS */
1039*c217d954SCole Faust     // Check for binary
1040*c217d954SCole Faust     std::string source_name = _kernel_path + program_name;
1041*c217d954SCole Faust     std::string binary_name = source_name + "bin";
1042*c217d954SCole Faust     std::string program_source{};
1043*c217d954SCole Faust     bool        is_binary = false;
1044*c217d954SCole Faust 
1045*c217d954SCole Faust     if(std::ifstream(binary_name).is_open())
1046*c217d954SCole Faust     {
1047*c217d954SCole Faust         program_source = read_file(binary_name, true);
1048*c217d954SCole Faust         is_binary      = true;
1049*c217d954SCole Faust     }
1050*c217d954SCole Faust     else if(std::ifstream(source_name).is_open())
1051*c217d954SCole Faust     {
1052*c217d954SCole Faust         program_source = read_file(source_name, false);
1053*c217d954SCole Faust     }
1054*c217d954SCole Faust     else
1055*c217d954SCole Faust     {
1056*c217d954SCole Faust         ARM_COMPUTE_ERROR_VAR("Kernel file %s does not exist.", source_name.c_str());
1057*c217d954SCole Faust     }
1058*c217d954SCole Faust 
1059*c217d954SCole Faust     return ClProgramInfo{ program_source, is_binary };
1060*c217d954SCole Faust #endif /* EMBEDDED_KERNELS */
1061*c217d954SCole Faust }
1062*c217d954SCole Faust } // namespace opencl
1063*c217d954SCole Faust } // namespace arm_compute
1064