1 /*
2  * Copyright (c) 2022-2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "ClTemplateWriter.h"
25 
26 #include "arm_compute/core/CL/CLKernelLibrary.h"
27 #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
28 #include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
29 
30 namespace arm_compute
31 {
32 namespace experimental
33 {
34 namespace dynamic_fusion
35 {
36 /// @note: some tags can be unused since they could be used only for the macros, or only for the component code
replace_tags(const std::string & code_template,const TagLUT & tags)37 std::string ClTemplateWriter::replace_tags(const std::string &code_template, const TagLUT &tags)
38 {
39     std::string replaced_code    = "";
40     bool        scanning_pattern = false;
41     std::string pattern_found    = "";
42     for(size_t i = 0; i < code_template.size() - 1; ++i)
43     {
44         if(!scanning_pattern)
45         {
46             if(code_template[i] == '{' && code_template[i + 1] == '{')
47             {
48                 i += 1;
49                 scanning_pattern = true;
50                 pattern_found    = "";
51             }
52             else
53             {
54                 replaced_code += code_template[i];
55             }
56         }
57         else
58         {
59             if(code_template[i] == '}' && code_template[i + 1] == '}')
60             {
61                 i += 1;
62                 scanning_pattern = false;
63                 std::string err  = "Pattern " + pattern_found + " not found in tags";
64                 ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str());
65                 replaced_code += tags.find(pattern_found)->second.value;
66             }
67             else
68             {
69                 pattern_found += code_template[i];
70             }
71         }
72     }
73 
74     return replaced_code;
75 }
~ClTemplateWriter()76 ClTemplateWriter::~ClTemplateWriter()
77 {
78 }
ClTemplateWriter(const GpuKernelComponentGroup & components)79 ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components)
80     : _components{ components }
81 {
82 }
get_name()83 std::string ClTemplateWriter::get_name()
84 {
85     return write_kernel_name();
86 }
get_code()87 std::string ClTemplateWriter::get_code()
88 {
89     return write_code();
90 }
get_config_id()91 std::string ClTemplateWriter::get_config_id()
92 {
93     std::string config_id = get_name();
94     for(const auto &comp : _components)
95     {
96         config_id += "--" + comp->template_writer()->get_config_id() + "--";
97     }
98 
99     return config_id;
100 }
101 
get_build_options()102 CLBuildOptions ClTemplateWriter::get_build_options()
103 {
104     CLBuildOptions build_opts{};
105 
106     for(const auto &comp : _components)
107     {
108         build_opts.add_options(comp->template_writer()->get_build_options(_components).options());
109     }
110 
111     return build_opts;
112 }
113 
get_window() const114 Window ClTemplateWriter::get_window() const
115 {
116     const auto root_comp = _components.get_root_component();
117     ARM_COMPUTE_ERROR_ON_MSG(root_comp == nullptr, "No root component found");
118     return root_comp->template_writer()->get_window();
119 }
120 
get_tensors()121 std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors()
122 {
123     // Assemble GpuKernelArguments
124     std::map<ITensorInfo::Id, GpuKernelArgument> tensors;
125     for(const auto t : _components.get_argument_tensors())
126     {
127         tensors.emplace(
128             t->id(),
129             GpuKernelArgument{ *t, _vtable.get_variable(t).kernel_argument_info });
130     }
131     return tensors;
132 }
133 
write_code()134 std::string ClTemplateWriter::write_code()
135 {
136     ARM_COMPUTE_ERROR_ON_MSG(_components.empty(), "No components found");
137 
138     // These data structures will hold the data from all the components in the blueprint
139     std::set<std::string>    headers_list{};
140     std::set<std::string>    additional_macros{};
141     std::vector<std::string> component_codes{}; // vector because order matters
142 
143     // Pass 1: Declare all kernel variables
144     for(auto &component : _components)
145     {
146         component->template_writer()->declare_variables(_vtable, _components);
147     }
148     // Pass 2: Generate component codes
149     for(auto &component : _components)
150     {
151         const auto component_writer       = component->template_writer();
152         auto       curr_headers_list      = component_writer->get_headers_list();
153         auto       curr_additional_macros = component_writer->get_additional_macros();
154         auto       curr_component_code    = component_writer->get_component_code(_components);
155         const auto var_lut                = component_writer->get_tag_lut(_vtable, _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
156         component_codes.push_back(replace_tags(curr_component_code, var_lut));
157 
158         headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
159         if(!additional_macros.empty()) // Some components might not have any
160         {
161             additional_macros.insert(replace_tags(curr_additional_macros, var_lut));
162         }
163     }
164 
165     // Step 3: Assemble the data gathered by traversing the graph into the string "code"
166     std::string code = "";
167 
168     for(auto &header : headers_list)
169     {
170 #if defined(EMBEDDED_KERNELS)
171         code += CLKernelLibrary::get().get_program(header).first;
172 #else  // defined(EMBEDDED_KERNELS)
173         code += "#include \"" + header + "\"\n";
174 #endif // defined(EMBEDDED_KERNELS)
175     }
176 
177     for(auto &macros : additional_macros)
178     {
179         code += macros;
180     }
181 
182     auto arguments = _components.get_argument_tensors();
183     std::sort(arguments.begin(), arguments.end(), [](const ITensorInfo * l, const ITensorInfo * r)
184     {
185         return l->id() < r->id();
186     });
187     code += write_kernel_signature(_vtable.get_variable_list(arguments));
188 
189     code += "\n{\n\n";
190 
191     code += "    //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n";
192     code += write_global_section();
193     code += "    //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n";
194 
195     {
196         const auto        tiles = _components.get_tiles();
197         std::stringstream tiles_ss;
198 
199         tiles_ss << "    //------------------ START TILE DECLARATION ---------------------\n";
200 
201         for(auto tile : tiles)
202         {
203             const auto var       = _vtable.get_variable(tile);
204             const auto data_type = get_cl_type_from_data_type(tile->data_type());
205             const auto var_name  = var.uniq_name;
206 
207             tiles_ss << "    TILE(" << data_type << ", M0, N0, " << var_name << ");\n";
208         }
209 
210         tiles_ss << "    //------------------ END TILE DECLARATION ---------------------\n";
211 
212         code += tiles_ss.str();
213     }
214 
215     for(const auto &component_code : component_codes)
216     {
217         code += component_code;
218         code += "\n";
219     }
220 
221     code += "}\n";
222 
223     return code;
224 }
write_global_section() const225 std::string ClTemplateWriter::write_global_section() const
226 {
227     const auto dst_info   = _components.get_any_dst_tensor();
228     const auto dst_w      = dst_info->dimension(0);
229     const auto tile_w     = std::max(1, get_window().x().step());
230     const auto tile_h     = std::max(1, get_window().y().step());
231     auto       leftover_w = dst_w % tile_w;
232 
233     std::string code = "";
234     code += std::string("    int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n";
235     code += std::string("    int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
236     code += std::string("    int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n");
237 
238     code += "    const bool g_cond_x = (g_ind_0 == 0);\n";
239     code += "    const bool g_cond_y = (g_ind_1 == 0);\n";
240 
241     return code;
242 }
write_argument_declaration(const GpuKernelVariableTable::TensorVariable & var) const243 std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const
244 {
245     std::string code;
246     switch(var.kernel_argument_info.type)
247     {
248         case GpuKernelArgumentInfo::Type::Vector:
249         {
250             code += "\n    VECTOR_DECLARATION(" + var.uniq_name + ")";
251             break;
252         }
253         case GpuKernelArgumentInfo::Type::Image:
254         {
255             code += "\n    IMAGE_DECLARATION(" + var.uniq_name + ")";
256             break;
257         }
258         case GpuKernelArgumentInfo::Type::Image_3D:
259         {
260             code += "\n    IMAGE_DECLARATION(" + var.uniq_name + "),";
261             code += "\n    unsigned int " + var.uniq_name + "_stride_z";
262             break;
263         }
264         case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
265         {
266             code += "\n    __read_only image2d_t " + var.uniq_name + "_img,";
267             code += "\n    unsigned int " + var.uniq_name + "_stride_z";
268             break;
269         }
270         case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer:
271         {
272             code += "\n    TENSOR4D_T(" + var.uniq_name + ", BUFFER)";
273             break;
274         }
275         case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
276         {
277             code += "\n    TENSOR4D_T(" + var.uniq_name + ", IMAGE)";
278             break;
279         }
280         case GpuKernelArgumentInfo::Type::Tensor_3D:
281         {
282             code += "\n    TENSOR3D_DECLARATION(" + var.uniq_name + ")";
283             break;
284         }
285         default:
286         {
287             ARM_COMPUTE_ERROR("Unsupported declaration generation for GpuKernelArgumentInfo::Type");
288         }
289     }
290     return code;
291 }
write_kernel_signature(const GpuKernelVariableTable::VariableList & argument_list) const292 std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const
293 {
294     std::string code = "\n__kernel void " + write_kernel_name() + "(";
295 
296     for(int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i)
297     {
298         code += write_argument_declaration(argument_list[i]) + ",";
299     }
300     if(static_cast<int>(argument_list.size()) - 1 >= 0)
301     {
302         code += write_argument_declaration(argument_list[argument_list.size() - 1]);
303     }
304 
305     code += ')';
306 
307     return code;
308 }
write_kernel_name() const309 std::string ClTemplateWriter::write_kernel_name() const
310 {
311     if(_components.empty())
312     {
313         return "empty_kernel";
314     }
315     std::string name = _components.empty() ? "" : _components[0]->template_writer()->get_name();
316     for(size_t i = 1; i < _components.size(); ++i)
317     {
318         name += "___";
319         name += _components[i]->template_writer()->get_name();
320     }
321 
322     return name;
323 }
324 } // namespace dynamic_fusion
325 } // namespace experimental
326 } // namespace arm_compute
327