xref: /aosp_15_r20/external/ComputeLibrary/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2022-2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "ClTemplateCast.h"
25 
26 #include "src/core/helpers/WindowHelpers.h"
27 #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
28 
29 namespace arm_compute
30 {
31 namespace experimental
32 {
33 namespace dynamic_fusion
34 {
ClTemplateCast(ComponentId id,const ArgumentPack<ITensorInfo> & tensors,const Attributes & attributes)35 ClTemplateCast::ClTemplateCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes)
36     : IGpuTemplateComponentWriter{ id, tensors }, _src{}, _dst{}, _attributes{ attributes }
37 {
38     _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
39     _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
40 
41     ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
42 }
43 
get_name() const44 std::string ClTemplateCast::get_name() const
45 {
46     const size_t src_size = data_size_from_type(_src->data_type());
47     const size_t dst_size = data_size_from_type(_dst->data_type());
48 
49     return (src_size >= dst_size) ? "cast_down" : "cast_up";
50 }
51 
get_component_code(const ComponentGroup & comp_group) const52 std::string ClTemplateCast::get_component_code(const ComponentGroup &comp_group) const
53 {
54     ARM_COMPUTE_UNUSED(comp_group);
55 
56     const std::string kernel_name = get_name();
57     const auto        is_root     = (comp_group.get_root_component()->id() == this->id());
58 
59     std::string code = R"_(
60 //------------------ START KERNEL {{meta_kernel_id}} CAST ---------------------
61 )_";
62 
63     if(is_root)
64     {
65         code += R"_(
66 // IN_0(src)            {{src}}
67 // OUT(dst, accum)      {{dst}}
68 
69 TILE(uint, M0, 1, g_dst_indirect_y);
70 {
71     {{src}}_offset_first_element_in_bytes += get_global_id(2) * {{src}}_stride_z;
72 
73     TILE({{DATA_TYPE_IN}}, M0, N0, {{tmp}});
74     T_LOAD({{DATA_TYPE_IN}}, M0, N0, BUFFER, {{src}}, g_ind_0, g_ind_1, 1, {{src}}_stride_y, {{tmp}});
75 )_";
76     }
77 
78     code += R"_(
79     LOOP_UNROLLING(int, m0, 0, 1, M0,
80     {
81 )_";
82 
83     if(kernel_name == "cast_down" && is_data_type_quantized(_src->data_type()))
84     {
85         code += R"_(
86     {{tmp}}[m0].v ^= (VEC_DATA_TYPE({{DATA_TYPE_IN}}, N0))0x80;
87 )_";
88     }
89 
90     if(kernel_name == "cast_down" && (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
91     {
92         code += R"_(
93     {{dst}}[m0].v = CONVERT_SAT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0));
94 )_";
95     }
96     else
97     {
98         code += R"_(
99     {{dst}}[m0].v = CONVERT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0));
100 )_";
101     }
102 
103     code += R"_(
104     })
105 )_";
106 
107     if(is_root)
108     {
109         code += R"_(
110     LOOP_UNROLLING(int, i, 0, 1, M0,
111     {
112         g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1);
113         g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w);
114         g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h);
115     })
116 }
117 )_";
118     }
119 
120     code += R"_(
121 //------------------ END KERNEL {{meta_kernel_id}} CAST ---------------------
122 )_";
123 
124     return code;
125 }
126 
declare_variables(GpuKernelVariableTable & vtable,const ComponentGroup & comp_group) const127 void ClTemplateCast::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
128 {
129     vtable.declare_variable(
130         comp_group,
131         _src,
132         GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
133         "src");
134 
135     vtable.declare_variable(
136         comp_group,
137         _dst,
138         GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
139         "dst");
140 }
141 
get_tag_lut(const GpuKernelVariableTable & vtable,const ComponentGroup & comp_group) const142 TagLUT ClTemplateCast::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
143 {
144     const auto is_root = (comp_group.get_root_component()->id() == this->id());
145 
146     TagLUT lut{};
147 
148     // Arguments and global shared variables
149     lut["src"] = vtable.get_variable(_src);
150     lut["dst"] = vtable.get_variable(_dst);
151     lut["tmp"] = (is_root) ? lut["src"].value + "_in_data" : lut["src"];
152 
153     const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor());
154     lut["arg_dst"]          = dst_argument.uniq_name;
155 
156     // Local build options
157     lut["meta_kernel_id"] = id();
158 
159     lut["DATA_TYPE_IN"]  = get_cl_type_from_data_type(_src->data_type());
160     lut["DATA_TYPE_OUT"] = get_cl_type_from_data_type(_dst->data_type());
161 
162     return lut;
163 }
164 
get_build_options(const ComponentGroup & comp_group) const165 CLBuildOptions ClTemplateCast::get_build_options(const ComponentGroup &comp_group) const
166 {
167     ARM_COMPUTE_UNUSED(comp_group);
168 
169     const auto         root_window = comp_group.get_root_component()->template_writer()->get_window();
170     const unsigned int n0          = root_window.x().step();
171     const unsigned int m0          = root_window.y().step();
172 
173     // Set build options
174     CLBuildOptions build_opts{};
175     build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
176     build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(_src->dimension(0) % n0));
177     build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
178 
179     return build_opts;
180 }
181 
get_config_id() const182 std::string ClTemplateCast::get_config_id() const
183 {
184     std::string config_id{};
185 
186     config_id += "_";
187     config_id += lower_string(string_from_data_type(_src->data_type()));
188     config_id += "_";
189     config_id += lower_string(string_from_data_type(_dst->data_type()));
190     config_id += "_";
191     config_id += support::cpp11::to_string(_src->dimension(0));
192     config_id += "_";
193     config_id += support::cpp11::to_string(_src->dimension(1));
194 
195     return config_id;
196 }
197 
get_headers_list() const198 std::set<std::string> ClTemplateCast::get_headers_list() const
199 {
200     return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
201 }
202 
get_window() const203 Window ClTemplateCast::get_window() const
204 {
205     ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
206 
207     const unsigned int n0  = adjust_vec_size(16 / _dst->element_size(), _dst->dimension(0));
208     Window             win = calculate_max_window(*_dst, Steps(n0));
209     return win.collapse(win, Window::DimZ);
210 }
211 
212 } // namespace dynamic_fusion
213 } // namespace experimental
214 } // namespace arm_compute
215