/*
 * Copyright (c) 2016-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_CLHELPERS_H
#define ARM_COMPUTE_CLHELPERS_H

#include "arm_compute/core/CL/CLTypes.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Types.h"

#include <initializer_list>
#include <set>
#include <string>

namespace arm_compute
{
class CLCompileContext;
class CLBuildOptions;
class ITensorInfo;

enum class DataType;

/** Max vector width of an OpenCL vector */
static constexpr unsigned int max_cl_vector_width = 16;

/** Max number of manually unrolled loop iterations */
static constexpr int max_manual_loop_unrolling = 128;

/** Translates a tensor data type to the appropriate OpenCL type.
 *
 * @param[in] dt @ref DataType to be translated to OpenCL type.
 *
 * @return The string specifying the OpenCL type to be used.
 */
std::string get_cl_type_from_data_type(const DataType &dt);

/** Translates a tensor data type to the appropriate OpenCL promoted type.
 *
 * @param[in] dt @ref DataType to be used to get the promoted OpenCL type.
 *
 * @return The string specifying the OpenCL type to be used.
 */
std::string get_cl_promoted_type_from_data_type(const DataType &dt);

/** Translates the element size to an unsigned integer data type
 *
 * @param[in] element_size Size in bytes of an element.
 *
 * @return The string specifying the OpenCL type to be used.
 */
std::string get_cl_unsigned_type_from_element_size(size_t element_size);

/** Translates the element size to a signed integer data type
 *
 * @param[in] element_size Size in bytes of an element.
 *
 * @return The string specifying the OpenCL type to be used.
 */
std::string get_cl_signed_type_from_element_size(size_t element_size);

/** Translates a tensor data type to the appropriate OpenCL select type.
 *
 * @param[in] dt @ref DataType to be translated to OpenCL select type.
 *
 * @return The string specifying the OpenCL select type to be used.
 */
std::string get_cl_select_type_from_data_type(const DataType &dt);

/** Translates a tensor data type to the appropriate OpenCL dot8 accumulator type.
 *
 * @param[in] dt @ref DataType to be translated to OpenCL dot8 accumulator type.
 *
 * @return The string specifying the OpenCL dot8 accumulator type to be used.
 */
std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt);
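
/* Illustrative sketch (not part of this header): the translation helpers above are
 * typically used when assembling the build options of an OpenCL kernel, e.g.
 *
 *   CLBuildOptions build_opts;
 *   build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(DataType::F16));                 // -DDATA_TYPE=half
 *   build_opts.add_option("-DACC_DATA_TYPE=" + get_cl_dot8_acc_type_from_data_type(DataType::QASYMM8));
 *
 * The macro names (DATA_TYPE, ACC_DATA_TYPE) are examples only; each kernel defines its own.
 */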

/** Get the size of a data type in number of bits.
 *
 * @param[in] dt @ref DataType.
 *
 * @return Number of bits in the data type specified.
 */
std::string get_data_size_from_data_type(const DataType &dt);

/** Helper function to get the GPU target from a CL device
 *
 * @param[in] device A CL device
 *
 * @return The GPU target
 */
GPUTarget get_target_from_device(const cl::Device &device);

/** Helper function to get the highest OpenCL version supported
 *
 * @param[in] device A CL device
 *
 * @return The highest OpenCL version supported
 */
CLVersion get_cl_version(const cl::Device &device);

/** Helper function to get the cl_image pitch alignment in pixels
 *
 * @param[in] device A CL device
 *
 * @return The cl_image pitch alignment in pixels. If an error occurs, the function returns 0.
 */
size_t get_cl_image_pitch_alignment(const cl::Device &device);

/** Helper function to check whether non-uniform work-groups are supported
 *
 * @param[in] device A CL device
 *
 * @return True if the feature is supported
 */
bool get_cl_non_uniform_work_group_supported(const cl::Device &device);

/** Helper function to check whether a given extension is supported
 *
 * @param[in] device         A CL device
 * @param[in] extension_name Name of the extension to be checked
 *
 * @return True if the extension is supported
 */
bool device_supports_extension(const cl::Device &device, const char *extension_name);

/** Helper function to check whether the cl_khr_fp16 extension is supported
 *
 * @param[in] device A CL device
 *
 * @return True if the extension is supported
 */
bool fp16_supported(const cl::Device &device);

/** Helper function to check whether the arm_non_uniform_work_group_size extension is supported
 *
 * @param[in] device A CL device
 *
 * @return True if the extension is supported
 */
bool arm_non_uniform_workgroup_supported(const cl::Device &device);

/** Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported
 *
 * @param[in] device A CL device
 *
 * @return True if the extension is supported
 */
bool dot8_supported(const cl::Device &device);

/** Helper function to check whether the cl_arm_integer_dot_product_accumulate_int8 extension is supported
 *
 * @param[in] device A CL device
 *
 * @return True if the extension is supported
 */
bool dot8_acc_supported(const cl::Device &device);

/** This function checks if the Winograd configuration (defined through the output tile, kernel size and the data layout) is supported on OpenCL
 *
 * @param[in] output_tile Output tile for the Winograd filtering algorithm
 * @param[in] kernel_size Kernel size for the Winograd filtering algorithm
 * @param[in] data_layout Data layout of the input tensor
 *
 * @return True if the configuration is supported
 */
bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout);

/** Helper function to get the preferred native vector width size for built-in scalar types that can be put into vectors
 *
 * @param[in] device A CL device
 * @param[in] dt     Data type
 *
 * @return Preferred vector width
 */
size_t preferred_vector_width(const cl::Device &device, DataType dt);
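
/* Illustrative sketch (not part of this header): the capability queries above are typically
 * combined to decide which kernel variant can be dispatched on the current device, e.g.
 *
 *   const cl::Device &device = CLKernelLibrary::get().get_device();
 *   const bool use_fp16 = fp16_supported(device);
 *   const bool use_dot8 = dot8_supported(device) && dot8_acc_supported(device);
 *   const size_t vec_w  = preferred_vector_width(device, DataType::F32);
 *
 * Obtaining the device through CLKernelLibrary::get().get_device() is just one option;
 * any valid cl::Device can be passed to these helpers.
 */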

/** Helper function to check whether "dummy work-items" are preferred in order to have a power-of-two NDRange
 * If dummy work-items are enabled, it is the OpenCL kernel's responsibility to check whether a work-item is out of range.
 *
 * @param[in] device A CL device
 *
 * @return True if dummy work-items should be preferred when dispatching the NDRange
 */
bool preferred_dummy_work_items_support(const cl::Device &device);

/** Helper function to check whether the cl_khr_image2d_from_buffer extension is supported
 *
 * @param[in] device A CL device
 *
 * @return True if the extension is supported
 */
bool image2d_from_buffer_supported(const cl::Device &device);

/** Creates an OpenCL kernel using a compile context
 *
 * @param[in] ctx         A compile context to be used to create the OpenCL kernel.
 * @param[in] kernel_name The kernel name.
 * @param[in] build_opts  The build options to be used for the OpenCL kernel compilation.
 *
 * @return An OpenCL kernel
 */
cl::Kernel create_kernel(const CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts = std::set<std::string>());

/** Creates a suitable LWS hint object for parallel implementations. Sets the number of work-groups based on the input size.
 * If the input width is smaller than 128, fewer than 8 threads can be used.
 *
 * @param[in] input_dimension Number of elements along the dimension to apply the parallelization
 * @param[in] vector_size     Size of the vector in OpenCL
 *
 * @return An LWS hint object
 */
cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size);

/** Helper function to check if the workgroup batch size modifier parameter is supported on the CL device
 *
 * @param[in] device CL device to check for support
 *
 * @return True if the workgroup batch size modifier parameter is supported, false otherwise
 */
bool get_wbsm_support_info(const cl::Device &device);

/** Helper function to set the workgroup batch size modifier parameter in the kernel
 *
 * @param[in] kernel    CL kernel on which to set the workgroup batch size modifier parameter
 * @param[in] wbsm_hint Workgroup batch size modifier to use
 */
void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint);

/** Helper function to check if we can export the tensor to cl_image
 *
 * @param[in] tensor Input tensor
 *
 * @return True if we can export the tensor to cl_image
 */
bool export_to_cl_image(const ITensorInfo *tensor);

/** Helper function to force unrolling with a pragma when any of the input values (iterations) is greater than @ref max_manual_loop_unrolling
 *
 * This function passes UNROLL_WITH_PRAGMA at compile time when any of the input values is greater than @ref max_manual_loop_unrolling
 *
 * @param[in] built_opts OpenCL kernel build options
 * @param[in] values     Input values (iterations)
 */
void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list<int> values);

/** Helper function to check whether the cl_arm_matrix_multiply extension is supported
 *
 * @param[in] device A CL device
 *
 * @return True if the extension is supported
 */
bool arm_matrix_multiply_supported(const cl::Device &device);
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLHELPERS_H */