xref: /aosp_15_r20/external/ComputeLibrary/src/gpu/cl/kernels/gemm/ClGemmHelpers.h (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2019-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_CL_GEMM_HELPERS_H
25 #define ARM_COMPUTE_CL_GEMM_HELPERS_H
26 
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"

#include <utility>
29 
30 namespace arm_compute
31 {
32 namespace opencl
33 {
34 namespace kernels
35 {
36 namespace gemm
37 {
38 /** Configure @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
39  *
40  * @param[in] m                  Number of rows (M) in the LHS matrix not reshaped
41  * @param[in] n                  Number of columns (N) in the RHS matrix not reshaped
42  * @param[in] m0                 Number of rows processed by each thread/work-item
43  * @param[in] n0                 Number of columns processed by each thread/work-item
44  * @param[in] k0                 Number of inner accumulation performed by each thread/work-item
45  * @param[in] v0                 Number of vertical blocks of size (m0xk0) stored on the same output row
46  * @param[in] h0                 Number of horizontal blocks of size (k0xn0) stored on the same output row
47  * @param[in] lhs_interleave     True if the v0 (m0xk0) blocks have to be interleaved in the output row
48  * @param[in] rhs_interleave     True if the h0 (k0xn0) blocks have to be interleaved in the output row
49  * @param[in] lhs_transpose      True if the (m0xk0) block has to be transposed before been stored
50  * @param[in] rhs_transpose      True if the (k0xn0) block has to be transposed before been stored
51  * @param[in] export_to_cl_image (Optional) True if the RHS reshaped matrix has to be exported to cl_image
52  *
53  * @return @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
54  */
55 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned int m, unsigned int n, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
56                                                                        bool lhs_interleave, bool rhs_interleave, bool lhs_transpose, bool rhs_transpose, bool export_to_cl_image = false);
57 
58 /** Select @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
59  *
60  * This function accepts two pairs of GEMMLHSMatrixInfo/GEMMRHSMatrixInfo where only the first is with cl_image2d support,
61  * and selects the valid one validating the GEMMRHSMatrixInfo. If the validation passes, the functions will return
62  * the first GEMMLHSMatrixInfo/GEMMRHSMatrixInfo pair with cl_image2d support.
63  *
64  * @param[in] info_img  GEMMLHSMatrixInfo/GEMMRHSMatrixInfo with cl_image2d support
65  * @param[in] info_buf  GEMMLHSMatrixInfo/GEMMRHSMatrixInfo to fall-back if cl_image2d cannot be used
66  * @param[in] n         Number of columns (N) in the RHS matrix not reshaped
67  * @param[in] k         Number of rows (K) in the RHS matrix not reshaped
68  * @param[in] b         Batch size
69  * @param[in] data_type Data type
70  *
71  * @return @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
72  */
73 std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> select_lhs_rhs_info(std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_img,
74                                                                     std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_buf,
75                                                                     unsigned int n, unsigned int k, unsigned int b, DataType data_type);
76 
77 /** Update padding required to export the OpenCL buffer to OpenCL image2d
78  *
79  * @param[in,out] tensor ITensorInfo of the tensor required to be exported to OpenCL image2d
80  */
81 void update_padding_for_cl_image(ITensorInfo *tensor);
82 
83 /** Utility function to validate the image2d OpenCL object support on the RHS reshaped matrix
84  *
85  * @param[in] tensor_reshaped_info TensorInfo for the RHS reshaped matrix
86  * @param[in] rhs_info             @ref GEMMRHSMatrixInfo
87  *
88  * @return Status reporting if we can use the image2d OpenCL object on the RHS reshaped matrix
89  */
90 Status validate_image2d_support_on_rhs(const ITensorInfo &tensor_reshaped_info, const GEMMRHSMatrixInfo &rhs_info);
91 
92 /** Determine if the MMUL kernels should be preferred
93  *
94  * @param[in]      m         Number of rows of the LHS matrix
95  * @param[in]      n         Number of columns of the RHS matrix
96  * @param[in]      k         Number of columns of the LHS matrix, rows of the RHS matrix
97  * @param[in]      b         Batch size
98  * @param[in]      data_type Data type FP32/FP16
99  * @param[in, out] best_m0   Suggested M0 (number of rows of the output block) for the kernel
100  * @param[in, out] best_n0   Suggested N0 (number of columns of the output block) for the kernel
101  *
102  * @return true if MMUL kernel is preferred over kernels w/o MMUL, false otherwise
103  */
104 bool is_mmul_kernel_preferred(const unsigned int m, const unsigned int n, const unsigned int k, const unsigned int b,
105                               const DataType data_type, unsigned int &best_m0, unsigned int &best_n0);
106 } // namespace gemm
107 } // namespace kernels
108 } // namespace opencl
109 } // namespace arm_compute
110 #endif /* ARM_COMPUTE_CL_GEMM_HELPERS_H */
111