xref: /aosp_15_r20/external/ComputeLibrary/src/core/CL/cl_kernels/repeat.h (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2019-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_REPEAT_H
25 #define ARM_COMPUTE_REPEAT_H
26 
27 #include "helpers.h"
28 
29 /** Macros that help in loop unrolling */
/** Unrolled repetition of an operation that takes three arguments besides the implicit ID.
 *
 * REPEAT_3_<K>(OP, ARG1, ARG2, ARG3) expands to K invocations of OP##_DEF(ID, ARG1, ARG2, ARG3)
 * joined by ';'. The IDs are generated in descending order, from K-1 down to 0, and the final
 * invocation (ID 0) is not followed by a ';'. IDs ten to fifteen are the single tokens A to F,
 * so the ID can always be pasted onto a variable name as one character.
 *
 * @param OP   Name of the operation without its _DEF suffix; OP##_DEF is what gets invoked
 * @param ARG1 First argument handed to OP##_DEF after the ID
 * @param ARG2 Second argument handed to OP##_DEF after the ID
 * @param ARG3 Third argument handed to OP##_DEF after the ID
 */
#define REPEAT_3_1(OP, ARG1, ARG2, ARG3) OP##_DEF(0, ARG1, ARG2, ARG3)
#define REPEAT_3_2(OP, ARG1, ARG2, ARG3) OP##_DEF(1, ARG1, ARG2, ARG3); REPEAT_3_1(OP, ARG1, ARG2, ARG3)
#define REPEAT_3_3(OP, ARG1, ARG2, ARG3) OP##_DEF(2, ARG1, ARG2, ARG3); REPEAT_3_2(OP, ARG1, ARG2, ARG3)
#define REPEAT_3_4(OP, ARG1, ARG2, ARG3) OP##_DEF(3, ARG1, ARG2, ARG3); REPEAT_3_3(OP, ARG1, ARG2, ARG3)
#define REPEAT_3_5(OP, ARG1, ARG2, ARG3) OP##_DEF(4, ARG1, ARG2, ARG3); REPEAT_3_4(OP, ARG1, ARG2, ARG3)
#define REPEAT_3_6(OP, ARG1, ARG2, ARG3) OP##_DEF(5, ARG1, ARG2, ARG3); REPEAT_3_5(OP, ARG1, ARG2, ARG3)
#define REPEAT_3_7(OP, ARG1, ARG2, ARG3) OP##_DEF(6, ARG1, ARG2, ARG3); REPEAT_3_6(OP, ARG1, ARG2, ARG3)
#define REPEAT_3_8(OP, ARG1, ARG2, ARG3) OP##_DEF(7, ARG1, ARG2, ARG3); REPEAT_3_7(OP, ARG1, ARG2, ARG3)
#define REPEAT_3_9(OP, ARG1, ARG2, ARG3) OP##_DEF(8, ARG1, ARG2, ARG3); REPEAT_3_8(OP, ARG1, ARG2, ARG3)
#define REPEAT_3_10(OP, ARG1, ARG2, ARG3) OP##_DEF(9, ARG1, ARG2, ARG3); REPEAT_3_9(OP, ARG1, ARG2, ARG3)
#define REPEAT_3_11(OP, ARG1, ARG2, ARG3) OP##_DEF(A, ARG1, ARG2, ARG3); REPEAT_3_10(OP, ARG1, ARG2, ARG3)
#define REPEAT_3_12(OP, ARG1, ARG2, ARG3) OP##_DEF(B, ARG1, ARG2, ARG3); REPEAT_3_11(OP, ARG1, ARG2, ARG3)
#define REPEAT_3_13(OP, ARG1, ARG2, ARG3) OP##_DEF(C, ARG1, ARG2, ARG3); REPEAT_3_12(OP, ARG1, ARG2, ARG3)
#define REPEAT_3_14(OP, ARG1, ARG2, ARG3) OP##_DEF(D, ARG1, ARG2, ARG3); REPEAT_3_13(OP, ARG1, ARG2, ARG3)
#define REPEAT_3_15(OP, ARG1, ARG2, ARG3) OP##_DEF(E, ARG1, ARG2, ARG3); REPEAT_3_14(OP, ARG1, ARG2, ARG3)
#define REPEAT_3_16(OP, ARG1, ARG2, ARG3) OP##_DEF(F, ARG1, ARG2, ARG3); REPEAT_3_15(OP, ARG1, ARG2, ARG3)

/** Repeat a three-argument operation COUNT times (COUNT in 1..16).
 *
 * The token pasting happens in REPEAT_DEF_3_N, one level below REPEAT_3_N, so that a COUNT
 * given as a macro is replaced by its value before it is pasted onto REPEAT_3_.
 *
 * @param COUNT Number of repetitions; may itself be a macro
 * @param OP    Name of the operation without its _DEF suffix
 * @param ARG1  First argument handed to OP##_DEF after the ID
 * @param ARG2  Second argument handed to OP##_DEF after the ID
 * @param ARG3  Third argument handed to OP##_DEF after the ID
 */
#define REPEAT_DEF_3_N(COUNT, OP, ARG1, ARG2, ARG3) REPEAT_3_##COUNT(OP, ARG1, ARG2, ARG3)
#define REPEAT_3_N(COUNT, OP, ARG1, ARG2, ARG3) REPEAT_DEF_3_N(COUNT, OP, ARG1, ARG2, ARG3)
80 
/** Unrolled repetition of an operation that takes four arguments besides the implicit ID.
 *
 * REPEAT_4_<K>(OP, ARG1, ARG2, ARG3, ARG4) expands to K invocations of
 * OP##_DEF(ID, ARG1, ARG2, ARG3, ARG4) joined by ';'. The IDs are generated in descending order,
 * from K-1 down to 0, and the final invocation (ID 0) is not followed by a ';'. IDs ten to
 * fifteen are the single tokens A to F.
 *
 * @param OP   Name of the operation without its _DEF suffix; OP##_DEF is what gets invoked
 * @param ARG1 First argument handed to OP##_DEF after the ID
 * @param ARG2 Second argument handed to OP##_DEF after the ID
 * @param ARG3 Third argument handed to OP##_DEF after the ID
 * @param ARG4 Fourth argument handed to OP##_DEF after the ID
 */
#define REPEAT_4_1(OP, ARG1, ARG2, ARG3, ARG4) OP##_DEF(0, ARG1, ARG2, ARG3, ARG4)
#define REPEAT_4_2(OP, ARG1, ARG2, ARG3, ARG4) OP##_DEF(1, ARG1, ARG2, ARG3, ARG4); REPEAT_4_1(OP, ARG1, ARG2, ARG3, ARG4)
#define REPEAT_4_3(OP, ARG1, ARG2, ARG3, ARG4) OP##_DEF(2, ARG1, ARG2, ARG3, ARG4); REPEAT_4_2(OP, ARG1, ARG2, ARG3, ARG4)
#define REPEAT_4_4(OP, ARG1, ARG2, ARG3, ARG4) OP##_DEF(3, ARG1, ARG2, ARG3, ARG4); REPEAT_4_3(OP, ARG1, ARG2, ARG3, ARG4)
#define REPEAT_4_5(OP, ARG1, ARG2, ARG3, ARG4) OP##_DEF(4, ARG1, ARG2, ARG3, ARG4); REPEAT_4_4(OP, ARG1, ARG2, ARG3, ARG4)
#define REPEAT_4_6(OP, ARG1, ARG2, ARG3, ARG4) OP##_DEF(5, ARG1, ARG2, ARG3, ARG4); REPEAT_4_5(OP, ARG1, ARG2, ARG3, ARG4)
#define REPEAT_4_7(OP, ARG1, ARG2, ARG3, ARG4) OP##_DEF(6, ARG1, ARG2, ARG3, ARG4); REPEAT_4_6(OP, ARG1, ARG2, ARG3, ARG4)
#define REPEAT_4_8(OP, ARG1, ARG2, ARG3, ARG4) OP##_DEF(7, ARG1, ARG2, ARG3, ARG4); REPEAT_4_7(OP, ARG1, ARG2, ARG3, ARG4)
#define REPEAT_4_9(OP, ARG1, ARG2, ARG3, ARG4) OP##_DEF(8, ARG1, ARG2, ARG3, ARG4); REPEAT_4_8(OP, ARG1, ARG2, ARG3, ARG4)
#define REPEAT_4_10(OP, ARG1, ARG2, ARG3, ARG4) OP##_DEF(9, ARG1, ARG2, ARG3, ARG4); REPEAT_4_9(OP, ARG1, ARG2, ARG3, ARG4)
#define REPEAT_4_11(OP, ARG1, ARG2, ARG3, ARG4) OP##_DEF(A, ARG1, ARG2, ARG3, ARG4); REPEAT_4_10(OP, ARG1, ARG2, ARG3, ARG4)
#define REPEAT_4_12(OP, ARG1, ARG2, ARG3, ARG4) OP##_DEF(B, ARG1, ARG2, ARG3, ARG4); REPEAT_4_11(OP, ARG1, ARG2, ARG3, ARG4)
#define REPEAT_4_13(OP, ARG1, ARG2, ARG3, ARG4) OP##_DEF(C, ARG1, ARG2, ARG3, ARG4); REPEAT_4_12(OP, ARG1, ARG2, ARG3, ARG4)
#define REPEAT_4_14(OP, ARG1, ARG2, ARG3, ARG4) OP##_DEF(D, ARG1, ARG2, ARG3, ARG4); REPEAT_4_13(OP, ARG1, ARG2, ARG3, ARG4)
#define REPEAT_4_15(OP, ARG1, ARG2, ARG3, ARG4) OP##_DEF(E, ARG1, ARG2, ARG3, ARG4); REPEAT_4_14(OP, ARG1, ARG2, ARG3, ARG4)
#define REPEAT_4_16(OP, ARG1, ARG2, ARG3, ARG4) OP##_DEF(F, ARG1, ARG2, ARG3, ARG4); REPEAT_4_15(OP, ARG1, ARG2, ARG3, ARG4)

/** Repeat a four-argument operation COUNT times (COUNT in 1..16).
 *
 * The token pasting happens in REPEAT_DEF_4_N, one level below REPEAT_4_N, so that a COUNT
 * given as a macro is replaced by its value before it is pasted onto REPEAT_4_.
 *
 * @param COUNT Number of repetitions; may itself be a macro
 * @param OP    Name of the operation without its _DEF suffix
 * @param ARG1  First argument handed to OP##_DEF after the ID
 * @param ARG2  Second argument handed to OP##_DEF after the ID
 * @param ARG3  Third argument handed to OP##_DEF after the ID
 * @param ARG4  Fourth argument handed to OP##_DEF after the ID
 */
#define REPEAT_DEF_4_N(COUNT, OP, ARG1, ARG2, ARG3, ARG4) REPEAT_4_##COUNT(OP, ARG1, ARG2, ARG3, ARG4)
#define REPEAT_4_N(COUNT, OP, ARG1, ARG2, ARG3, ARG4) REPEAT_DEF_4_N(COUNT, OP, ARG1, ARG2, ARG3, ARG4)
131 
/** Declare COUNT variables of one type and initialise all of them with the same value.
 *
 * For every ID one statement of the form `DTYPE BASENAME##ID = INIT` is generated.
 *
 * @param IDX      ID suffix pasted onto BASENAME (supplied by the REPEAT_3_N machinery)
 * @param COUNT    Number of variables to declare
 * @param DTYPE    Type of the declared variables
 * @param BASENAME Common prefix of the variable names
 * @param INIT     Initial value given to every variable
 */
#define VAR_INIT_TO_CONST_DEF(IDX, DTYPE, BASENAME, INIT) DTYPE BASENAME##IDX = INIT
#define REPEAT_VAR_INIT_TO_CONST(COUNT, DTYPE, BASENAME, INIT) REPEAT_3_N(COUNT, VAR_INIT_TO_CONST, DTYPE, BASENAME, INIT)
135 
/** Declare COUNT variables, each initialised from an existing variable converted to another type.
 *
 * For every ID one statement of the form `DST_TYPE DST##ID = CONVERT(SRC##ID, DST_TYPE)` is generated.
 *
 * @param IDX      ID suffix pasted onto SRC and DST (supplied by the REPEAT_3_N machinery)
 * @param COUNT    Number of variables to declare
 * @param DST_TYPE Type of the declared variables and target type of the conversion
 * @param SRC      Common prefix of the existing source variables
 * @param DST      Common prefix of the newly declared variables
 */
#define VAR_INIT_CONVERT_DEF(IDX, DST_TYPE, SRC, DST) DST_TYPE DST##IDX = CONVERT(SRC##IDX, DST_TYPE)
#define REPEAT_VAR_INIT_CONVERT(COUNT, DST_TYPE, SRC, DST) REPEAT_3_N(COUNT, VAR_INIT_CONVERT, DST_TYPE, SRC, DST)
139 
/** Declare COUNT variables, each initialised from an existing variable through CONVERT_SAT.
 *
 * For every ID one statement of the form `DST_TYPE DST##ID = CONVERT_SAT(SRC##ID, DST_TYPE)` is generated.
 *
 * @param IDX      ID suffix pasted onto SRC and DST (supplied by the REPEAT_3_N machinery)
 * @param COUNT    Number of variables to declare
 * @param DST_TYPE Type of the declared variables and target type of the conversion
 * @param SRC      Common prefix of the existing source variables
 * @param DST      Common prefix of the newly declared variables
 */
#define VAR_INIT_CONVERT_SAT_DEF(IDX, DST_TYPE, SRC, DST) DST_TYPE DST##IDX = CONVERT_SAT(SRC##IDX, DST_TYPE)
#define REPEAT_VAR_INIT_CONVERT_SAT(COUNT, DST_TYPE, SRC, DST) REPEAT_3_N(COUNT, VAR_INIT_CONVERT_SAT, DST_TYPE, SRC, DST)
143 
/** Add a constant, cast to TYPE, to N variables.
 *
 * For every ID one statement of the form `VAR##ID += (TYPE)(VAL)` is generated.
 * VAL is parenthesised so that the cast covers the whole expression when VAL is
 * not a single token (e.g. `a + b`).
 *
 * @param ID   ID suffix pasted onto VAR (supplied by the REPEAT_3_N machinery)
 * @param N    Number of variables to update
 * @param TYPE Type VAL is cast to before the addition
 * @param VAR  Common prefix of the variables to update
 * @param VAL  Value added to every variable
 */
#define ADD_CONST_TO_VAR_DEF(ID, TYPE, VAR, VAL) VAR##ID += (TYPE)(VAL)
#define REPEAT_ADD_CONST_TO_VAR(N, TYPE, VAR, VAL) REPEAT_3_N(N, ADD_CONST_TO_VAR, TYPE, VAR, VAL)
147 
/** Multiply N variables by a constant and accumulate the products into N other variables.
 *
 * For every ID one statement of the form `VAR_A##ID += VAR_B##ID * (VAL)` is generated.
 * VAL is parenthesised so that an expression argument (e.g. `a + b`) is multiplied as a
 * whole instead of only its first operand.
 *
 * @param ID    ID suffix pasted onto VAR_A and VAR_B (supplied by the REPEAT_3_N machinery)
 * @param N     Number of variables to update
 * @param VAR_A Common prefix of the accumulator variables
 * @param VAR_B Common prefix of the variables that are multiplied by VAL
 * @param VAL   Multiplier applied to every VAR_B##ID
 */
#define MLA_VAR_WITH_CONST_VEC_DEF(ID, VAR_A, VAR_B, VAL) VAR_A##ID += VAR_B##ID * (VAL)
#define REPEAT_MLA_VAR_WITH_CONST_VEC(N, VAR_A, VAR_B, VAL) REPEAT_3_N(N, MLA_VAR_WITH_CONST_VEC, VAR_A, VAR_B, VAL)
151 
/** Add the same value to COUNT variables.
 *
 * For every ID one statement of the form `ACC##ID += ADDEND` is generated.
 * REPEAT_3_N always forwards three arguments, so an empty string literal fills the
 * slot that ADD_VECTOR_TO_VAR_DEF does not use.
 *
 * @param IDX    ID suffix pasted onto ACC (supplied by the REPEAT_3_N machinery)
 * @param UNUSED Placeholder argument; never referenced in the expansion
 * @param COUNT  Number of variables to update
 * @param ACC    Common prefix of the variables to update
 * @param ADDEND Value added to every variable
 */
#define ADD_VECTOR_TO_VAR_DEF(IDX, UNUSED, ACC, ADDEND) ACC##IDX += ADDEND
#define REPEAT_ADD_VECTOR_TO_VAR(COUNT, ACC, ADDEND) REPEAT_3_N(COUNT, ADD_VECTOR_TO_VAR, "", ACC, ADDEND)
155 
/** Add COUNT variables, one by one, onto COUNT other variables.
 *
 * For every ID one statement of the form `DST##ID += SRC##ID` is generated.
 * REPEAT_3_N always forwards three arguments, so an empty string literal fills the
 * slot that ADD_TWO_VARS_DEF does not use.
 *
 * @param IDX    ID suffix pasted onto DST and SRC (supplied by the REPEAT_3_N machinery)
 * @param UNUSED Placeholder argument; never referenced in the expansion
 * @param COUNT  Number of variable pairs to process
 * @param DST    Common prefix of the variables that are updated
 * @param SRC    Common prefix of the variables that are added
 */
#define ADD_TWO_VARS_DEF(IDX, UNUSED, DST, SRC) DST##IDX += SRC##IDX
#define REPEAT_ADD_TWO_VARS(COUNT, DST, SRC) REPEAT_3_N(COUNT, ADD_TWO_VARS, "", DST, SRC)
159 
/** Replace N variables by the maximum of their current value and a constant cast to TYPE.
 *
 * For every ID one statement of the form `VAR##ID = max(VAR##ID, (TYPE)(VAL))` is generated.
 * VAL is parenthesised so that the cast covers the whole expression when VAL is
 * not a single token (e.g. `a + b`).
 *
 * @param ID   ID suffix pasted onto VAR (supplied by the REPEAT_3_N machinery)
 * @param N    Number of variables to update
 * @param TYPE Type VAL is cast to before the comparison
 * @param VAR  Common prefix of the variables to update
 * @param VAL  Lower bound applied to every variable
 */
#define MAX_CONST_VAR_DEF(ID, TYPE, VAR, VAL) VAR##ID = max(VAR##ID, (TYPE)(VAL))
#define REPEAT_MAX_CONST_VAR(N, TYPE, VAR, VAL) REPEAT_3_N(N, MAX_CONST_VAR, TYPE, VAR, VAL)
163 
/** Replace N variables by the minimum of their current value and a constant cast to TYPE.
 *
 * For every ID one statement of the form `VAR##ID = min(VAR##ID, (TYPE)(VAL))` is generated.
 * VAL is parenthesised so that the cast covers the whole expression when VAL is
 * not a single token (e.g. `a + b`).
 *
 * @param ID   ID suffix pasted onto VAR (supplied by the REPEAT_3_N machinery)
 * @param N    Number of variables to update
 * @param TYPE Type VAL is cast to before the comparison
 * @param VAR  Common prefix of the variables to update
 * @param VAL  Upper bound applied to every variable
 */
#define MIN_CONST_VAR_DEF(ID, TYPE, VAR, VAL) VAR##ID = min(VAR##ID, (TYPE)(VAL))
#define REPEAT_MIN_CONST_VAR(N, TYPE, VAR, VAL) REPEAT_3_N(N, MIN_CONST_VAR, TYPE, VAR, VAL)
167 
/** Apply ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE in place to COUNT variables.
 *
 * For every ID one statement of the form
 * `ACC##ID = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(ACC##ID, MULTIPLIER, SHIFT, SZ)` is generated.
 *
 * @param IDX        ID suffix pasted onto ACC (supplied by the REPEAT_4_N machinery)
 * @param COUNT      Number of variables to update
 * @param SZ         Forwarded unchanged as the last argument of the wrapped macro
 * @param ACC        Common prefix of the variables to update
 * @param MULTIPLIER Forwarded unchanged as the second argument of the wrapped macro
 * @param SHIFT      Forwarded unchanged as the third argument of the wrapped macro
 */
#define ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE_DEF(IDX, SZ, ACC, MULTIPLIER, SHIFT) ACC##IDX = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(ACC##IDX, MULTIPLIER, SHIFT, SZ)
#define REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(COUNT, SZ, ACC, MULTIPLIER, SHIFT) REPEAT_4_N(COUNT, ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE, SZ, ACC, MULTIPLIER, SHIFT)
171 
/** Apply ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE in place to COUNT variables.
 *
 * For every ID one statement of the form
 * `ACC##ID = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(ACC##ID, MULTIPLIER, SHIFT, SZ)` is generated.
 *
 * @param IDX        ID suffix pasted onto ACC (supplied by the REPEAT_4_N machinery)
 * @param COUNT      Number of variables to update
 * @param SZ         Forwarded unchanged as the last argument of the wrapped macro
 * @param ACC        Common prefix of the variables to update
 * @param MULTIPLIER Forwarded unchanged as the second argument of the wrapped macro
 * @param SHIFT      Forwarded unchanged as the third argument of the wrapped macro
 */
#define ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE_DEF(IDX, SZ, ACC, MULTIPLIER, SHIFT) ACC##IDX = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(ACC##IDX, MULTIPLIER, SHIFT, SZ)
#define REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(COUNT, SZ, ACC, MULTIPLIER, SHIFT) REPEAT_4_N(COUNT, ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE, SZ, ACC, MULTIPLIER, SHIFT)
175 
/** Apply a per-channel ASYMM_MULT_BY_QUANT_MULTIPLIER step in place to N variables.
 *
 * For every ID both ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE and
 * ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE are evaluated on VAR##ID, and select() then
 * keeps the LESS_THAN_ONE result where `RES_SHIFT >= 0` holds and the GREATER_THAN_ONE
 * result elsewhere.
 *
 * The two intermediate results live inside a statement expression, so their names
 * (VAR##ID##_shift_lt0 / VAR##ID##_shift_gt0) do not leak into the enclosing scope.
 * The width of the temporaries and of the wrapped macros is taken from SIZE, so the macro
 * no longer relies on an identifier called N0 being defined at the point of use.
 *
 * @param ID        ID suffix pasted onto VAR (supplied by the REPEAT_4_N machinery)
 * @param N         Number of variables to update
 * @param SIZE      Vector width used for VEC_DATA_TYPE(int, SIZE) and forwarded to the wrapped macros
 * @param VAR       Common prefix of the variables to update
 * @param RES_MUL   Forwarded as the multiplier argument of the wrapped macros
 * @param RES_SHIFT Forwarded as the shift argument of the wrapped macros; its sign picks the result
 */
#define ASYMM_MULT_BY_QUANT_MULTIPLIER_PER_CHANNEL_DEF(ID, SIZE, VAR, RES_MUL, RES_SHIFT)                         \
    ({                                                                                                            \
        VEC_DATA_TYPE(int, SIZE)                                                                                  \
        VAR##ID##_shift_lt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_GREATER_THAN_ONE(VAR##ID, RES_MUL, RES_SHIFT, SIZE); \
        VEC_DATA_TYPE(int, SIZE)                                                                                  \
        VAR##ID##_shift_gt0 = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(VAR##ID, RES_MUL, RES_SHIFT, SIZE);    \
        VAR##ID             = select(VAR##ID##_shift_lt0, VAR##ID##_shift_gt0, (RES_SHIFT) >= 0);                 \
    })
#define REPEAT_ASYMM_MULT_BY_QUANT_MULTIPLIER_PER_CHANNEL(N, SIZE, VAR, RES_MUL, RES_SHIFT) REPEAT_4_N(N, ASYMM_MULT_BY_QUANT_MULTIPLIER_PER_CHANNEL, SIZE, VAR, RES_MUL, RES_SHIFT)
186 
187 #endif // ARM_COMPUTE_REPEAT_H
188