/*
 * Copyright © 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "nir.h"
#include "nir_vla.h"

/* Lowering for amul instructions, for drivers that support imul24.
 * This pass analyzes indirect derefs and converts the corresponding
 * amul instructions to either imul or imul24, depending on the
 * required range:
 *
 * 1) Analyze the uniform variables and build tables of UBOs and SSBOs
 *    that are either too large, or might be too large (unknown size),
 *    for imul24.
 *
 * 2) Loop through all the intrinsics, finding dereferences of large
 *    variables, and recursively replacing the amul instructions they
 *    use with imul.
 *
 * 3) Finally, loop through all instructions again, replacing any
 *    remaining amul with imul24.  At this point any remaining amul
 *    instructions are not involved in calculating an offset into a
 *    large variable, thanks to the 2nd step, so they can safely be
 *    replaced with imul24.
 *
 * Using two passes over all the instructions lets us handle the case
 * where, due to CSE, an amul is used to calculate an offset into both
 * a large and a small variable.
 */
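
/* For example (hypothetical GLSL): for an access like `ssbos[n].data[i]`,
 * deref lowering computes the offset with something like
 * `amul(i, stride)`.  If the SSBO being indexed is known to stay below
 * 2^23 bytes, that multiply fits imul24's range and is lowered to
 * imul24; otherwise it becomes a full-width imul.
 */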

typedef struct {
   nir_shader *shader;

   int (*type_size)(const struct glsl_type *, bool);

   /* Tables of UBOs and SSBOs, mapping the buffer index (binding) to
    * whether they are too large to use imul24:
    */
   bool *large_ubos;
   bool *large_ssbos;

   /* For cases where we cannot determine the UBO/SSBO index, track
    * whether *any* UBO/SSBO is too large for imul24:
    */
   bool has_large_ubo;
   bool has_large_ssbo;

   unsigned max_slot;

   bool progress;
} lower_state;

/* Lower 'amul's in the offset src of large variables to 'imul': */
static bool
lower_large_src(nir_src *src, void *s)
{
   lower_state *state = s;

   nir_instr *parent = src->ssa->parent_instr;

   /* No need to visit instructions we've already visited; this also
    * avoids infinite recursion when phis are involved:
    */
   if (parent->pass_flags)
      return false;

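   /* Recursively visit the instructions feeding this one, so every
    * amul in the offset calculation gets converted:
    */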
   nir_foreach_src(parent, lower_large_src, state);

   if (parent->type == nir_instr_type_alu) {
      nir_alu_instr *alu = nir_instr_as_alu(parent);
      if (alu->op == nir_op_amul) {
         alu->op = nir_op_imul;
         state->progress = true;
      }
   }

   parent->pass_flags = 1;

   return true;
}

static bool
large_ubo(lower_state *state, nir_src src)
{
   if (!nir_src_is_const(src))
      return state->has_large_ubo;
   unsigned idx = nir_src_as_uint(src);
   assert(idx < state->shader->info.num_ubos);
   return state->large_ubos[idx];
}

static bool
large_ssbo(lower_state *state, nir_src src)
{
   if (!nir_src_is_const(src))
      return state->has_large_ssbo;
   unsigned idx = nir_src_as_uint(src);
   assert(idx < state->shader->info.num_ssbos);
   return state->large_ssbos[idx];
}

static void
lower_intrinsic(lower_state *state, nir_intrinsic_instr *intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_ubo:
      // # src[] = { buffer_index, offset }.
      if (large_ubo(state, intr->src[0]))
         lower_large_src(&intr->src[1], state);
      return;

   case nir_intrinsic_load_ssbo:
      // # src[] = { buffer_index, offset }.
      if (large_ssbo(state, intr->src[0]))
         lower_large_src(&intr->src[1], state);
      return;

   case nir_intrinsic_store_ssbo:
      // # src[] = { value, block_index, offset }
      if (large_ssbo(state, intr->src[1]))
         lower_large_src(&intr->src[2], state);
      return;

   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap:
      /* 0: SSBO index
       * 1: offset
       */
      if (large_ssbo(state, intr->src[0]))
         lower_large_src(&intr->src[1], state);
      return;

   case nir_intrinsic_global_atomic:
   case nir_intrinsic_global_atomic_swap:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_global:
      /* just assume that 24b is not sufficient: */
      lower_large_src(&intr->src[0], state);
      return;

   case nir_intrinsic_store_global:
      /* just assume that 24b is not sufficient: */
      lower_large_src(&intr->src[1], state);
      return;

   /* These should all be small enough to unconditionally use imul24: */
   case nir_intrinsic_shared_atomic:
   case nir_intrinsic_shared_atomic_swap:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_store_output:
   default:
      return;
   }
}

static void
lower_instr(lower_state *state, nir_instr *instr)
{
   if (instr->type == nir_instr_type_intrinsic) {
      lower_intrinsic(state, nir_instr_as_intrinsic(instr));
   }
}

static bool
is_large(lower_state *state, nir_variable *var)
{
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned size = state->type_size(type, false);

   /* if the size is not known (i.e. VLA) then assume the worst: */
   if (!size)
      return true;

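   /* imul24 multiplies (signed) 24-bit values, so an offset into
    * anything of 2^23 bytes or larger could overflow its range:
    */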
   return size >= (1 << 23);
}

bool
nir_lower_amul(nir_shader *shader,
               int (*type_size)(const struct glsl_type *, bool))
{
   assert(shader->options->has_imul24);
   assert(type_size);

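   /* Stack-allocated per-binding tables, initialized to all-false: */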
   NIR_VLA_FILL(bool, large_ubos, shader->info.num_ubos, 0);
   NIR_VLA_FILL(bool, large_ssbos, shader->info.num_ssbos, 0);

   lower_state state = {
      .shader = shader,
      .type_size = type_size,
      .large_ubos = large_ubos,
      .large_ssbos = large_ssbos,
   };

   /* Figure out which UBOs or SSBOs are large enough to be
    * disqualified from imul24.  An array of blocks occupies one
    * binding per element, so mark each of them:
    */
   nir_foreach_variable_in_shader(var, shader) {
      if (var->data.mode == nir_var_mem_ubo) {
         if (is_large(&state, var)) {
            state.has_large_ubo = true;
            unsigned size = MAX2(1, glsl_array_size(var->type));
            for (unsigned i = 0; i < size; i++)
               state.large_ubos[var->data.binding + i] = true;
         }
      } else if (var->data.mode == nir_var_mem_ssbo) {
         if (is_large(&state, var)) {
            state.has_large_ssbo = true;
            unsigned size = MAX2(1, glsl_array_size(var->type));
            for (unsigned i = 0; i < size; i++)
               state.large_ssbos[var->data.binding + i] = true;
         }
      }
   }

   nir_shader_clear_pass_flags(shader);

   nir_foreach_function_impl(impl, shader) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            lower_instr(&state, instr);
         }
      }
   }

   /* At this point, all 'amul's used in calculating an offset into
    * a large variable have been replaced with 'imul'.  The remaining
    * 'amul's can be replaced with 'imul24'.
    *
    * Note the exception for 64b results (such as load/store_global,
    * where the address size is 64b): imul24 cannot produce a 64b
    * result.
    */
   nir_foreach_function_impl(impl, shader) {
      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_alu)
               continue;

            nir_alu_instr *alu = nir_instr_as_alu(instr);
            if (alu->op != nir_op_amul)
               continue;

            if (alu->def.bit_size <= 32)
               alu->op = nir_op_imul24;
            else
               alu->op = nir_op_imul;

            state.progress |= true;
         }
      }

      nir_metadata_preserve(impl, nir_metadata_control_flow);
   }

   return state.progress;
}