1 /*
2 * Copyright © 2019 Google, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_vla.h"
26
27 /* Lowering for amul instructions, for drivers that support imul24.
28 * This pass will analyze indirect derefs, and convert corresponding
29 * amul instructions to either imul or imul24, depending on the
30 * required range.
31 *
32 * 1) Analyze the uniform variables and build a table of UBOs and SSBOs
33 * that are either too large, or might be too large (unknown size)
34 * for imul24
35 *
36 * 2) Loop thru looking at all the intrinsics, finding dereferences of
37 * large variables, and recursively replacing all amul instructions
38 * used with imul
39 *
 *   3) Finally loop again through all instructions, replacing any remaining
 *      amul with imul24.  At this point any remaining amul instructions
 *      are not involved in calculating an offset into a large variable,
 *      thanks to the 2nd step, so they can be safely replaced with imul24.
44 *
45 * Using two passes over all the instructions lets us handle the case
46 * where, due to CSE, an amul is used to calculate an offset into both
47 * a large and small variable.
48 */
49
/* State shared by every stage of the amul lowering pass. */
typedef struct {
   nir_shader *shader;

   /* Caller-supplied callback used by is_large() to measure the
    * (array-stripped) type of a UBO/SSBO variable:
    */
   int (*type_size)(const struct glsl_type *, bool);

   /* Tables of UBOs and SSBOs, indexed by buffer index (variable binding
    * plus array element), recording whether each buffer is too large to
    * use imul24:
    */
   bool *large_ubos;
   bool *large_ssbos;

   /* for cases where we cannot determine the UBO/SSBO index, track if *any*
    * UBO/SSBO is too large for imul24:
    */
   bool has_large_ubo;
   bool has_large_ssbo;

   /* NOTE(review): not referenced anywhere in the visible code. */
   unsigned max_slot;

   /* Set as soon as any amul has been rewritten; returned by the pass. */
   bool progress;
} lower_state;
71
72 /* Lower 'amul's in offset src of large variables to 'imul': */
73 static bool
lower_large_src(nir_src * src,void * s)74 lower_large_src(nir_src *src, void *s)
75 {
76 lower_state *state = s;
77
78 nir_instr *parent = src->ssa->parent_instr;
79
80 /* No need to visit instructions we've already visited.. this also
81 * avoids infinite recursion when phi's are involved:
82 */
83 if (parent->pass_flags)
84 return false;
85
86 nir_foreach_src(parent, lower_large_src, state);
87
88 if (parent->type == nir_instr_type_alu) {
89 nir_alu_instr *alu = nir_instr_as_alu(parent);
90 if (alu->op == nir_op_amul) {
91 alu->op = nir_op_imul;
92 state->progress = true;
93 }
94 }
95
96 parent->pass_flags = 1;
97
98 return true;
99 }
100
101 static bool
large_ubo(lower_state * state,nir_src src)102 large_ubo(lower_state *state, nir_src src)
103 {
104 if (!nir_src_is_const(src))
105 return state->has_large_ubo;
106 unsigned idx = nir_src_as_uint(src);
107 assert(idx < state->shader->info.num_ubos);
108 return state->large_ubos[idx];
109 }
110
111 static bool
large_ssbo(lower_state * state,nir_src src)112 large_ssbo(lower_state *state, nir_src src)
113 {
114 if (!nir_src_is_const(src))
115 return state->has_large_ssbo;
116 unsigned idx = nir_src_as_uint(src);
117 assert(idx < state->shader->info.num_ssbos);
118 return state->large_ssbos[idx];
119 }
120
121 static void
lower_intrinsic(lower_state * state,nir_intrinsic_instr * intr)122 lower_intrinsic(lower_state *state, nir_intrinsic_instr *intr)
123 {
124 switch (intr->intrinsic) {
125 case nir_intrinsic_load_ubo:
126 // # src[] = { buffer_index, offset }.
127 if (large_ubo(state, intr->src[0]))
128 lower_large_src(&intr->src[1], state);
129 return;
130
131 case nir_intrinsic_load_ssbo:
132 // # src[] = { buffer_index, offset }.
133 if (large_ssbo(state, intr->src[0]))
134 lower_large_src(&intr->src[1], state);
135 return;
136
137 case nir_intrinsic_store_ssbo:
138 // # src[] = { value, block_index, offset }
139 if (large_ssbo(state, intr->src[1]))
140 lower_large_src(&intr->src[2], state);
141 return;
142
143 case nir_intrinsic_ssbo_atomic:
144 case nir_intrinsic_ssbo_atomic_swap:
145 /* 0: SSBO index
146 * 1: offset
147 */
148 if (large_ssbo(state, intr->src[0]))
149 lower_large_src(&intr->src[1], state);
150 return;
151
152 case nir_intrinsic_global_atomic:
153 case nir_intrinsic_global_atomic_swap:
154 case nir_intrinsic_load_global_constant:
155 case nir_intrinsic_load_global:
156 /* just assume we that 24b is not sufficient: */
157 lower_large_src(&intr->src[0], state);
158 return;
159
160 case nir_intrinsic_store_global:
161 /* just assume we that 24b is not sufficient: */
162 lower_large_src(&intr->src[1], state);
163 return;
164
165 /* These should all be small enough to unconditionally use imul24: */
166 case nir_intrinsic_shared_atomic:
167 case nir_intrinsic_shared_atomic_swap:
168 case nir_intrinsic_load_uniform:
169 case nir_intrinsic_load_input:
170 case nir_intrinsic_load_output:
171 case nir_intrinsic_store_output:
172 default:
173 return;
174 }
175 }
176
177 static void
lower_instr(lower_state * state,nir_instr * instr)178 lower_instr(lower_state *state, nir_instr *instr)
179 {
180 if (instr->type == nir_instr_type_intrinsic) {
181 lower_intrinsic(state, nir_instr_as_intrinsic(instr));
182 }
183 }
184
185 static bool
is_large(lower_state * state,nir_variable * var)186 is_large(lower_state *state, nir_variable *var)
187 {
188 const struct glsl_type *type = glsl_without_array(var->type);
189 unsigned size = state->type_size(type, false);
190
191 /* if size is not known (ie. VLA) then assume the worst: */
192 if (!size)
193 return true;
194
195 return size >= (1 << 23);
196 }
197
198 bool
nir_lower_amul(nir_shader * shader,int (* type_size)(const struct glsl_type *,bool))199 nir_lower_amul(nir_shader *shader,
200 int (*type_size)(const struct glsl_type *, bool))
201 {
202 assert(shader->options->has_imul24);
203 assert(type_size);
204
205 NIR_VLA_FILL(bool, large_ubos, shader->info.num_ubos, 0);
206 NIR_VLA_FILL(bool, large_ssbos, shader->info.num_ssbos, 0);
207
208 lower_state state = {
209 .shader = shader,
210 .type_size = type_size,
211 .large_ubos = large_ubos,
212 .large_ssbos = large_ssbos,
213 };
214
215 /* Figure out which UBOs or SSBOs are large enough to be
216 * disqualified from imul24:
217 */
218 nir_foreach_variable_in_shader(var, shader) {
219 if (var->data.mode == nir_var_mem_ubo) {
220 if (is_large(&state, var)) {
221 state.has_large_ubo = true;
222 unsigned size = MAX2(1, glsl_array_size(var->type));
223 for (unsigned i = 0; i < size; i++)
224 state.large_ubos[var->data.binding + i] = true;
225 }
226 } else if (var->data.mode == nir_var_mem_ssbo) {
227 if (is_large(&state, var)) {
228 state.has_large_ssbo = true;
229 unsigned size = MAX2(1, glsl_array_size(var->type));
230 for (unsigned i = 0; i < size; i++)
231 state.large_ssbos[var->data.binding + i] = true;
232 }
233 }
234 }
235
236 nir_shader_clear_pass_flags(shader);
237
238 nir_foreach_function_impl(impl, shader) {
239 nir_foreach_block(block, impl) {
240 nir_foreach_instr(instr, block) {
241 lower_instr(&state, instr);
242 }
243 }
244 }
245
246 /* At this point, all 'amul's used in calculating an offset into
247 * a large variable have been replaced with 'imul'. So remaining
248 * 'amul's can be replaced with 'imul24':
249 *
250 * Note the exception for 64b (such as load/store_global where
251 * address size is 64b) as imul24 cannot have 64b bitsize
252 */
253 nir_foreach_function_impl(impl, shader) {
254 nir_foreach_block(block, impl) {
255 nir_foreach_instr(instr, block) {
256 if (instr->type != nir_instr_type_alu)
257 continue;
258
259 nir_alu_instr *alu = nir_instr_as_alu(instr);
260 if (alu->op != nir_op_amul)
261 continue;
262
263 if (alu->def.bit_size <= 32)
264 alu->op = nir_op_imul24;
265 else
266 alu->op = nir_op_imul;
267
268 state.progress |= true;
269 }
270 }
271
272 nir_metadata_preserve(impl, nir_metadata_control_flow);
273 }
274
275 return state.progress;
276 }
277