/*
 * Copyright © 2023 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/nir/nir_builder.h"
#include "util/u_dynarray.h"

#include "intel_nir.h"

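/* Returns true if @instr is a resource_intel intrinsic. */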
static bool
nir_instr_is_resource_intel(nir_instr *instr)
{
   return instr->type == nir_instr_type_intrinsic &&
      nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_resource_intel;
}

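/* nir_foreach_src() callback: queues the instruction producing @src onto the
 * inst_array worklist, unless it is already there.
 */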
static bool
add_src_instr(nir_src *src, void *state)
{
   struct util_dynarray *inst_array = state;
   util_dynarray_foreach(inst_array, nir_instr *, instr_ptr) {
      if (*instr_ptr == src->ssa->parent_instr)
         return true;
   }

   util_dynarray_append(inst_array, nir_instr *, src->ssa->parent_instr);

   return true;
}

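/* Searches backward through the SSA chain feeding @def for a resource_intel
 * intrinsic and returns the first one found, or NULL if @def is produced
 * directly by one (nothing to do) or if none is reachable.
 */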
static nir_intrinsic_instr *
find_resource_intel(struct util_dynarray *inst_array,
                    nir_def *def)
{
   /* If resource_intel is already directly in front of the instruction,
    * there is nothing to do.
    */
   if (nir_instr_is_resource_intel(def->parent_instr))
      return NULL;

   util_dynarray_append(inst_array, nir_instr *, def->parent_instr);

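   /* Worklist scan: idx walks the instructions whose sources still need to
    * be queued, while scan_index checks each newly appended instruction
    * exactly once for resource_intel.
    */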
   unsigned idx = 0, scan_index = 0;
   while (idx < util_dynarray_num_elements(inst_array, nir_instr *)) {
      nir_instr *instr = *util_dynarray_element(inst_array, nir_instr *, idx++);

      for (; scan_index < util_dynarray_num_elements(inst_array, nir_instr *); scan_index++) {
         nir_instr *scan_instr = *util_dynarray_element(inst_array, nir_instr *, scan_index);
         if (nir_instr_is_resource_intel(scan_instr))
            return nir_instr_as_intrinsic(scan_instr);
      }

      nir_foreach_src(instr, add_src_instr, inst_array);
   }

   return NULL;
}

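/* Rematerializes the resource_intel feeding the resource source of @intrin
 * (if any) right before @intrin, cloning it and rewriting the source to use
 * the clone.
 */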
static bool
intel_nir_lower_non_uniform_intrinsic(nir_builder *b,
                                      nir_intrinsic_instr *intrin,
                                      struct util_dynarray *inst_array)
{
   unsigned source;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_get_ssbo_size:
   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap:
   case nir_intrinsic_load_ssbo_block_intel:
   case nir_intrinsic_store_ssbo_block_intel:
   case nir_intrinsic_load_ubo_uniform_block_intel:
   case nir_intrinsic_load_ssbo_uniform_block_intel:
   case nir_intrinsic_image_load_raw_intel:
   case nir_intrinsic_image_store_raw_intel:
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_image_atomic_swap:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_bindless_image_atomic:
   case nir_intrinsic_bindless_image_atomic_swap:
   case nir_intrinsic_image_size:
   case nir_intrinsic_bindless_image_size:
      source = 0;
      break;

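   /* For store_ssbo the value to store is src[0]; the SSBO index is src[1]. */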
   case nir_intrinsic_store_ssbo:
      source = 1;
      break;

   default:
      return false;
   }

   b->cursor = nir_before_instr(&intrin->instr);

   util_dynarray_clear(inst_array);

   nir_intrinsic_instr *old_resource_intel =
      find_resource_intel(inst_array, intrin->src[source].ssa);
   if (old_resource_intel == NULL)
      return false;

   nir_instr *new_instr =
      nir_instr_clone(b->shader, &old_resource_intel->instr);

   nir_instr_insert(b->cursor, new_instr);

   nir_intrinsic_instr *new_resource_intel =
      nir_instr_as_intrinsic(new_instr);

   nir_src_rewrite(&new_resource_intel->src[1], intrin->src[source].ssa);
   nir_src_rewrite(&intrin->src[source], &new_resource_intel->def);

   return true;
}

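/* Tex variant of intel_nir_lower_non_uniform_intrinsic(): rematerializes the
 * resource_intel feeding each texture/sampler handle source right before the
 * tex instruction.
 */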
static bool
intel_nir_lower_non_uniform_tex(nir_builder *b,
                                nir_tex_instr *tex,
                                struct util_dynarray *inst_array)
{
   b->cursor = nir_before_instr(&tex->instr);

   bool progress = false;
   for (unsigned s = 0; s < tex->num_srcs; s++) {
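      /* Only look at the bindless texture/sampler handle sources. */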
      if (tex->src[s].src_type != nir_tex_src_texture_handle &&
          tex->src[s].src_type != nir_tex_src_sampler_handle)
         continue;

      util_dynarray_clear(inst_array);

      nir_intrinsic_instr *old_resource_intel =
         find_resource_intel(inst_array, tex->src[s].src.ssa);
      if (old_resource_intel == NULL)
         continue;

      nir_instr *new_instr =
         nir_instr_clone(b->shader, &old_resource_intel->instr);

      nir_instr_insert(b->cursor, new_instr);

      nir_intrinsic_instr *new_resource_intel =
         nir_instr_as_intrinsic(new_instr);

      nir_src_rewrite(&new_resource_intel->src[1], tex->src[s].src.ssa);
      nir_src_rewrite(&tex->src[s].src, &new_resource_intel->def);

      progress = true;
   }

   return progress;
}

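/* nir_shader_instructions_pass() callback dispatching to the intrinsic/tex
 * lowering above.
 */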
static bool
intel_nir_lower_non_uniform_instr(nir_builder *b,
                                  nir_instr *instr,
                                  void *cb_data)
{
   struct util_dynarray *inst_array = cb_data;

   switch (instr->type) {
   case nir_instr_type_intrinsic:
      return intel_nir_lower_non_uniform_intrinsic(b,
                                                   nir_instr_as_intrinsic(instr),
                                                   inst_array);

   case nir_instr_type_tex:
      return intel_nir_lower_non_uniform_tex(b,
                                             nir_instr_as_tex(instr),
                                             inst_array);

   default:
      return false;
   }
}

/** This pass rematerializes resource_intel intrinsics closer to their use.
 *
 * For example, it will turn this:
 *    ssa_1 = iadd ...
 *    ssa_2 = resource_intel ..., ssa_1, ...
 *    ssa_3 = read_first_invocation ssa_2
 *    ssa_4 = load_ssbo ssa_3, ...
 *
 * into this:
 *    ssa_1 = iadd ...
 *    ssa_3 = read_first_invocation ssa_1
 *    ssa_5 = resource_intel ..., ssa_3, ...
 *    ssa_4 = load_ssbo ssa_5, ...
 *
 * The goal is to have the resource_intel immediately before its use so that
 * the backend compiler knows how the load_ssbo should be compiled (binding
 * table or bindless access, etc.).
 */
bool
intel_nir_lower_non_uniform_resource_intel(nir_shader *shader)
{
   void *mem_ctx = ralloc_context(NULL);

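   /* Scratch array shared by every instruction visit; the lowering helpers
    * clear it before each search.
    */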
   struct util_dynarray inst_array;
   util_dynarray_init(&inst_array, mem_ctx);

   bool ret = nir_shader_instructions_pass(shader,
                                           intel_nir_lower_non_uniform_instr,
                                           nir_metadata_control_flow,
                                           &inst_array);

   ralloc_free(mem_ctx);

   return ret;
}

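/* Returns true for instructions whose sources may legitimately come from a
 * resource_intel intrinsic, i.e. the surface accesses the backend knows how
 * to compile using that information.
 */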
static bool
skip_resource_intel_cleanup(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex:
      return true;

   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin =
         nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_load_ubo:
      case nir_intrinsic_load_ssbo:
      case nir_intrinsic_store_ssbo:
      case nir_intrinsic_get_ssbo_size:
      case nir_intrinsic_ssbo_atomic:
      case nir_intrinsic_ssbo_atomic_swap:
      case nir_intrinsic_load_ssbo_block_intel:
      case nir_intrinsic_store_ssbo_block_intel:
      case nir_intrinsic_load_ssbo_uniform_block_intel:
      case nir_intrinsic_image_load_raw_intel:
      case nir_intrinsic_image_store_raw_intel:
      case nir_intrinsic_image_load:
      case nir_intrinsic_image_store:
      case nir_intrinsic_image_atomic:
      case nir_intrinsic_image_atomic_swap:
      case nir_intrinsic_bindless_image_load:
      case nir_intrinsic_bindless_image_store:
      case nir_intrinsic_bindless_image_atomic:
      case nir_intrinsic_bindless_image_atomic_swap:
      case nir_intrinsic_image_size:
      case nir_intrinsic_bindless_image_size:
         return true;

      default:
         return false;
      }
   }

   default:
      return false;
   }
}

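/* Rewrites every use of a resource_intel def that is not a direct surface
 * access (or that is an if condition) back to the raw value in src[1], so
 * the intrinsic can be eliminated by DCE once all its uses are gone.
 */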
static bool
intel_nir_cleanup_resource_intel_instr(nir_builder *b,
                                       nir_intrinsic_instr *intrin,
                                       void *cb_data)
{
   if (intrin->intrinsic != nir_intrinsic_resource_intel)
      return false;

   bool progress = false;
   nir_foreach_use_safe(src, &intrin->def) {
      if (!nir_src_is_if(src) && skip_resource_intel_cleanup(nir_src_parent_instr(src)))
         continue;

      progress = true;
      nir_src_rewrite(src, intrin->src[1].ssa);
   }

   return progress;
}

/** This pass removes unnecessary resource_intel intrinsics.
 *
 * Uses of a resource_intel value that are not direct surface accesses (the
 * only places where the backend consumes the extra information) are
 * rewritten to the underlying value in src[1].
 *
 * This pass must not be run before intel_nir_lower_non_uniform_resource_intel.
 */
bool
intel_nir_cleanup_resource_intel(nir_shader *shader)
{
   void *mem_ctx = ralloc_context(NULL);

   bool ret = nir_shader_intrinsics_pass(shader,
                                         intel_nir_cleanup_resource_intel_instr,
                                         nir_metadata_control_flow,
                                         NULL);

   ralloc_free(mem_ctx);

   return ret;
}