/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

#include "util/bitscan.h"
#include "util/list.h"
#include "util/u_math.h"

/* Combine stores of vectors to the same deref into a single store.
 *
 * This per-block pass keeps track of stores of vectors to the same
 * destination and combines them into the last store of the sequence.  Dead
 * stores (or dead parts of a store) found during the process are removed.
 *
 * A pending combination becomes an actual combination in various situations:
 * at the end of the block, when another instruction uses the memory, or due
 * to barriers.
 *
 * Besides vectors, the pass also looks at array derefs of vectors.  A direct
 * array deref is treated as a write-masked access to the given component.
 * For an indirect access there's no way to know beforehand which component
 * it will overlap with, so any pending combination is finished -- the
 * indirect store itself remains unmodified.
 */
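
/* As an illustrative sketch (GLSL-style pseudocode, not actual NIR), given a
 * vec2 destination "v", a sequence like
 *
 *    v.x = a;
 *    v.y = b;
 *    v.x = c;
 *
 * ends up as a single store of vec2(c, b) to "v" with write mask xy: the
 * store of "a" is dead and is removed, and the store of "b" is folded into
 * the final store.
 */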

/* Keep track of a group of stores that can be combined.  All stores share the
 * same destination.
 */
struct combined_store {
   struct list_head link;

   nir_component_mask_t write_mask;
   nir_deref_instr *dst;

   /* Latest store added.  It is reused when combining. */
   nir_intrinsic_instr *latest;

   /* Original store for each component.  The number of times a store appears
    * in this array is kept in the store's pass_flags.
    */
   nir_intrinsic_instr *stores[NIR_MAX_VEC_COMPONENTS];
};
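
/* For example (illustrative, continuing the sketch above): after seeing
 * "v.x = a; v.y = b; v.x = c;" the pending combined_store for "v" has
 * write_mask = xy, stores[0] pointing at the "c" store, stores[1] pointing
 * at the "b" store, and latest == the "c" store; the "a" store has already
 * been removed because its pass_flags reference count dropped to zero.
 */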

struct combine_stores_state {
   nir_variable_mode modes;

   /* Pending store combinations. */
   struct list_head pending;

   /* Per function impl state. */
   nir_builder b;
   bool progress;

   /* Allocator and freelist to reuse structs between functions. */
   linear_ctx *lin_ctx;
   struct list_head freelist;
};

static struct combined_store *
alloc_combined_store(struct combine_stores_state *state)
{
   struct combined_store *result;
   if (list_is_empty(&state->freelist)) {
      result = linear_zalloc_child(state->lin_ctx, sizeof(*result));
   } else {
      result = list_first_entry(&state->freelist,
                                struct combined_store,
                                link);
      list_del(&result->link);
      memset(result, 0, sizeof(*result));
   }
   return result;
}

static void
free_combined_store(struct combine_stores_state *state,
                    struct combined_store *combo)
{
   list_del(&combo->link);
   combo->write_mask = 0;
   list_add(&combo->link, &state->freelist);
}

static void
combine_stores(struct combine_stores_state *state,
               struct combined_store *combo)
{
   assert(combo->latest);
   assert(combo->latest->intrinsic == nir_intrinsic_store_deref);

   /* If the combined write mask is the same as the latest store's, we know
    * there is only one store in the combination, so there is nothing to
    * combine.
    */
   if ((combo->write_mask & nir_intrinsic_write_mask(combo->latest)) ==
       combo->write_mask)
      return;

   state->b.cursor = nir_before_instr(&combo->latest->instr);

   /* Build a new vec, to be used as the source for the combined store.  As
    * it gets built, remove previous stores that are no longer needed.
    */
   nir_scalar comps[NIR_MAX_VEC_COMPONENTS] = { 0 };
   unsigned num_components = glsl_get_vector_elements(combo->dst->type);
   unsigned bit_size = combo->latest->src[1].ssa->bit_size;
   for (unsigned i = 0; i < num_components; i++) {
      nir_intrinsic_instr *store = combo->stores[i];
      if (combo->write_mask & (1 << i)) {
         assert(store);

         /* If store->num_components == 1 then we are in the deref-of-vec case
          * and store->src[1] is a scalar.  Otherwise, we're a regular vector
          * store and we have to pick off a component.
          */
         comps[i] = nir_get_scalar(store->src[1].ssa, store->num_components == 1 ? 0 : i);

         assert(store->instr.pass_flags > 0);
         if (--store->instr.pass_flags == 0 && store != combo->latest)
            nir_instr_remove(&store->instr);
      } else {
         comps[i] = nir_get_scalar(nir_undef(&state->b, 1, bit_size), 0);
      }
   }
   assert(combo->latest->instr.pass_flags == 0);
   nir_def *vec = nir_vec_scalars(&state->b, comps, num_components);

   /* Fix the latest store with the combined information. */
   nir_intrinsic_instr *store = combo->latest;

   /* In this case, our store is an array deref of a vector, so we need to
    * rewrite it to use a deref to the whole vector.
    */
   if (store->num_components == 1) {
      store->num_components = num_components;
      nir_src_rewrite(&store->src[0], &combo->dst->def);
   }

   assert(store->num_components == num_components);
   nir_intrinsic_set_write_mask(store, combo->write_mask);
   nir_src_rewrite(&store->src[1], vec);
   state->progress = true;
}

static void
combine_stores_with_deref(struct combine_stores_state *state,
                          nir_deref_instr *deref)
{
   if (!nir_deref_mode_may_be(deref, state->modes))
      return;

   list_for_each_entry_safe(struct combined_store, combo, &state->pending, link) {
      if (nir_compare_derefs(combo->dst, deref) & nir_derefs_may_alias_bit) {
         combine_stores(state, combo);
         free_combined_store(state, combo);
      }
   }
}

static void
combine_stores_with_modes(struct combine_stores_state *state,
                          nir_variable_mode modes)
{
   if ((state->modes & modes) == 0)
      return;

   list_for_each_entry_safe(struct combined_store, combo, &state->pending, link) {
      if (nir_deref_mode_may_be(combo->dst, modes)) {
         combine_stores(state, combo);
         free_combined_store(state, combo);
      }
   }
}

static struct combined_store *
find_matching_combined_store(struct combine_stores_state *state,
                             nir_deref_instr *deref)
{
   list_for_each_entry(struct combined_store, combo, &state->pending, link) {
      if (nir_compare_derefs(combo->dst, deref) & nir_derefs_equal_bit)
         return combo;
   }
   return NULL;
}

static void
update_combined_store(struct combine_stores_state *state,
                      nir_intrinsic_instr *intrin)
{
   nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
   if (!nir_deref_mode_may_be(dst, state->modes))
      return;

   unsigned vec_mask;
   nir_deref_instr *vec_dst;

   if (glsl_type_is_vector(dst->type)) {
      vec_mask = nir_intrinsic_write_mask(intrin);
      vec_dst = dst;
   } else {
      /* Besides vectors, only direct array derefs of vectors are handled. */
      if (dst->deref_type != nir_deref_type_array ||
          !nir_src_is_const(dst->arr.index) ||
          !glsl_type_is_vector(nir_deref_instr_parent(dst)->type)) {
         combine_stores_with_deref(state, dst);
         return;
      }

      uint64_t index = nir_src_as_uint(dst->arr.index);
      vec_dst = nir_deref_instr_parent(dst);

      if (index >= glsl_get_vector_elements(vec_dst->type)) {
         /* Storing to an invalid index is a no-op. */
         nir_instr_remove(&intrin->instr);
         state->progress = true;
         return;
      }

      vec_mask = 1 << index;
   }

   struct combined_store *combo = find_matching_combined_store(state, vec_dst);
   if (!combo) {
      combo = alloc_combined_store(state);
      combo->dst = vec_dst;
      list_add(&combo->link, &state->pending);
   }

   /* Use pass_flags to reference count the store based on how many
    * components are still used by the combination.
    */
   intrin->instr.pass_flags = util_bitcount(vec_mask);
   combo->latest = intrin;

   /* Update the combined_store, clearing up older overlapping references. */
   combo->write_mask |= vec_mask;
   while (vec_mask) {
      unsigned i = u_bit_scan(&vec_mask);
      nir_intrinsic_instr *prev_store = combo->stores[i];

      if (prev_store) {
         if (--prev_store->instr.pass_flags == 0) {
            nir_instr_remove(&prev_store->instr);
         } else {
            assert(glsl_type_is_vector(
               nir_src_as_deref(prev_store->src[0])->type));
            nir_component_mask_t prev_mask = nir_intrinsic_write_mask(prev_store);
            nir_intrinsic_set_write_mask(prev_store, prev_mask & ~(1 << i));
         }
         state->progress = true;
      }
      combo->stores[i] = combo->latest;
   }
}

static void
combine_stores_block(struct combine_stores_state *state, nir_block *block)
{
   nir_foreach_instr_safe(instr, block) {
      if (instr->type == nir_instr_type_call) {
         combine_stores_with_modes(state, nir_var_shader_out |
                                             nir_var_shader_temp |
                                             nir_var_function_temp |
                                             nir_var_mem_ssbo |
                                             nir_var_mem_shared |
                                             nir_var_mem_global);
         continue;
      }

      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_store_deref:
         if (nir_intrinsic_access(intrin) & ACCESS_VOLATILE) {
            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
            /* When we see a volatile store, we go ahead and combine all
             * previous non-volatile stores which touch that address and
             * specifically don't add the volatile store to the list.  This
             * way we guarantee that the volatile store isn't combined with
             * anything and no non-volatile stores are combined across a
             * volatile store.
             */
            combine_stores_with_deref(state, dst);
         } else {
            update_combined_store(state, intrin);
         }
         break;

      case nir_intrinsic_barrier:
         if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_RELEASE) {
            combine_stores_with_modes(state,
                                      nir_intrinsic_memory_modes(intrin));
         }
         break;

      case nir_intrinsic_emit_vertex:
      case nir_intrinsic_emit_vertex_with_counter:
         combine_stores_with_modes(state, nir_var_shader_out);
         break;

      case nir_intrinsic_report_ray_intersection:
         combine_stores_with_modes(state, nir_var_mem_ssbo |
                                             nir_var_mem_global |
                                             nir_var_shader_call_data |
                                             nir_var_ray_hit_attrib);
         break;

      case nir_intrinsic_ignore_ray_intersection:
      case nir_intrinsic_terminate_ray:
         combine_stores_with_modes(state, nir_var_mem_ssbo |
                                             nir_var_mem_global |
                                             nir_var_shader_call_data);
         break;

      case nir_intrinsic_load_deref: {
         nir_deref_instr *src = nir_src_as_deref(intrin->src[0]);
         combine_stores_with_deref(state, src);
         break;
      }

      case nir_intrinsic_load_deref_block_intel:
      case nir_intrinsic_store_deref_block_intel: {
         /* Combine all the stores that may alias with the whole variable (or
          * cast).
          */
         nir_deref_instr *operand = nir_src_as_deref(intrin->src[0]);
         while (nir_deref_instr_parent(operand))
            operand = nir_deref_instr_parent(operand);
         assert(operand->deref_type == nir_deref_type_var ||
                operand->deref_type == nir_deref_type_cast);

         combine_stores_with_deref(state, operand);
         break;
      }

      case nir_intrinsic_copy_deref:
      case nir_intrinsic_memcpy_deref: {
         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
         nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
         combine_stores_with_deref(state, dst);
         combine_stores_with_deref(state, src);
         break;
      }

      case nir_intrinsic_trace_ray:
      case nir_intrinsic_execute_callable:
      case nir_intrinsic_rt_trace_ray:
      case nir_intrinsic_rt_execute_callable: {
         nir_deref_instr *payload =
            nir_src_as_deref(*nir_get_shader_call_payload_src(intrin));
         combine_stores_with_deref(state, payload);
         break;
      }

      case nir_intrinsic_deref_atomic:
      case nir_intrinsic_deref_atomic_swap: {
         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
         combine_stores_with_deref(state, dst);
         break;
      }

      default:
         break;
      }
   }

   /* At the end of the block, try all the remaining combinations. */
   combine_stores_with_modes(state, state->modes);
}

static bool
combine_stores_impl(struct combine_stores_state *state, nir_function_impl *impl)
{
   state->progress = false;
   state->b = nir_builder_create(impl);

   nir_foreach_block(block, impl)
      combine_stores_block(state, block);

   if (state->progress) {
      nir_metadata_preserve(impl, nir_metadata_control_flow);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return state->progress;
}

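/* A typical driver invocation might look like this (illustrative; the mode
 * mask below is an arbitrary example and is chosen by the caller):
 *
 *    NIR_PASS(progress, nir, nir_opt_combine_stores,
 *             nir_var_mem_ssbo | nir_var_mem_shared | nir_var_function_temp);
 */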
bool
nir_opt_combine_stores(nir_shader *shader, nir_variable_mode modes)
{
   void *mem_ctx = ralloc_context(NULL);
   struct combine_stores_state state = {
      .modes = modes,
      .lin_ctx = linear_context(mem_ctx),
   };

   list_inithead(&state.pending);
   list_inithead(&state.freelist);

   bool progress = false;

   nir_foreach_function_impl(impl, shader) {
      progress |= combine_stores_impl(&state, impl);
   }

   ralloc_free(mem_ctx);
   return progress;
}