xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/lima/ir/lima_nir_split_loads.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (c) 2019 Connor Abbott <[email protected]>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include "nir.h"
26 #include "nir_builder.h"
27 #include "lima_ir.h"
28 
29 /* This pass clones certain input intrinsics, creating a copy for each user.
30  * Inputs are relatively cheap, since in both PP and GP one input can be
31  * loaded "for free" in each instruction bundle. In GP especially, if there is
32  * a load instruction with multiple uses in different basic blocks, we need to
33  * split it in NIR so that we don't generate a register write and reads for
34  * it, which is almost certainly more expensive than splitting. Hence this
35  * pass is more aggressive than nir_opt_move, which just moves the intrinsic
36  * down but won't split it.
37  */
38 
39 static nir_def *
clone_intrinsic(nir_builder * b,nir_intrinsic_instr * intrin)40 clone_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
41 {
42    nir_intrinsic_instr *new_intrin =
43       nir_instr_as_intrinsic(nir_instr_clone(b->shader, &intrin->instr));
44 
45    nir_builder_instr_insert(b, &new_intrin->instr);
46 
47    return &new_intrin->def;
48 }
49 
50 static bool
replace_intrinsic(nir_builder * b,nir_intrinsic_instr * intrin)51 replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
52 {
53    if (intrin->intrinsic != nir_intrinsic_load_input &&
54        intrin->intrinsic != nir_intrinsic_load_uniform)
55       return false;
56 
57    if (intrin->src[0].ssa->parent_instr->type == nir_instr_type_load_const)
58       return false;
59 
60    struct hash_table *visited_instrs = _mesa_pointer_hash_table_create(NULL);
61 
62    nir_foreach_use_safe(src, &intrin->def) {
63       struct hash_entry *entry =
64          _mesa_hash_table_search(visited_instrs, nir_src_parent_instr(src));
65       if (entry && (nir_src_parent_instr(src)->type != nir_instr_type_phi)) {
66          nir_def *def = entry->data;
67          nir_src_rewrite(src, def);
68          continue;
69       }
70       b->cursor = nir_before_src(src);
71       nir_def *new = clone_intrinsic(b, intrin);
72       nir_src_rewrite(src, new);
73       _mesa_hash_table_insert(visited_instrs, nir_src_parent_instr(src), new);
74    }
75    nir_foreach_if_use_safe(src, &intrin->def) {
76       b->cursor = nir_before_src(src);
77       nir_src_rewrite(&nir_src_parent_if(src)->condition, clone_intrinsic(b, intrin));
78    }
79 
80    nir_instr_remove(&intrin->instr);
81    _mesa_hash_table_destroy(visited_instrs, NULL);
82    return true;
83 }
84 
85 static void
replace_load_const(nir_builder * b,nir_load_const_instr * load_const)86 replace_load_const(nir_builder *b, nir_load_const_instr *load_const)
87 {
88    struct hash_table *visited_instrs = _mesa_pointer_hash_table_create(NULL);
89 
90    nir_foreach_use_safe(src, &load_const->def) {
91       struct hash_entry *entry =
92          _mesa_hash_table_search(visited_instrs, nir_src_parent_instr(src));
93       if (entry && (nir_src_parent_instr(src)->type != nir_instr_type_phi)) {
94          nir_def *def = entry->data;
95          nir_src_rewrite(src, def);
96          continue;
97       }
98       b->cursor = nir_before_src(src);
99       nir_def *new = nir_build_imm(b, load_const->def.num_components,
100                                        load_const->def.bit_size,
101                                        load_const->value);
102       nir_src_rewrite(src, new);
103       _mesa_hash_table_insert(visited_instrs, nir_src_parent_instr(src), new);
104    }
105 
106    nir_instr_remove(&load_const->instr);
107    _mesa_hash_table_destroy(visited_instrs, NULL);
108 }
109 
110 bool
lima_nir_split_loads(nir_shader * shader)111 lima_nir_split_loads(nir_shader *shader)
112 {
113    bool progress = false;
114 
115    nir_foreach_function_impl(impl, shader) {
116       nir_builder b = nir_builder_create(impl);
117 
118       nir_foreach_block_reverse(block, impl) {
119          nir_foreach_instr_reverse_safe(instr, block) {
120             if (instr->type == nir_instr_type_load_const) {
121                replace_load_const(&b, nir_instr_as_load_const(instr));
122                progress = true;
123             } else if (instr->type == nir_instr_type_intrinsic) {
124                progress |= replace_intrinsic(&b, nir_instr_as_intrinsic(instr));
125             }
126          }
127       }
128    }
129 
130    return progress;
131 }
132 
133