xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/brw_nir_opt_fsat.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2024 Intel Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 /**
7  * \file
8  * Move fsat instructions closer to the source when it is likely to be
9  * profitable.
10  *
11  * Intel GPUs have a saturate destination modifier, and
12  * brw_fs_opt_saturate_propagation tries to replace explicit saturate
13  * operations with this destination modifier. That pass is limited in several
14  * ways. If the source of the explicit saturate is in a different block or if
15  * the source of the explicit saturate is live after the explicit saturate,
16  * brw_fs_opt_saturate_propagation will be unable to make progress.
17  *
18  * This optimization exists to help brw_fs_opt_saturate_propagation make more
19  * progress. It tries to move NIR fsat instructions to the same block that
20  * contains the definition of its source. It does this only in cases where it
21  * will not create additional live values. It also attempts to do this only in
22  * cases where the explicit saturate will ultimiately be converted to a
23  * destination modifier.
24  *
25  * The optimization scans all instructions. For each fsat instruction found,
26  * the optimization operates 4 main steps:
27  *
28  * 1. Find the source of the fsat instruction. If the source is an ALU
29  *    instruction, add it a worklist called Sources. This occurs in the
30  *    function \c collect_reaching_defs.
31  *
32  * 2. Process the Sources worklist. Iterate the uses of each instruction on
33  *    the worklist. If a use is a fsat instruction or a phi node, add the
34  *    instruction to a set of instructions to be "fixed up" in step 3, below.
35  *    If a use is a phi node, add its uses to the worklist. If a use is
36  *    neither a fsat instruction nor a phi node, return failure. This
37  *    indicates that there is some path from one of the definitions to a use
38  *    that is not fsat. This occurs in the function \c verify_users.
39  *
40  * 3. For each instruction in the "fix up" set created in step 2 that is not
41  *    an fsat, insert a new fsat instruction immediately following it.
42  *    Replace all uses of instruction with the new fsat.
43  *
44  * 4. Convert the old fsat instruction to a simple move instruction. This can
45  *    be eliminated by other optimizations.
46  *
47  * If there are many fsat users of a particular instruction, the algorithm
48  * will only perform step 3 for the first encountered fsat. Each fsat
49  * encountered later will detect that it is an fsat of an fsat (with the
50  * latter in a different basic block). Step 4 ensures that each fsat
51  * encountered later will still be eliminated.
52  *
53  * \note This optimization could find even more fsat instructions to move by
54  * walking "up" the phi web in step 1. If the source of the fsat is a phi
55  * node, repeatedly iterate through the phi web to find all of the reaching
56  * definitions.
57  *
58  * This has already been implemented. Unfortunately, moving some fsat
59  * instructions in some large ray tracing shaders in fossil-db causes the
60  * scheduler and register allocator to make bad choices. This results in
61  * additional spills and fills.
62  */
63 #include "brw_nir.h"
64 #include "nir_worklist.h"
65 
66 static nir_instr_worklist *
nir_instr_worklist_create_or_clear(nir_instr_worklist * wl)67 nir_instr_worklist_create_or_clear(nir_instr_worklist * wl)
68 {
69    if (wl == NULL) {
70       return nir_instr_worklist_create();
71    } else {
72       /* Clear any old cruft in the worklist. */
73       nir_foreach_instr_in_worklist(_, wl)
74          ;
75 
76       return wl;
77    }
78 }
79 
80 static struct set *
_mesa_pointer_set_create_or_clear(void * mem_ctx,struct set * s,void (* delete_function)(struct set_entry * entry))81 _mesa_pointer_set_create_or_clear(void *mem_ctx, struct set *s,
82                                   void (*delete_function)(struct set_entry *entry))
83 {
84    if (s == NULL) {
85       return _mesa_pointer_set_create(mem_ctx);
86    } else {
87       _mesa_set_clear(s, delete_function);
88       return s;
89    }
90 }
91 
92 static void
collect_reaching_defs(nir_alu_instr * fsat,nir_instr_worklist * sources)93 collect_reaching_defs(nir_alu_instr *fsat, nir_instr_worklist *sources)
94 {
95    nir_def *def = fsat->src[0].src.ssa;
96 
97    /* If the source of the fsat is in the same block,
98     * brw_fs_opt_saturate_propagation will already have enough information to
99     * do its job. Adding another fsat will not help.
100     */
101    if (def->parent_instr->type == nir_instr_type_alu &&
102        def->parent_instr->block != fsat->instr.block) {
103       nir_instr_worklist_push_tail(sources, def->parent_instr);
104    }
105 }
106 
107 static bool
verify_users(nir_instr_worklist * sources,struct set * verified_phis,struct set * fixup)108 verify_users(nir_instr_worklist *sources, struct set *verified_phis,
109              struct set *fixup)
110 {
111    bool progress = false;
112 
113    /* For each source in the set, check that each possible user is an fsat. If
114     * the source itself is an fsat, the users don't matter.
115     */
116    nir_foreach_instr_in_worklist(src, sources) {
117       if (src->type == nir_instr_type_phi) {
118          /* The phi web graph may have cycles. Don't revisit phi nodes to
119           * prevent infinite loops.
120           */
121          if (_mesa_set_search(verified_phis, src) != NULL)
122             continue;
123       } else if (src->type == nir_instr_type_alu) {
124          /* If a reachable definition is already an fsat, there is no more
125           * work to be done for that instruction.
126           *
127           * FINISHME: This could be made slightly better. Range analysis could
128           * be used to determine that src is a number (not NaN) and that
129           * number is already [0, 1]. This would detect cases like 'b2f(a)' or
130           * 'bcsel(a, fsat(b), 0.0)'.
131           */
132          if (nir_instr_as_alu(src)->op == nir_op_fsat) {
133             progress = true;
134             continue;
135          }
136       }
137 
138       nir_def *src_def = nir_instr_def(src);
139 
140       /* It should not be possible for an instruction to get added to the
141        * worklist that does not have a def.
142        */
143       assert(src_def != NULL);
144 
145       if (nir_def_used_by_if(src_def))
146          return false;
147 
148       nir_foreach_use(use, src_def) {
149          nir_instr *user_instr = nir_src_parent_instr(use);
150 
151          if (user_instr->type == nir_instr_type_phi) {
152             nir_instr_worklist_push_tail(sources, user_instr);
153          } else if (user_instr->type != nir_instr_type_alu ||
154                     nir_instr_as_alu(user_instr)->op != nir_op_fsat) {
155             return false;
156          }
157       }
158 
159       if (src->type == nir_instr_type_phi) {
160          /* Now that the phi is verified, add it to the cache. */
161          _mesa_set_add(verified_phis, src);
162       } else {
163          /* Add this source to the set of instructions that need to be
164           * modified.
165           */
166          _mesa_set_search_or_add(fixup, src, NULL);
167          progress = true;
168       }
169    }
170 
171    return progress;
172 }
173 
174 static void
fixup_defs(struct set * fixup)175 fixup_defs(struct set *fixup)
176 {
177    /* For each instruction in the fixup set, add an fsat user of it, and
178     * replace all of its old uses with the new fsat.
179     */
180    set_foreach_remove(fixup, entry) {
181       nir_instr *src = (nir_instr *) entry->key;
182       nir_def *src_def = nir_instr_def(src);
183 
184       /* It should not be possible for an instruction to get added to the
185        * fixup set that does not have a def.
186        */
187       assert(src_def != NULL);
188 
189       nir_builder b = nir_builder_at(nir_after_instr(src));
190 
191       nir_def *new_fsat = nir_fsat(&b, src_def);
192 
193       nir_def_rewrite_uses_after(src_def, new_fsat, new_fsat->parent_instr);
194    }
195 }
196 
197 bool
brw_nir_opt_fsat(nir_shader * shader)198 brw_nir_opt_fsat(nir_shader *shader)
199 {
200    bool progress = false;
201    void *mem_ctx = ralloc_context(NULL);
202    nir_instr_worklist *sources = NULL;
203    struct set *fixup = NULL;
204    struct set *verified_phis = NULL;
205 
206    nir_foreach_function_impl(impl, shader) {
207       bool progress_impl = false;
208 
209       nir_foreach_block(block, impl) {
210          nir_foreach_instr(instr, block) {
211             if (instr->type != nir_instr_type_alu)
212                continue;
213 
214             nir_alu_instr *alu = nir_instr_as_alu(instr);
215             if (alu->op != nir_op_fsat)
216                continue;
217 
218             sources = nir_instr_worklist_create_or_clear(sources);
219             fixup = _mesa_pointer_set_create_or_clear(mem_ctx, fixup, NULL);
220 
221             collect_reaching_defs(alu, sources);
222 
223             /* verified_phis is a cache of phi nodes where all users of the
224              * phi node are (eventually) fsat. Once a phi node is verified, it
225              * will always be valid. It is not necessary to clear this set
226              * between passes.
227              */
228             if (verified_phis == NULL)
229                verified_phis = _mesa_pointer_set_create(mem_ctx);
230 
231             if (verify_users(sources, verified_phis, fixup)) {
232                fixup_defs(fixup);
233 
234                /* All defs that can reach the old fsat instruction must
235                 * already be saturated. For simplicity, convert the old fsat
236                 * to a simple move. Other optimization passes can eliminate
237                 * the move.
238                 */
239                alu->op = nir_op_mov;
240                progress_impl = true;
241             }
242          }
243       }
244 
245       if (progress_impl) {
246          nir_metadata_preserve(impl, nir_metadata_control_flow);
247          progress = true;
248       } else {
249          nir_metadata_preserve(impl, nir_metadata_all);
250       }
251    }
252 
253    if (sources != NULL)
254       nir_instr_worklist_destroy(sources);
255 
256    ralloc_free(mem_ctx);
257 
258    return progress;
259 }
260