xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/brw_def_analysis.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2023 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "brw_fs.h"
8 #include "brw_cfg.h"
9 #include "brw_ir_analysis.h"
10 
11 /**
12  * An opportunistic SSA-def analysis pass.
13  *
14  * VGRFs are considered defs (SSA values) when:
15  *
16  * 1. One instruction wholly defines the register (including all offsets)
17  * 2. The single defining write dominates all uses
18  * 3. All VGRF sources of the definition are also defs
19  *
20  * We don't consider non-VGRF sources to prevent an instruction from forming
21  * an SSA def.  The other files represent immediates, pushed uniforms, inputs
22  * to shaders, thread payload fields, and so on.  In theory, we could mutate
23  * FIXED_GRF register values, but we don't today, so it isn't an issue.
24  *
25  * Limitations:
26  * - We do not track individual uses, only definitions and a per-register use count.
27  * - We do not handle flags, address registers, or accumulators yet.
28  *
29  * Usage:
30  *
31  *    const def_analysis &defs = s.def_analysis.require();
32  *    fs_inst *def = defs.get(inst->src[i]); // returns NULL if non-SSA
33  *    bblock_t *block = defs.get_block(inst->src[i]); // block containing def
34  *
35  * Def analysis requires the dominator tree, but not liveness information.
36  */
37 
38 using namespace brw;
39 
40 static fs_inst *const UNSEEN = (fs_inst *) (uintptr_t) 1;
41 
42 void
mark_invalid(int nr)43 def_analysis::mark_invalid(int nr)
44 {
45    def_blocks[nr] = NULL;
46    def_insts[nr] = NULL;
47 }
48 
49 void
update_for_reads(const idom_tree & idom,bblock_t * block,fs_inst * inst)50 def_analysis::update_for_reads(const idom_tree &idom,
51                                bblock_t *block,
52                                fs_inst *inst)
53 {
54    /* We don't track accumulator use for def analysis, so if an instruction
55     * implicitly reads the accumulator, we don't consider it to produce a def.
56     */
57    if (inst->reads_accumulator_implicitly())
58       mark_invalid(inst->dst.nr);
59 
60    for (int i = 0; i < inst->sources; i++) {
61       const int nr = inst->src[i].nr;
62 
63       if (inst->src[i].file != VGRF) {
64          /* Similarly, explicit reads of accumulators, address registers,
65           * and flags make the destination not a def, as we don't track those.
66           */
67          if (inst->src[i].file == ARF &&
68              (nr == BRW_ARF_ADDRESS ||
69               nr == BRW_ARF_ACCUMULATOR ||
70               nr == BRW_ARF_FLAG))
71             mark_invalid(inst->dst.nr);
72 
73          continue;
74       }
75 
76       def_use_counts[nr]++;
77 
78       if (def_insts[nr]) {
79          /* Mark the source def invalid in two cases:
80           *
81           * 1. The register is used before being written
82           * 2. The def doesn't dominate our use.
83           *
84           */
85          if (def_insts[nr] == UNSEEN ||
86              !idom.dominates(def_blocks[nr], block))
87             mark_invalid(nr);
88       }
89 
90       /* Additionally, if one of our sources is not a def, then our
91        * destination may have multiple dynamic assignments.
92        */
93       if (!def_insts[nr] && inst->dst.file == VGRF)
94          mark_invalid(inst->dst.nr);
95    }
96 }
97 
98 bool
fully_defines(const fs_visitor * v,fs_inst * inst)99 def_analysis::fully_defines(const fs_visitor *v, fs_inst *inst)
100 {
101    return v->alloc.sizes[inst->dst.nr] * REG_SIZE == inst->size_written &&
102           !inst->is_partial_write();
103 }
104 
105 void
update_for_write(const fs_visitor * v,bblock_t * block,fs_inst * inst)106 def_analysis::update_for_write(const fs_visitor *v,
107                                bblock_t *block,
108                                fs_inst *inst)
109 {
110    const int nr = inst->dst.nr;
111 
112    if (inst->dst.file != VGRF || !def_insts[nr])
113       return;
114 
115    /* If this is our first write to the destination, and it fully defines
116     * the destination, then consider it an SSA def for now.
117     */
118    if (def_insts[nr] == UNSEEN && fully_defines(v, inst)) {
119       def_insts[nr] = inst;
120       def_blocks[nr] = block;
121    } else {
122       /* Otherwise this is a second write or a partial write, in which
123        * case we know with certainty that this isn't an SSA def.
124        */
125       mark_invalid(nr);
126    }
127 }
128 
def_analysis(const fs_visitor * v)129 def_analysis::def_analysis(const fs_visitor *v)
130 {
131    const idom_tree &idom = v->idom_analysis.require();
132 
133    def_count = v->alloc.count;
134 
135    def_insts      = new fs_inst*[def_count]();
136    def_blocks     = new bblock_t*[def_count]();
137    def_use_counts = new uint32_t[def_count]();
138 
139    for (unsigned i = 0; i < def_count; i++)
140       def_insts[i] = UNSEEN;
141 
142    foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
143       if (inst->opcode != SHADER_OPCODE_UNDEF) {
144          update_for_reads(idom, block, inst);
145          update_for_write(v, block, inst);
146       }
147    }
148 
149    bool iterate;
150    do {
151       iterate = false;
152 
153       for (unsigned d = 0; d < def_count; d++) {
154          /* Anything still unseen was never written and thus dead code. */
155          if (def_insts[d] == UNSEEN)
156             def_insts[d] = NULL;
157 
158          fs_inst *def = def_insts[d];
159          if (!def)
160             continue;
161 
162          for (int i = 0; i < def->sources; i++) {
163             if (def->src[i].file != VGRF)
164                continue;
165 
166             const int nr = def->src[i].nr;
167 
168             /* If our "def" reads a non-SSA source, then it isn't a def. */
169             if (!def_insts[nr] || def_insts[nr] == UNSEEN) {
170                mark_invalid(def->dst.nr);
171                iterate = true;
172                break;
173             }
174          }
175       }
176    } while (iterate);
177 }
178 
~def_analysis()179 def_analysis::~def_analysis()
180 {
181    delete[] def_insts;
182    delete[] def_blocks;
183    delete[] def_use_counts;
184 }
185 
186 bool
validate(const fs_visitor * v) const187 def_analysis::validate(const fs_visitor *v) const
188 {
189    for (unsigned i = 0; i < def_count; i++) {
190       assert(!def_insts[i] == !def_blocks[i]);
191    }
192 
193    return true;
194 }
195 
196 void
print_stats(const fs_visitor * v) const197 def_analysis::print_stats(const fs_visitor *v) const
198 {
199    unsigned defs = 0;
200 
201    for (unsigned i = 0; i < def_count; i++) {
202       if (def_insts[i])
203          ++defs;
204    }
205 
206    fprintf(stderr, "DEFS: %u registers, %u SSA, %u non-SSA => %.1f SSA\n",
207            def_count, defs, def_count - defs,
208            100.0f * float(defs) / float(def_count));
209 }
210