xref: /aosp_15_r20/external/mesa3d/src/amd/compiler/aco_form_hard_clauses.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "aco_builder.h"
8 #include "aco_ir.h"
9 
10 #include <vector>
11 
12 namespace aco {
13 namespace {
14 
/* Categories used to decide which neighbouring memory instructions may share
 * a hard clause. LDS and VALU clauses exist as well, but they don't look
 * interesting enough to be formed here.
 *
 * The enumerators rely on their implicit values, so keep the order stable.
 */
enum clause_type {
   /* All hardware generations: */
   clause_smem,
   clause_other,

   /* GFX10: */
   clause_vmem,
   clause_flat,

   /* GFX11: */
   clause_mimg_load,
   clause_mimg_store,
   clause_mimg_atomic,
   clause_mimg_sample,
   clause_vmem_load,
   clause_vmem_store,
   clause_vmem_atomic,
   clause_flat_load,
   clause_flat_store,
   clause_flat_atomic,
   clause_bvh,
};
35 
36 void
emit_clause(Builder & bld,unsigned num_instrs,aco_ptr<Instruction> * instrs)37 emit_clause(Builder& bld, unsigned num_instrs, aco_ptr<Instruction>* instrs)
38 {
39    unsigned start = 0;
40    unsigned end = num_instrs;
41 
42    if (bld.program->gfx_level < GFX11) {
43       /* skip any stores at the start */
44       for (; (start < num_instrs) && instrs[start]->definitions.empty(); start++)
45          bld.insert(std::move(instrs[start]));
46 
47       for (end = start; (end < num_instrs) && !instrs[end]->definitions.empty(); end++)
48          ;
49    }
50 
51    unsigned clause_size = end - start;
52    if (clause_size > 1)
53       bld.sopp(aco_opcode::s_clause, clause_size - 1);
54 
55    for (unsigned i = start; i < num_instrs; i++)
56       bld.insert(std::move(instrs[i]));
57 }
58 
59 clause_type
get_type(Program * program,aco_ptr<Instruction> & instr)60 get_type(Program* program, aco_ptr<Instruction>& instr)
61 {
62    if (instr->isSMEM() && !instr->operands.empty())
63       return clause_smem;
64 
65    if (program->gfx_level >= GFX11) {
66       if (instr->isMIMG()) {
67          switch (instr->opcode) {
68          case aco_opcode::image_bvh_intersect_ray:
69          case aco_opcode::image_bvh64_intersect_ray: return clause_bvh;
70          case aco_opcode::image_atomic_swap:
71          case aco_opcode::image_atomic_cmpswap:
72          case aco_opcode::image_atomic_add:
73          case aco_opcode::image_atomic_sub:
74          case aco_opcode::image_atomic_rsub:
75          case aco_opcode::image_atomic_smin:
76          case aco_opcode::image_atomic_umin:
77          case aco_opcode::image_atomic_smax:
78          case aco_opcode::image_atomic_umax:
79          case aco_opcode::image_atomic_and:
80          case aco_opcode::image_atomic_or:
81          case aco_opcode::image_atomic_xor:
82          case aco_opcode::image_atomic_inc:
83          case aco_opcode::image_atomic_dec:
84          case aco_opcode::image_atomic_fcmpswap:
85          case aco_opcode::image_atomic_fmin:
86          case aco_opcode::image_atomic_fmax: return clause_mimg_atomic;
87          default:
88             if (instr->definitions.empty())
89                return clause_mimg_store;
90             else
91                return !instr->operands[1].isUndefined() && instr->operands[1].regClass() == s4
92                          ? clause_mimg_sample
93                          : clause_mimg_load;
94          }
95       } else if (instr->isMTBUF() || instr->isScratch()) {
96          return instr->definitions.empty() ? clause_vmem_store : clause_vmem_load;
97       } else if (instr->isMUBUF()) {
98          switch (instr->opcode) {
99          case aco_opcode::buffer_atomic_add:
100          case aco_opcode::buffer_atomic_and_x2:
101          case aco_opcode::buffer_atomic_rsub:
102          case aco_opcode::buffer_atomic_umax:
103          case aco_opcode::buffer_atomic_dec:
104          case aco_opcode::buffer_atomic_smax:
105          case aco_opcode::buffer_atomic_fmax:
106          case aco_opcode::buffer_atomic_rsub_x2:
107          case aco_opcode::buffer_atomic_smin:
108          case aco_opcode::buffer_atomic_sub:
109          case aco_opcode::buffer_atomic_sub_x2:
110          case aco_opcode::buffer_atomic_xor_x2:
111          case aco_opcode::buffer_atomic_add_f32:
112          case aco_opcode::buffer_atomic_inc:
113          case aco_opcode::buffer_atomic_swap_x2:
114          case aco_opcode::buffer_atomic_cmpswap:
115          case aco_opcode::buffer_atomic_fmin_x2:
116          case aco_opcode::buffer_atomic_umin:
117          case aco_opcode::buffer_atomic_or:
118          case aco_opcode::buffer_atomic_umax_x2:
119          case aco_opcode::buffer_atomic_smin_x2:
120          case aco_opcode::buffer_atomic_umin_x2:
121          case aco_opcode::buffer_atomic_cmpswap_x2:
122          case aco_opcode::buffer_atomic_add_x2:
123          case aco_opcode::buffer_atomic_swap:
124          case aco_opcode::buffer_atomic_and:
125          case aco_opcode::buffer_atomic_fmin:
126          case aco_opcode::buffer_atomic_fcmpswap_x2:
127          case aco_opcode::buffer_atomic_or_x2:
128          case aco_opcode::buffer_atomic_fcmpswap:
129          case aco_opcode::buffer_atomic_xor:
130          case aco_opcode::buffer_atomic_dec_x2:
131          case aco_opcode::buffer_atomic_fmax_x2:
132          case aco_opcode::buffer_atomic_csub:
133          case aco_opcode::buffer_atomic_inc_x2:
134          case aco_opcode::buffer_atomic_smax_x2: return clause_vmem_atomic;
135          default: return instr->definitions.empty() ? clause_vmem_store : clause_vmem_load;
136          }
137       } else if (instr->isGlobal()) {
138          switch (instr->opcode) {
139          case aco_opcode::global_atomic_swap:
140          case aco_opcode::global_atomic_umax:
141          case aco_opcode::global_atomic_cmpswap:
142          case aco_opcode::global_atomic_and_x2:
143          case aco_opcode::global_atomic_fmax:
144          case aco_opcode::global_atomic_smax_x2:
145          case aco_opcode::global_atomic_fmax_x2:
146          case aco_opcode::global_atomic_dec:
147          case aco_opcode::global_atomic_dec_x2:
148          case aco_opcode::global_atomic_umin:
149          case aco_opcode::global_atomic_fcmpswap_x2:
150          case aco_opcode::global_atomic_inc:
151          case aco_opcode::global_atomic_and:
152          case aco_opcode::global_atomic_fmin:
153          case aco_opcode::global_atomic_fcmpswap:
154          case aco_opcode::global_atomic_or_x2:
155          case aco_opcode::global_atomic_smax:
156          case aco_opcode::global_atomic_sub:
157          case aco_opcode::global_atomic_xor:
158          case aco_opcode::global_atomic_swap_x2:
159          case aco_opcode::global_atomic_umax_x2:
160          case aco_opcode::global_atomic_umin_x2:
161          case aco_opcode::global_atomic_xor_x2:
162          case aco_opcode::global_atomic_inc_x2:
163          case aco_opcode::global_atomic_fmin_x2:
164          case aco_opcode::global_atomic_add_f32:
165          case aco_opcode::global_atomic_add:
166          case aco_opcode::global_atomic_or:
167          case aco_opcode::global_atomic_add_x2:
168          case aco_opcode::global_atomic_smin_x2:
169          case aco_opcode::global_atomic_smin:
170          case aco_opcode::global_atomic_csub:
171          case aco_opcode::global_atomic_sub_x2:
172          case aco_opcode::global_atomic_cmpswap_x2: return clause_vmem_atomic;
173          default: return instr->definitions.empty() ? clause_vmem_store : clause_vmem_load;
174          }
175       } else if (instr->isFlat()) {
176          switch (instr->opcode) {
177          case aco_opcode::flat_atomic_smax:
178          case aco_opcode::flat_atomic_fcmpswap_x2:
179          case aco_opcode::flat_atomic_inc_x2:
180          case aco_opcode::flat_atomic_dec:
181          case aco_opcode::flat_atomic_fmin:
182          case aco_opcode::flat_atomic_umax_x2:
183          case aco_opcode::flat_atomic_add_f32:
184          case aco_opcode::flat_atomic_or:
185          case aco_opcode::flat_atomic_smax_x2:
186          case aco_opcode::flat_atomic_umin:
187          case aco_opcode::flat_atomic_sub:
188          case aco_opcode::flat_atomic_swap:
189          case aco_opcode::flat_atomic_swap_x2:
190          case aco_opcode::flat_atomic_cmpswap_x2:
191          case aco_opcode::flat_atomic_fcmpswap:
192          case aco_opcode::flat_atomic_add:
193          case aco_opcode::flat_atomic_umin_x2:
194          case aco_opcode::flat_atomic_xor_x2:
195          case aco_opcode::flat_atomic_smin:
196          case aco_opcode::flat_atomic_fmax_x2:
197          case aco_opcode::flat_atomic_cmpswap:
198          case aco_opcode::flat_atomic_dec_x2:
199          case aco_opcode::flat_atomic_sub_x2:
200          case aco_opcode::flat_atomic_add_x2:
201          case aco_opcode::flat_atomic_umax:
202          case aco_opcode::flat_atomic_xor:
203          case aco_opcode::flat_atomic_and_x2:
204          case aco_opcode::flat_atomic_inc:
205          case aco_opcode::flat_atomic_and:
206          case aco_opcode::flat_atomic_fmin_x2:
207          case aco_opcode::flat_atomic_smin_x2:
208          case aco_opcode::flat_atomic_or_x2:
209          case aco_opcode::flat_atomic_fmax: return clause_flat_atomic;
210          default: return instr->definitions.empty() ? clause_flat_store : clause_flat_load;
211          }
212       }
213    } else {
214       if (instr->isVMEM() && !instr->operands.empty()) {
215          if (program->gfx_level == GFX10 && instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 0)
216             return clause_other;
217          else
218             return clause_vmem;
219       } else if (instr->isScratch() || instr->isGlobal()) {
220          return clause_vmem;
221       } else if (instr->isFlat()) {
222          return clause_flat;
223       }
224    }
225    return clause_other;
226 }
227 
228 } /* end namespace */
229 
230 void
form_hard_clauses(Program * program)231 form_hard_clauses(Program* program)
232 {
233    /* The ISA documentation says 63 is the maximum for GFX11/12, but according to
234     * LLVM there are HW bugs with more than 32 instructions.
235     */
236    const unsigned max_clause_length = program->gfx_level >= GFX11 ? 32 : 63;
237    for (Block& block : program->blocks) {
238       unsigned num_instrs = 0;
239       aco_ptr<Instruction> current_instrs[63];
240       clause_type current_type = clause_other;
241 
242       std::vector<aco_ptr<Instruction>> new_instructions;
243       new_instructions.reserve(block.instructions.size());
244       Builder bld(program, &new_instructions);
245 
246       for (unsigned i = 0; i < block.instructions.size(); i++) {
247          aco_ptr<Instruction>& instr = block.instructions[i];
248 
249          clause_type type = get_type(program, instr);
250          if (type != current_type || num_instrs == max_clause_length ||
251              (num_instrs && !should_form_clause(current_instrs[0].get(), instr.get()))) {
252             emit_clause(bld, num_instrs, current_instrs);
253             num_instrs = 0;
254             current_type = type;
255          }
256 
257          if (type == clause_other) {
258             bld.insert(std::move(instr));
259             continue;
260          }
261 
262          current_instrs[num_instrs++] = std::move(instr);
263       }
264 
265       emit_clause(bld, num_instrs, current_instrs);
266 
267       block.instructions = std::move(new_instructions);
268    }
269 }
270 } // namespace aco
271