xref: /aosp_15_r20/external/mesa3d/src/amd/compiler/aco_ir.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright © 2020 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */
6*61046927SAndroid Build Coastguard Worker 
7*61046927SAndroid Build Coastguard Worker #include "aco_ir.h"
8*61046927SAndroid Build Coastguard Worker 
9*61046927SAndroid Build Coastguard Worker #include "aco_builder.h"
10*61046927SAndroid Build Coastguard Worker 
11*61046927SAndroid Build Coastguard Worker #include "util/u_debug.h"
12*61046927SAndroid Build Coastguard Worker 
13*61046927SAndroid Build Coastguard Worker #include "c11/threads.h"
14*61046927SAndroid Build Coastguard Worker 
15*61046927SAndroid Build Coastguard Worker namespace aco {
16*61046927SAndroid Build Coastguard Worker 
17*61046927SAndroid Build Coastguard Worker thread_local aco::monotonic_buffer_resource* instruction_buffer = nullptr;
18*61046927SAndroid Build Coastguard Worker 
19*61046927SAndroid Build Coastguard Worker uint64_t debug_flags = 0;
20*61046927SAndroid Build Coastguard Worker 
21*61046927SAndroid Build Coastguard Worker static const struct debug_control aco_debug_options[] = {
22*61046927SAndroid Build Coastguard Worker    {"validateir", DEBUG_VALIDATE_IR},
23*61046927SAndroid Build Coastguard Worker    {"validatera", DEBUG_VALIDATE_RA},
24*61046927SAndroid Build Coastguard Worker    {"validate-livevars", DEBUG_VALIDATE_LIVE_VARS},
25*61046927SAndroid Build Coastguard Worker    {"novalidateir", DEBUG_NO_VALIDATE_IR},
26*61046927SAndroid Build Coastguard Worker    {"force-waitcnt", DEBUG_FORCE_WAITCNT},
27*61046927SAndroid Build Coastguard Worker    {"force-waitdeps", DEBUG_FORCE_WAITDEPS},
28*61046927SAndroid Build Coastguard Worker    {"novn", DEBUG_NO_VN},
29*61046927SAndroid Build Coastguard Worker    {"noopt", DEBUG_NO_OPT},
30*61046927SAndroid Build Coastguard Worker    {"nosched", DEBUG_NO_SCHED | DEBUG_NO_SCHED_ILP | DEBUG_NO_SCHED_VOPD},
31*61046927SAndroid Build Coastguard Worker    {"nosched-ilp", DEBUG_NO_SCHED_ILP},
32*61046927SAndroid Build Coastguard Worker    {"nosched-vopd", DEBUG_NO_SCHED_VOPD},
33*61046927SAndroid Build Coastguard Worker    {"perfinfo", DEBUG_PERF_INFO},
34*61046927SAndroid Build Coastguard Worker    {"liveinfo", DEBUG_LIVE_INFO},
35*61046927SAndroid Build Coastguard Worker    {NULL, 0}};
36*61046927SAndroid Build Coastguard Worker 
37*61046927SAndroid Build Coastguard Worker static once_flag init_once_flag = ONCE_FLAG_INIT;
38*61046927SAndroid Build Coastguard Worker 
39*61046927SAndroid Build Coastguard Worker static void
init_once()40*61046927SAndroid Build Coastguard Worker init_once()
41*61046927SAndroid Build Coastguard Worker {
42*61046927SAndroid Build Coastguard Worker    debug_flags = parse_debug_string(getenv("ACO_DEBUG"), aco_debug_options);
43*61046927SAndroid Build Coastguard Worker 
44*61046927SAndroid Build Coastguard Worker #ifndef NDEBUG
45*61046927SAndroid Build Coastguard Worker    /* enable some flags by default on debug builds */
46*61046927SAndroid Build Coastguard Worker    debug_flags |= aco::DEBUG_VALIDATE_IR;
47*61046927SAndroid Build Coastguard Worker #endif
48*61046927SAndroid Build Coastguard Worker 
49*61046927SAndroid Build Coastguard Worker    if (debug_flags & aco::DEBUG_NO_VALIDATE_IR)
50*61046927SAndroid Build Coastguard Worker       debug_flags &= ~aco::DEBUG_VALIDATE_IR;
51*61046927SAndroid Build Coastguard Worker }
52*61046927SAndroid Build Coastguard Worker 
53*61046927SAndroid Build Coastguard Worker void
init()54*61046927SAndroid Build Coastguard Worker init()
55*61046927SAndroid Build Coastguard Worker {
56*61046927SAndroid Build Coastguard Worker    call_once(&init_once_flag, init_once);
57*61046927SAndroid Build Coastguard Worker }
58*61046927SAndroid Build Coastguard Worker 
59*61046927SAndroid Build Coastguard Worker void
init_program(Program * program,Stage stage,const struct aco_shader_info * info,enum amd_gfx_level gfx_level,enum radeon_family family,bool wgp_mode,ac_shader_config * config)60*61046927SAndroid Build Coastguard Worker init_program(Program* program, Stage stage, const struct aco_shader_info* info,
61*61046927SAndroid Build Coastguard Worker              enum amd_gfx_level gfx_level, enum radeon_family family, bool wgp_mode,
62*61046927SAndroid Build Coastguard Worker              ac_shader_config* config)
63*61046927SAndroid Build Coastguard Worker {
64*61046927SAndroid Build Coastguard Worker    instruction_buffer = &program->m;
65*61046927SAndroid Build Coastguard Worker    program->stage = stage;
66*61046927SAndroid Build Coastguard Worker    program->config = config;
67*61046927SAndroid Build Coastguard Worker    program->info = *info;
68*61046927SAndroid Build Coastguard Worker    program->gfx_level = gfx_level;
69*61046927SAndroid Build Coastguard Worker    if (family == CHIP_UNKNOWN) {
70*61046927SAndroid Build Coastguard Worker       switch (gfx_level) {
71*61046927SAndroid Build Coastguard Worker       case GFX6: program->family = CHIP_TAHITI; break;
72*61046927SAndroid Build Coastguard Worker       case GFX7: program->family = CHIP_BONAIRE; break;
73*61046927SAndroid Build Coastguard Worker       case GFX8: program->family = CHIP_POLARIS10; break;
74*61046927SAndroid Build Coastguard Worker       case GFX9: program->family = CHIP_VEGA10; break;
75*61046927SAndroid Build Coastguard Worker       case GFX10: program->family = CHIP_NAVI10; break;
76*61046927SAndroid Build Coastguard Worker       case GFX10_3: program->family = CHIP_NAVI21; break;
77*61046927SAndroid Build Coastguard Worker       case GFX11: program->family = CHIP_NAVI31; break;
78*61046927SAndroid Build Coastguard Worker       case GFX12: program->family = CHIP_GFX1200; break;
79*61046927SAndroid Build Coastguard Worker       default: program->family = CHIP_UNKNOWN; break;
80*61046927SAndroid Build Coastguard Worker       }
81*61046927SAndroid Build Coastguard Worker    } else {
82*61046927SAndroid Build Coastguard Worker       program->family = family;
83*61046927SAndroid Build Coastguard Worker    }
84*61046927SAndroid Build Coastguard Worker    program->wave_size = info->wave_size;
85*61046927SAndroid Build Coastguard Worker    program->lane_mask = program->wave_size == 32 ? s1 : s2;
86*61046927SAndroid Build Coastguard Worker 
87*61046927SAndroid Build Coastguard Worker    program->dev.lds_encoding_granule = gfx_level >= GFX11 && stage == fragment_fs ? 1024
88*61046927SAndroid Build Coastguard Worker                                        : gfx_level >= GFX7                        ? 512
89*61046927SAndroid Build Coastguard Worker                                                                                   : 256;
90*61046927SAndroid Build Coastguard Worker    program->dev.lds_alloc_granule = gfx_level >= GFX10_3 ? 1024 : program->dev.lds_encoding_granule;
91*61046927SAndroid Build Coastguard Worker 
92*61046927SAndroid Build Coastguard Worker    /* GFX6: There is 64KB LDS per CU, but a single workgroup can only use 32KB. */
93*61046927SAndroid Build Coastguard Worker    program->dev.lds_limit = gfx_level >= GFX7 ? 65536 : 32768;
94*61046927SAndroid Build Coastguard Worker 
95*61046927SAndroid Build Coastguard Worker    /* apparently gfx702 also has 16-bank LDS but I can't find a family for that */
96*61046927SAndroid Build Coastguard Worker    program->dev.has_16bank_lds = family == CHIP_KABINI || family == CHIP_STONEY;
97*61046927SAndroid Build Coastguard Worker 
98*61046927SAndroid Build Coastguard Worker    program->dev.vgpr_limit = stage == raytracing_cs ? 128 : 256;
99*61046927SAndroid Build Coastguard Worker    program->dev.physical_vgprs = 256;
100*61046927SAndroid Build Coastguard Worker    program->dev.vgpr_alloc_granule = 4;
101*61046927SAndroid Build Coastguard Worker 
102*61046927SAndroid Build Coastguard Worker    if (gfx_level >= GFX10) {
103*61046927SAndroid Build Coastguard Worker       program->dev.physical_sgprs = 128 * 20; /* enough for max waves */
104*61046927SAndroid Build Coastguard Worker       program->dev.sgpr_alloc_granule = 128;
105*61046927SAndroid Build Coastguard Worker       program->dev.sgpr_limit =
106*61046927SAndroid Build Coastguard Worker          108; /* includes VCC, which can be treated as s[106-107] on GFX10+ */
107*61046927SAndroid Build Coastguard Worker 
108*61046927SAndroid Build Coastguard Worker       if (family == CHIP_NAVI31 || family == CHIP_NAVI32 || family == CHIP_GFX1151 ||
109*61046927SAndroid Build Coastguard Worker           gfx_level >= GFX12) {
110*61046927SAndroid Build Coastguard Worker          program->dev.physical_vgprs = program->wave_size == 32 ? 1536 : 768;
111*61046927SAndroid Build Coastguard Worker          program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 24 : 12;
112*61046927SAndroid Build Coastguard Worker       } else {
113*61046927SAndroid Build Coastguard Worker          program->dev.physical_vgprs = program->wave_size == 32 ? 1024 : 512;
114*61046927SAndroid Build Coastguard Worker          if (gfx_level >= GFX10_3)
115*61046927SAndroid Build Coastguard Worker             program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 16 : 8;
116*61046927SAndroid Build Coastguard Worker          else
117*61046927SAndroid Build Coastguard Worker             program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 8 : 4;
118*61046927SAndroid Build Coastguard Worker       }
119*61046927SAndroid Build Coastguard Worker    } else if (program->gfx_level >= GFX8) {
120*61046927SAndroid Build Coastguard Worker       program->dev.physical_sgprs = 800;
121*61046927SAndroid Build Coastguard Worker       program->dev.sgpr_alloc_granule = 16;
122*61046927SAndroid Build Coastguard Worker       program->dev.sgpr_limit = 102;
123*61046927SAndroid Build Coastguard Worker       if (family == CHIP_TONGA || family == CHIP_ICELAND)
124*61046927SAndroid Build Coastguard Worker          program->dev.sgpr_alloc_granule = 96; /* workaround hardware bug */
125*61046927SAndroid Build Coastguard Worker    } else {
126*61046927SAndroid Build Coastguard Worker       program->dev.physical_sgprs = 512;
127*61046927SAndroid Build Coastguard Worker       program->dev.sgpr_alloc_granule = 8;
128*61046927SAndroid Build Coastguard Worker       program->dev.sgpr_limit = 104;
129*61046927SAndroid Build Coastguard Worker    }
130*61046927SAndroid Build Coastguard Worker 
131*61046927SAndroid Build Coastguard Worker    program->dev.scratch_alloc_granule = gfx_level >= GFX11 ? 256 : 1024;
132*61046927SAndroid Build Coastguard Worker 
133*61046927SAndroid Build Coastguard Worker    program->dev.max_waves_per_simd = 10;
134*61046927SAndroid Build Coastguard Worker    if (program->gfx_level >= GFX10_3)
135*61046927SAndroid Build Coastguard Worker       program->dev.max_waves_per_simd = 16;
136*61046927SAndroid Build Coastguard Worker    else if (program->gfx_level == GFX10)
137*61046927SAndroid Build Coastguard Worker       program->dev.max_waves_per_simd = 20;
138*61046927SAndroid Build Coastguard Worker    else if (program->family >= CHIP_POLARIS10 && program->family <= CHIP_VEGAM)
139*61046927SAndroid Build Coastguard Worker       program->dev.max_waves_per_simd = 8;
140*61046927SAndroid Build Coastguard Worker 
141*61046927SAndroid Build Coastguard Worker    program->dev.simd_per_cu = program->gfx_level >= GFX10 ? 2 : 4;
142*61046927SAndroid Build Coastguard Worker 
143*61046927SAndroid Build Coastguard Worker    switch (program->family) {
144*61046927SAndroid Build Coastguard Worker    /* GFX8 APUs */
145*61046927SAndroid Build Coastguard Worker    case CHIP_CARRIZO:
146*61046927SAndroid Build Coastguard Worker    case CHIP_STONEY:
147*61046927SAndroid Build Coastguard Worker    /* GFX9 APUS */
148*61046927SAndroid Build Coastguard Worker    case CHIP_RAVEN:
149*61046927SAndroid Build Coastguard Worker    case CHIP_RAVEN2:
150*61046927SAndroid Build Coastguard Worker    case CHIP_RENOIR: program->dev.xnack_enabled = true; break;
151*61046927SAndroid Build Coastguard Worker    default: break;
152*61046927SAndroid Build Coastguard Worker    }
153*61046927SAndroid Build Coastguard Worker 
154*61046927SAndroid Build Coastguard Worker    program->dev.sram_ecc_enabled = program->family == CHIP_MI100;
155*61046927SAndroid Build Coastguard Worker    /* apparently gfx702 also has fast v_fma_f32 but I can't find a family for that */
156*61046927SAndroid Build Coastguard Worker    program->dev.has_fast_fma32 = program->gfx_level >= GFX9;
157*61046927SAndroid Build Coastguard Worker    if (program->family == CHIP_TAHITI || program->family == CHIP_CARRIZO ||
158*61046927SAndroid Build Coastguard Worker        program->family == CHIP_HAWAII)
159*61046927SAndroid Build Coastguard Worker       program->dev.has_fast_fma32 = true;
160*61046927SAndroid Build Coastguard Worker    program->dev.has_mac_legacy32 = program->gfx_level <= GFX7 || program->gfx_level == GFX10;
161*61046927SAndroid Build Coastguard Worker    program->dev.has_fmac_legacy32 = program->gfx_level >= GFX10_3 && program->gfx_level < GFX12;
162*61046927SAndroid Build Coastguard Worker 
163*61046927SAndroid Build Coastguard Worker    program->dev.fused_mad_mix = program->gfx_level >= GFX10;
164*61046927SAndroid Build Coastguard Worker    if (program->family == CHIP_VEGA12 || program->family == CHIP_VEGA20 ||
165*61046927SAndroid Build Coastguard Worker        program->family == CHIP_MI100 || program->family == CHIP_MI200)
166*61046927SAndroid Build Coastguard Worker       program->dev.fused_mad_mix = true;
167*61046927SAndroid Build Coastguard Worker 
168*61046927SAndroid Build Coastguard Worker    if (program->gfx_level >= GFX11) {
169*61046927SAndroid Build Coastguard Worker       program->dev.scratch_global_offset_min = -4096;
170*61046927SAndroid Build Coastguard Worker       program->dev.scratch_global_offset_max = 4095;
171*61046927SAndroid Build Coastguard Worker    } else if (program->gfx_level >= GFX10 || program->gfx_level == GFX8) {
172*61046927SAndroid Build Coastguard Worker       program->dev.scratch_global_offset_min = -2048;
173*61046927SAndroid Build Coastguard Worker       program->dev.scratch_global_offset_max = 2047;
174*61046927SAndroid Build Coastguard Worker    } else if (program->gfx_level == GFX9) {
175*61046927SAndroid Build Coastguard Worker       /* The minimum is actually -4096, but negative offsets are broken when SADDR is used. */
176*61046927SAndroid Build Coastguard Worker       program->dev.scratch_global_offset_min = 0;
177*61046927SAndroid Build Coastguard Worker       program->dev.scratch_global_offset_max = 4095;
178*61046927SAndroid Build Coastguard Worker    }
179*61046927SAndroid Build Coastguard Worker 
180*61046927SAndroid Build Coastguard Worker    if (program->gfx_level >= GFX12) {
181*61046927SAndroid Build Coastguard Worker       /* Same as GFX11, except one less for VSAMPLE. */
182*61046927SAndroid Build Coastguard Worker       program->dev.max_nsa_vgprs = 3;
183*61046927SAndroid Build Coastguard Worker    } else if (program->gfx_level >= GFX11) {
184*61046927SAndroid Build Coastguard Worker       /* GFX11 can have only 1 NSA dword. The last VGPR isn't included here because it contains the
185*61046927SAndroid Build Coastguard Worker        * rest of the address.
186*61046927SAndroid Build Coastguard Worker        */
187*61046927SAndroid Build Coastguard Worker       program->dev.max_nsa_vgprs = 4;
188*61046927SAndroid Build Coastguard Worker    } else if (program->gfx_level >= GFX10_3) {
189*61046927SAndroid Build Coastguard Worker       /* GFX10.3 can have up to 3 NSA dwords. */
190*61046927SAndroid Build Coastguard Worker       program->dev.max_nsa_vgprs = 13;
191*61046927SAndroid Build Coastguard Worker    } else if (program->gfx_level >= GFX10) {
192*61046927SAndroid Build Coastguard Worker       /* Limit NSA instructions to 1 NSA dword on GFX10 to avoid stability issues. */
193*61046927SAndroid Build Coastguard Worker       program->dev.max_nsa_vgprs = 5;
194*61046927SAndroid Build Coastguard Worker    } else {
195*61046927SAndroid Build Coastguard Worker       program->dev.max_nsa_vgprs = 0;
196*61046927SAndroid Build Coastguard Worker    }
197*61046927SAndroid Build Coastguard Worker 
198*61046927SAndroid Build Coastguard Worker    program->wgp_mode = wgp_mode;
199*61046927SAndroid Build Coastguard Worker 
200*61046927SAndroid Build Coastguard Worker    program->progress = CompilationProgress::after_isel;
201*61046927SAndroid Build Coastguard Worker 
202*61046927SAndroid Build Coastguard Worker    program->next_fp_mode.must_flush_denorms32 = false;
203*61046927SAndroid Build Coastguard Worker    program->next_fp_mode.must_flush_denorms16_64 = false;
204*61046927SAndroid Build Coastguard Worker    program->next_fp_mode.care_about_round32 = false;
205*61046927SAndroid Build Coastguard Worker    program->next_fp_mode.care_about_round16_64 = false;
206*61046927SAndroid Build Coastguard Worker    program->next_fp_mode.denorm16_64 = fp_denorm_keep;
207*61046927SAndroid Build Coastguard Worker    program->next_fp_mode.denorm32 = 0;
208*61046927SAndroid Build Coastguard Worker    program->next_fp_mode.round16_64 = fp_round_ne;
209*61046927SAndroid Build Coastguard Worker    program->next_fp_mode.round32 = fp_round_ne;
210*61046927SAndroid Build Coastguard Worker }
211*61046927SAndroid Build Coastguard Worker 
212*61046927SAndroid Build Coastguard Worker bool
is_wait_export_ready(amd_gfx_level gfx_level,const Instruction * instr)213*61046927SAndroid Build Coastguard Worker is_wait_export_ready(amd_gfx_level gfx_level, const Instruction* instr)
214*61046927SAndroid Build Coastguard Worker {
215*61046927SAndroid Build Coastguard Worker    return instr->opcode == aco_opcode::s_wait_event &&
216*61046927SAndroid Build Coastguard Worker           (gfx_level >= GFX12 ? (instr->salu().imm & wait_event_imm_wait_export_ready_gfx12)
217*61046927SAndroid Build Coastguard Worker                               : !(instr->salu().imm & wait_event_imm_dont_wait_export_ready_gfx11));
218*61046927SAndroid Build Coastguard Worker }
219*61046927SAndroid Build Coastguard Worker 
220*61046927SAndroid Build Coastguard Worker memory_sync_info
get_sync_info(const Instruction * instr)221*61046927SAndroid Build Coastguard Worker get_sync_info(const Instruction* instr)
222*61046927SAndroid Build Coastguard Worker {
223*61046927SAndroid Build Coastguard Worker    /* Primitive Ordered Pixel Shading barriers necessary for accesses to memory shared between
224*61046927SAndroid Build Coastguard Worker     * overlapping waves in the queue family.
225*61046927SAndroid Build Coastguard Worker     */
226*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::p_pops_gfx9_overlapped_wave_wait_done ||
227*61046927SAndroid Build Coastguard Worker        instr->opcode == aco_opcode::s_wait_event) {
228*61046927SAndroid Build Coastguard Worker       return memory_sync_info(storage_buffer | storage_image, semantic_acquire, scope_queuefamily);
229*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::p_pops_gfx9_ordered_section_done) {
230*61046927SAndroid Build Coastguard Worker       return memory_sync_info(storage_buffer | storage_image, semantic_release, scope_queuefamily);
231*61046927SAndroid Build Coastguard Worker    }
232*61046927SAndroid Build Coastguard Worker 
233*61046927SAndroid Build Coastguard Worker    switch (instr->format) {
234*61046927SAndroid Build Coastguard Worker    case Format::SMEM: return instr->smem().sync;
235*61046927SAndroid Build Coastguard Worker    case Format::MUBUF: return instr->mubuf().sync;
236*61046927SAndroid Build Coastguard Worker    case Format::MIMG: return instr->mimg().sync;
237*61046927SAndroid Build Coastguard Worker    case Format::MTBUF: return instr->mtbuf().sync;
238*61046927SAndroid Build Coastguard Worker    case Format::FLAT:
239*61046927SAndroid Build Coastguard Worker    case Format::GLOBAL:
240*61046927SAndroid Build Coastguard Worker    case Format::SCRATCH: return instr->flatlike().sync;
241*61046927SAndroid Build Coastguard Worker    case Format::DS: return instr->ds().sync;
242*61046927SAndroid Build Coastguard Worker    case Format::LDSDIR: return instr->ldsdir().sync;
243*61046927SAndroid Build Coastguard Worker    default: return memory_sync_info();
244*61046927SAndroid Build Coastguard Worker    }
245*61046927SAndroid Build Coastguard Worker }
246*61046927SAndroid Build Coastguard Worker 
247*61046927SAndroid Build Coastguard Worker bool
can_use_SDWA(amd_gfx_level gfx_level,const aco_ptr<Instruction> & instr,bool pre_ra)248*61046927SAndroid Build Coastguard Worker can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool pre_ra)
249*61046927SAndroid Build Coastguard Worker {
250*61046927SAndroid Build Coastguard Worker    if (!instr->isVALU())
251*61046927SAndroid Build Coastguard Worker       return false;
252*61046927SAndroid Build Coastguard Worker 
253*61046927SAndroid Build Coastguard Worker    if (gfx_level < GFX8 || gfx_level >= GFX11 || instr->isDPP() || instr->isVOP3P())
254*61046927SAndroid Build Coastguard Worker       return false;
255*61046927SAndroid Build Coastguard Worker 
256*61046927SAndroid Build Coastguard Worker    if (instr->isSDWA())
257*61046927SAndroid Build Coastguard Worker       return true;
258*61046927SAndroid Build Coastguard Worker 
259*61046927SAndroid Build Coastguard Worker    if (instr->isVOP3()) {
260*61046927SAndroid Build Coastguard Worker       VALU_instruction& vop3 = instr->valu();
261*61046927SAndroid Build Coastguard Worker       if (instr->format == Format::VOP3)
262*61046927SAndroid Build Coastguard Worker          return false;
263*61046927SAndroid Build Coastguard Worker       if (vop3.clamp && instr->isVOPC() && gfx_level != GFX8)
264*61046927SAndroid Build Coastguard Worker          return false;
265*61046927SAndroid Build Coastguard Worker       if (vop3.omod && gfx_level < GFX9)
266*61046927SAndroid Build Coastguard Worker          return false;
267*61046927SAndroid Build Coastguard Worker 
268*61046927SAndroid Build Coastguard Worker       // TODO: return true if we know we will use vcc
269*61046927SAndroid Build Coastguard Worker       if (!pre_ra && instr->definitions.size() >= 2)
270*61046927SAndroid Build Coastguard Worker          return false;
271*61046927SAndroid Build Coastguard Worker 
272*61046927SAndroid Build Coastguard Worker       for (unsigned i = 1; i < instr->operands.size(); i++) {
273*61046927SAndroid Build Coastguard Worker          if (instr->operands[i].isLiteral())
274*61046927SAndroid Build Coastguard Worker             return false;
275*61046927SAndroid Build Coastguard Worker          if (gfx_level < GFX9 && !instr->operands[i].isOfType(RegType::vgpr))
276*61046927SAndroid Build Coastguard Worker             return false;
277*61046927SAndroid Build Coastguard Worker       }
278*61046927SAndroid Build Coastguard Worker    }
279*61046927SAndroid Build Coastguard Worker 
280*61046927SAndroid Build Coastguard Worker    if (!instr->definitions.empty() && instr->definitions[0].bytes() > 4 && !instr->isVOPC())
281*61046927SAndroid Build Coastguard Worker       return false;
282*61046927SAndroid Build Coastguard Worker 
283*61046927SAndroid Build Coastguard Worker    if (!instr->operands.empty()) {
284*61046927SAndroid Build Coastguard Worker       if (instr->operands[0].isLiteral())
285*61046927SAndroid Build Coastguard Worker          return false;
286*61046927SAndroid Build Coastguard Worker       if (gfx_level < GFX9 && !instr->operands[0].isOfType(RegType::vgpr))
287*61046927SAndroid Build Coastguard Worker          return false;
288*61046927SAndroid Build Coastguard Worker       if (instr->operands[0].bytes() > 4)
289*61046927SAndroid Build Coastguard Worker          return false;
290*61046927SAndroid Build Coastguard Worker       if (instr->operands.size() > 1 && instr->operands[1].bytes() > 4)
291*61046927SAndroid Build Coastguard Worker          return false;
292*61046927SAndroid Build Coastguard Worker    }
293*61046927SAndroid Build Coastguard Worker 
294*61046927SAndroid Build Coastguard Worker    bool is_mac = instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_mac_f16 ||
295*61046927SAndroid Build Coastguard Worker                  instr->opcode == aco_opcode::v_fmac_f32 || instr->opcode == aco_opcode::v_fmac_f16;
296*61046927SAndroid Build Coastguard Worker 
297*61046927SAndroid Build Coastguard Worker    if (gfx_level != GFX8 && is_mac)
298*61046927SAndroid Build Coastguard Worker       return false;
299*61046927SAndroid Build Coastguard Worker 
300*61046927SAndroid Build Coastguard Worker    // TODO: return true if we know we will use vcc
301*61046927SAndroid Build Coastguard Worker    if (!pre_ra && instr->isVOPC() && gfx_level == GFX8)
302*61046927SAndroid Build Coastguard Worker       return false;
303*61046927SAndroid Build Coastguard Worker    if (!pre_ra && instr->operands.size() >= 3 && !is_mac)
304*61046927SAndroid Build Coastguard Worker       return false;
305*61046927SAndroid Build Coastguard Worker 
306*61046927SAndroid Build Coastguard Worker    return instr->opcode != aco_opcode::v_madmk_f32 && instr->opcode != aco_opcode::v_madak_f32 &&
307*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_madmk_f16 && instr->opcode != aco_opcode::v_madak_f16 &&
308*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_fmamk_f32 && instr->opcode != aco_opcode::v_fmaak_f32 &&
309*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_fmamk_f16 && instr->opcode != aco_opcode::v_fmaak_f16 &&
310*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_readfirstlane_b32 &&
311*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_clrexcp && instr->opcode != aco_opcode::v_swap_b32;
312*61046927SAndroid Build Coastguard Worker }
313*61046927SAndroid Build Coastguard Worker 
314*61046927SAndroid Build Coastguard Worker /* updates "instr" and returns the old instruction (or NULL if no update was needed) */
315*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>
convert_to_SDWA(amd_gfx_level gfx_level,aco_ptr<Instruction> & instr)316*61046927SAndroid Build Coastguard Worker convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr)
317*61046927SAndroid Build Coastguard Worker {
318*61046927SAndroid Build Coastguard Worker    if (instr->isSDWA())
319*61046927SAndroid Build Coastguard Worker       return NULL;
320*61046927SAndroid Build Coastguard Worker 
321*61046927SAndroid Build Coastguard Worker    aco_ptr<Instruction> tmp = std::move(instr);
322*61046927SAndroid Build Coastguard Worker    Format format = asSDWA(withoutVOP3(tmp->format));
323*61046927SAndroid Build Coastguard Worker    instr.reset(
324*61046927SAndroid Build Coastguard Worker       create_instruction(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
325*61046927SAndroid Build Coastguard Worker    std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin());
326*61046927SAndroid Build Coastguard Worker    std::copy(tmp->definitions.cbegin(), tmp->definitions.cend(), instr->definitions.begin());
327*61046927SAndroid Build Coastguard Worker 
328*61046927SAndroid Build Coastguard Worker    SDWA_instruction& sdwa = instr->sdwa();
329*61046927SAndroid Build Coastguard Worker 
330*61046927SAndroid Build Coastguard Worker    if (tmp->isVOP3()) {
331*61046927SAndroid Build Coastguard Worker       VALU_instruction& vop3 = tmp->valu();
332*61046927SAndroid Build Coastguard Worker       sdwa.neg = vop3.neg;
333*61046927SAndroid Build Coastguard Worker       sdwa.abs = vop3.abs;
334*61046927SAndroid Build Coastguard Worker       sdwa.omod = vop3.omod;
335*61046927SAndroid Build Coastguard Worker       sdwa.clamp = vop3.clamp;
336*61046927SAndroid Build Coastguard Worker    }
337*61046927SAndroid Build Coastguard Worker 
338*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < instr->operands.size(); i++) {
339*61046927SAndroid Build Coastguard Worker       /* SDWA only uses operands 0 and 1. */
340*61046927SAndroid Build Coastguard Worker       if (i >= 2)
341*61046927SAndroid Build Coastguard Worker          break;
342*61046927SAndroid Build Coastguard Worker 
343*61046927SAndroid Build Coastguard Worker       sdwa.sel[i] = SubdwordSel(instr->operands[i].bytes(), 0, false);
344*61046927SAndroid Build Coastguard Worker    }
345*61046927SAndroid Build Coastguard Worker 
346*61046927SAndroid Build Coastguard Worker    sdwa.dst_sel = SubdwordSel(instr->definitions[0].bytes(), 0, false);
347*61046927SAndroid Build Coastguard Worker 
348*61046927SAndroid Build Coastguard Worker    if (instr->definitions[0].getTemp().type() == RegType::sgpr && gfx_level == GFX8)
349*61046927SAndroid Build Coastguard Worker       instr->definitions[0].setFixed(vcc);
350*61046927SAndroid Build Coastguard Worker    if (instr->definitions.size() >= 2)
351*61046927SAndroid Build Coastguard Worker       instr->definitions[1].setFixed(vcc);
352*61046927SAndroid Build Coastguard Worker    if (instr->operands.size() >= 3)
353*61046927SAndroid Build Coastguard Worker       instr->operands[2].setFixed(vcc);
354*61046927SAndroid Build Coastguard Worker 
355*61046927SAndroid Build Coastguard Worker    instr->pass_flags = tmp->pass_flags;
356*61046927SAndroid Build Coastguard Worker 
357*61046927SAndroid Build Coastguard Worker    return tmp;
358*61046927SAndroid Build Coastguard Worker }
359*61046927SAndroid Build Coastguard Worker 
360*61046927SAndroid Build Coastguard Worker bool
can_use_DPP(amd_gfx_level gfx_level,const aco_ptr<Instruction> & instr,bool dpp8)361*61046927SAndroid Build Coastguard Worker can_use_DPP(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool dpp8)
362*61046927SAndroid Build Coastguard Worker {
363*61046927SAndroid Build Coastguard Worker    assert(instr->isVALU() && !instr->operands.empty());
364*61046927SAndroid Build Coastguard Worker 
365*61046927SAndroid Build Coastguard Worker    if (instr->isDPP())
366*61046927SAndroid Build Coastguard Worker       return instr->isDPP8() == dpp8;
367*61046927SAndroid Build Coastguard Worker 
368*61046927SAndroid Build Coastguard Worker    if (instr->isSDWA() || instr->isVINTERP_INREG())
369*61046927SAndroid Build Coastguard Worker       return false;
370*61046927SAndroid Build Coastguard Worker 
371*61046927SAndroid Build Coastguard Worker    if ((instr->format == Format::VOP3 || instr->isVOP3P()) && gfx_level < GFX11)
372*61046927SAndroid Build Coastguard Worker       return false;
373*61046927SAndroid Build Coastguard Worker 
374*61046927SAndroid Build Coastguard Worker    if ((instr->isVOPC() || instr->definitions.size() > 1) && instr->definitions.back().isFixed() &&
375*61046927SAndroid Build Coastguard Worker        instr->definitions.back().physReg() != vcc && gfx_level < GFX11)
376*61046927SAndroid Build Coastguard Worker       return false;
377*61046927SAndroid Build Coastguard Worker 
378*61046927SAndroid Build Coastguard Worker    if (instr->operands.size() >= 3 && instr->operands[2].isFixed() &&
379*61046927SAndroid Build Coastguard Worker        instr->operands[2].isOfType(RegType::sgpr) && instr->operands[2].physReg() != vcc &&
380*61046927SAndroid Build Coastguard Worker        gfx_level < GFX11)
381*61046927SAndroid Build Coastguard Worker       return false;
382*61046927SAndroid Build Coastguard Worker 
383*61046927SAndroid Build Coastguard Worker    if (instr->isVOP3() && gfx_level < GFX11) {
384*61046927SAndroid Build Coastguard Worker       const VALU_instruction* vop3 = &instr->valu();
385*61046927SAndroid Build Coastguard Worker       if (vop3->clamp || vop3->omod)
386*61046927SAndroid Build Coastguard Worker          return false;
387*61046927SAndroid Build Coastguard Worker       if (dpp8)
388*61046927SAndroid Build Coastguard Worker          return false;
389*61046927SAndroid Build Coastguard Worker    }
390*61046927SAndroid Build Coastguard Worker 
391*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < instr->operands.size(); i++) {
392*61046927SAndroid Build Coastguard Worker       if (instr->operands[i].isLiteral())
393*61046927SAndroid Build Coastguard Worker          return false;
394*61046927SAndroid Build Coastguard Worker       if (!instr->operands[i].isOfType(RegType::vgpr) && i < 2)
395*61046927SAndroid Build Coastguard Worker          return false;
396*61046927SAndroid Build Coastguard Worker    }
397*61046927SAndroid Build Coastguard Worker 
398*61046927SAndroid Build Coastguard Worker    /* According to LLVM, it's unsafe to combine DPP into v_cmpx. */
399*61046927SAndroid Build Coastguard Worker    if (instr->writes_exec())
400*61046927SAndroid Build Coastguard Worker       return false;
401*61046927SAndroid Build Coastguard Worker 
402*61046927SAndroid Build Coastguard Worker    /* simpler than listing all VOP3P opcodes which do not support DPP */
403*61046927SAndroid Build Coastguard Worker    if (instr->isVOP3P()) {
404*61046927SAndroid Build Coastguard Worker       return instr->opcode == aco_opcode::v_fma_mix_f32 ||
405*61046927SAndroid Build Coastguard Worker              instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
406*61046927SAndroid Build Coastguard Worker              instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
407*61046927SAndroid Build Coastguard Worker              instr->opcode == aco_opcode::v_dot2_f32_f16 ||
408*61046927SAndroid Build Coastguard Worker              instr->opcode == aco_opcode::v_dot2_f32_bf16;
409*61046927SAndroid Build Coastguard Worker    }
410*61046927SAndroid Build Coastguard Worker 
411*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::v_pk_fmac_f16)
412*61046927SAndroid Build Coastguard Worker       return gfx_level < GFX11;
413*61046927SAndroid Build Coastguard Worker 
414*61046927SAndroid Build Coastguard Worker    /* there are more cases but those all take 64-bit inputs */
415*61046927SAndroid Build Coastguard Worker    return instr->opcode != aco_opcode::v_madmk_f32 && instr->opcode != aco_opcode::v_madak_f32 &&
416*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_madmk_f16 && instr->opcode != aco_opcode::v_madak_f16 &&
417*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_fmamk_f32 && instr->opcode != aco_opcode::v_fmaak_f32 &&
418*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_fmamk_f16 && instr->opcode != aco_opcode::v_fmaak_f16 &&
419*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_readfirstlane_b32 &&
420*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_cvt_f64_i32 &&
421*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_cvt_f64_f32 &&
422*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_cvt_f64_u32 && instr->opcode != aco_opcode::v_mul_lo_u32 &&
423*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_mul_lo_i32 && instr->opcode != aco_opcode::v_mul_hi_u32 &&
424*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_mul_hi_i32 &&
425*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_qsad_pk_u16_u8 &&
426*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_mqsad_pk_u16_u8 &&
427*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_mqsad_u32_u8 &&
428*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_mad_u64_u32 &&
429*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_mad_i64_i32 &&
430*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_permlane16_b32 &&
431*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_permlanex16_b32 &&
432*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_permlane64_b32 &&
433*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_readlane_b32_e64 &&
434*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::v_writelane_b32_e64 &&
435*61046927SAndroid Build Coastguard Worker           instr->opcode != aco_opcode::p_v_cvt_pk_u8_f32;
436*61046927SAndroid Build Coastguard Worker }
437*61046927SAndroid Build Coastguard Worker 
438*61046927SAndroid Build Coastguard Worker aco_ptr<Instruction>
convert_to_DPP(amd_gfx_level gfx_level,aco_ptr<Instruction> & instr,bool dpp8)439*61046927SAndroid Build Coastguard Worker convert_to_DPP(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, bool dpp8)
440*61046927SAndroid Build Coastguard Worker {
441*61046927SAndroid Build Coastguard Worker    if (instr->isDPP())
442*61046927SAndroid Build Coastguard Worker       return NULL;
443*61046927SAndroid Build Coastguard Worker 
444*61046927SAndroid Build Coastguard Worker    aco_ptr<Instruction> tmp = std::move(instr);
445*61046927SAndroid Build Coastguard Worker    Format format =
446*61046927SAndroid Build Coastguard Worker       (Format)((uint32_t)tmp->format | (uint32_t)(dpp8 ? Format::DPP8 : Format::DPP16));
447*61046927SAndroid Build Coastguard Worker    if (dpp8)
448*61046927SAndroid Build Coastguard Worker       instr.reset(
449*61046927SAndroid Build Coastguard Worker          create_instruction(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
450*61046927SAndroid Build Coastguard Worker    else
451*61046927SAndroid Build Coastguard Worker       instr.reset(
452*61046927SAndroid Build Coastguard Worker          create_instruction(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
453*61046927SAndroid Build Coastguard Worker    std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin());
454*61046927SAndroid Build Coastguard Worker    std::copy(tmp->definitions.cbegin(), tmp->definitions.cend(), instr->definitions.begin());
455*61046927SAndroid Build Coastguard Worker 
456*61046927SAndroid Build Coastguard Worker    if (dpp8) {
457*61046927SAndroid Build Coastguard Worker       DPP8_instruction* dpp = &instr->dpp8();
458*61046927SAndroid Build Coastguard Worker       dpp->lane_sel = 0xfac688; /* [0,1,2,3,4,5,6,7] */
459*61046927SAndroid Build Coastguard Worker       dpp->fetch_inactive = gfx_level >= GFX10;
460*61046927SAndroid Build Coastguard Worker    } else {
461*61046927SAndroid Build Coastguard Worker       DPP16_instruction* dpp = &instr->dpp16();
462*61046927SAndroid Build Coastguard Worker       dpp->dpp_ctrl = dpp_quad_perm(0, 1, 2, 3);
463*61046927SAndroid Build Coastguard Worker       dpp->row_mask = 0xf;
464*61046927SAndroid Build Coastguard Worker       dpp->bank_mask = 0xf;
465*61046927SAndroid Build Coastguard Worker       dpp->fetch_inactive = gfx_level >= GFX10;
466*61046927SAndroid Build Coastguard Worker    }
467*61046927SAndroid Build Coastguard Worker 
468*61046927SAndroid Build Coastguard Worker    instr->valu().neg = tmp->valu().neg;
469*61046927SAndroid Build Coastguard Worker    instr->valu().abs = tmp->valu().abs;
470*61046927SAndroid Build Coastguard Worker    instr->valu().omod = tmp->valu().omod;
471*61046927SAndroid Build Coastguard Worker    instr->valu().clamp = tmp->valu().clamp;
472*61046927SAndroid Build Coastguard Worker    instr->valu().opsel = tmp->valu().opsel;
473*61046927SAndroid Build Coastguard Worker    instr->valu().opsel_lo = tmp->valu().opsel_lo;
474*61046927SAndroid Build Coastguard Worker    instr->valu().opsel_hi = tmp->valu().opsel_hi;
475*61046927SAndroid Build Coastguard Worker 
476*61046927SAndroid Build Coastguard Worker    if ((instr->isVOPC() || instr->definitions.size() > 1) && gfx_level < GFX11)
477*61046927SAndroid Build Coastguard Worker       instr->definitions.back().setFixed(vcc);
478*61046927SAndroid Build Coastguard Worker 
479*61046927SAndroid Build Coastguard Worker    if (instr->operands.size() >= 3 && instr->operands[2].isOfType(RegType::sgpr) &&
480*61046927SAndroid Build Coastguard Worker        gfx_level < GFX11)
481*61046927SAndroid Build Coastguard Worker       instr->operands[2].setFixed(vcc);
482*61046927SAndroid Build Coastguard Worker 
483*61046927SAndroid Build Coastguard Worker    instr->pass_flags = tmp->pass_flags;
484*61046927SAndroid Build Coastguard Worker 
485*61046927SAndroid Build Coastguard Worker    /* DPP16 supports input modifiers, so we might no longer need VOP3. */
486*61046927SAndroid Build Coastguard Worker    bool remove_vop3 = !dpp8 && !instr->valu().omod && !instr->valu().clamp &&
487*61046927SAndroid Build Coastguard Worker                       (instr->isVOP1() || instr->isVOP2() || instr->isVOPC());
488*61046927SAndroid Build Coastguard Worker 
489*61046927SAndroid Build Coastguard Worker    /* VOPC/add_co/sub_co definition needs VCC without VOP3. */
490*61046927SAndroid Build Coastguard Worker    remove_vop3 &= instr->definitions.back().regClass().type() != RegType::sgpr ||
491*61046927SAndroid Build Coastguard Worker                   !instr->definitions.back().isFixed() ||
492*61046927SAndroid Build Coastguard Worker                   instr->definitions.back().physReg() == vcc;
493*61046927SAndroid Build Coastguard Worker 
494*61046927SAndroid Build Coastguard Worker    /* addc/subb/cndmask 3rd operand needs VCC without VOP3. */
495*61046927SAndroid Build Coastguard Worker    remove_vop3 &= instr->operands.size() < 3 || !instr->operands[2].isFixed() ||
496*61046927SAndroid Build Coastguard Worker                   instr->operands[2].isOfType(RegType::vgpr) || instr->operands[2].physReg() == vcc;
497*61046927SAndroid Build Coastguard Worker 
498*61046927SAndroid Build Coastguard Worker    if (remove_vop3)
499*61046927SAndroid Build Coastguard Worker       instr->format = withoutVOP3(instr->format);
500*61046927SAndroid Build Coastguard Worker 
501*61046927SAndroid Build Coastguard Worker    return tmp;
502*61046927SAndroid Build Coastguard Worker }
503*61046927SAndroid Build Coastguard Worker 
504*61046927SAndroid Build Coastguard Worker bool
can_use_input_modifiers(amd_gfx_level gfx_level,aco_opcode op,int idx)505*61046927SAndroid Build Coastguard Worker can_use_input_modifiers(amd_gfx_level gfx_level, aco_opcode op, int idx)
506*61046927SAndroid Build Coastguard Worker {
507*61046927SAndroid Build Coastguard Worker    if (op == aco_opcode::v_mov_b32)
508*61046927SAndroid Build Coastguard Worker       return gfx_level >= GFX10;
509*61046927SAndroid Build Coastguard Worker 
510*61046927SAndroid Build Coastguard Worker    if (op == aco_opcode::v_ldexp_f16 || op == aco_opcode::v_ldexp_f32 ||
511*61046927SAndroid Build Coastguard Worker        op == aco_opcode::v_ldexp_f64)
512*61046927SAndroid Build Coastguard Worker       return idx == 0;
513*61046927SAndroid Build Coastguard Worker 
514*61046927SAndroid Build Coastguard Worker    return instr_info.can_use_input_modifiers[(int)op];
515*61046927SAndroid Build Coastguard Worker }
516*61046927SAndroid Build Coastguard Worker 
517*61046927SAndroid Build Coastguard Worker bool
can_use_opsel(amd_gfx_level gfx_level,aco_opcode op,int idx)518*61046927SAndroid Build Coastguard Worker can_use_opsel(amd_gfx_level gfx_level, aco_opcode op, int idx)
519*61046927SAndroid Build Coastguard Worker {
520*61046927SAndroid Build Coastguard Worker    /* opsel is only GFX9+ */
521*61046927SAndroid Build Coastguard Worker    if (gfx_level < GFX9)
522*61046927SAndroid Build Coastguard Worker       return false;
523*61046927SAndroid Build Coastguard Worker 
524*61046927SAndroid Build Coastguard Worker    switch (op) {
525*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_div_fixup_f16:
526*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fma_f16:
527*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mad_f16:
528*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mad_u16:
529*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mad_i16:
530*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_med3_f16:
531*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_med3_i16:
532*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_med3_u16:
533*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min3_f16:
534*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min3_i16:
535*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min3_u16:
536*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max3_f16:
537*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max3_i16:
538*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max3_u16:
539*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_minmax_f16:
540*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_maxmin_f16:
541*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_u16_e64:
542*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_i16_e64:
543*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_u16_e64:
544*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_i16_e64:
545*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_i16:
546*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_i16:
547*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_u16_e64:
548*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_u16_e64:
549*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_lshlrev_b16_e64:
550*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_lshrrev_b16_e64:
551*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_ashrrev_i16_e64:
552*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_and_b16:
553*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_or_b16:
554*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_xor_b16:
555*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_lo_u16_e64: return true;
556*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_pack_b32_f16:
557*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_pknorm_i16_f16:
558*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_pknorm_u16_f16: return idx != -1;
559*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mad_u32_u16:
560*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mad_i32_i16: return idx >= 0 && idx < 2;
561*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_dot2_f16_f16:
562*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_dot2_bf16_bf16: return idx == -1 || idx == 2;
563*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cndmask_b16: return idx != 2;
564*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p10_f16_f32_inreg:
565*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p10_rtz_f16_f32_inreg: return idx == 0 || idx == 2;
566*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p2_f16_f32_inreg:
567*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p2_rtz_f16_f32_inreg: return idx == -1 || idx == 0;
568*61046927SAndroid Build Coastguard Worker    default:
569*61046927SAndroid Build Coastguard Worker       return gfx_level >= GFX11 && (get_gfx11_true16_mask(op) & BITFIELD_BIT(idx == -1 ? 3 : idx));
570*61046927SAndroid Build Coastguard Worker    }
571*61046927SAndroid Build Coastguard Worker }
572*61046927SAndroid Build Coastguard Worker 
573*61046927SAndroid Build Coastguard Worker bool
can_write_m0(const aco_ptr<Instruction> & instr)574*61046927SAndroid Build Coastguard Worker can_write_m0(const aco_ptr<Instruction>& instr)
575*61046927SAndroid Build Coastguard Worker {
576*61046927SAndroid Build Coastguard Worker    if (instr->isSALU())
577*61046927SAndroid Build Coastguard Worker       return true;
578*61046927SAndroid Build Coastguard Worker 
579*61046927SAndroid Build Coastguard Worker    /* VALU can't write m0 on any GPU generations. */
580*61046927SAndroid Build Coastguard Worker    if (instr->isVALU())
581*61046927SAndroid Build Coastguard Worker       return false;
582*61046927SAndroid Build Coastguard Worker 
583*61046927SAndroid Build Coastguard Worker    switch (instr->opcode) {
584*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_parallelcopy:
585*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_extract:
586*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_insert:
587*61046927SAndroid Build Coastguard Worker       /* These pseudo instructions are implemented with SALU when writing m0. */
588*61046927SAndroid Build Coastguard Worker       return true;
589*61046927SAndroid Build Coastguard Worker    default:
590*61046927SAndroid Build Coastguard Worker       /* Assume that no other instructions can write m0. */
591*61046927SAndroid Build Coastguard Worker       return false;
592*61046927SAndroid Build Coastguard Worker    }
593*61046927SAndroid Build Coastguard Worker }
594*61046927SAndroid Build Coastguard Worker 
595*61046927SAndroid Build Coastguard Worker bool
instr_is_16bit(amd_gfx_level gfx_level,aco_opcode op)596*61046927SAndroid Build Coastguard Worker instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op)
597*61046927SAndroid Build Coastguard Worker {
598*61046927SAndroid Build Coastguard Worker    /* partial register writes are GFX9+, only */
599*61046927SAndroid Build Coastguard Worker    if (gfx_level < GFX9)
600*61046927SAndroid Build Coastguard Worker       return false;
601*61046927SAndroid Build Coastguard Worker 
602*61046927SAndroid Build Coastguard Worker    switch (op) {
603*61046927SAndroid Build Coastguard Worker    /* VOP3 */
604*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mad_legacy_f16:
605*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mad_legacy_u16:
606*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mad_legacy_i16:
607*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fma_legacy_f16:
608*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_div_fixup_legacy_f16: return false;
609*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p2_f16:
610*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_interp_p2_hi_f16:
611*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fma_mixlo_f16:
612*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fma_mixhi_f16:
613*61046927SAndroid Build Coastguard Worker    /* VOP2 */
614*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mac_f16:
615*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_madak_f16:
616*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_madmk_f16: return gfx_level >= GFX9;
617*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_f16:
618*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_f16:
619*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subrev_f16:
620*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_f16:
621*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_f16:
622*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_f16:
623*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_ldexp_f16:
624*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fmac_f16:
625*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fmamk_f16:
626*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fmaak_f16:
627*61046927SAndroid Build Coastguard Worker    /* VOP1 */
628*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_f16_f32:
629*61046927SAndroid Build Coastguard Worker    case aco_opcode::p_v_cvt_f16_f32_rtne:
630*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_f16_u16:
631*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_f16_i16:
632*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_rcp_f16:
633*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sqrt_f16:
634*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_rsq_f16:
635*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_log_f16:
636*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_exp_f16:
637*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_frexp_mant_f16:
638*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_frexp_exp_i16_f16:
639*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_floor_f16:
640*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_ceil_f16:
641*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_trunc_f16:
642*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_rndne_f16:
643*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fract_f16:
644*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sin_f16:
645*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cos_f16:
646*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_u16_f16:
647*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_i16_f16:
648*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_norm_i16_f16:
649*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_norm_u16_f16: return gfx_level >= GFX10;
650*61046927SAndroid Build Coastguard Worker    /* all non legacy opsel instructions preserve the high bits */
651*61046927SAndroid Build Coastguard Worker    default: return can_use_opsel(gfx_level, op, -1);
652*61046927SAndroid Build Coastguard Worker    }
653*61046927SAndroid Build Coastguard Worker }
654*61046927SAndroid Build Coastguard Worker 
655*61046927SAndroid Build Coastguard Worker /* On GFX11, for some instructions, bit 7 of the destination/operand vgpr is opsel and the field
656*61046927SAndroid Build Coastguard Worker  * only supports v0-v127.
657*61046927SAndroid Build Coastguard Worker  * The first three bits are used for operands 0-2, and the 4th bit is used for the destination.
658*61046927SAndroid Build Coastguard Worker  */
659*61046927SAndroid Build Coastguard Worker uint8_t
get_gfx11_true16_mask(aco_opcode op)660*61046927SAndroid Build Coastguard Worker get_gfx11_true16_mask(aco_opcode op)
661*61046927SAndroid Build Coastguard Worker {
662*61046927SAndroid Build Coastguard Worker    switch (op) {
663*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_ceil_f16:
664*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cos_f16:
665*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_f16_i16:
666*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_f16_u16:
667*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_i16_f16:
668*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_u16_f16:
669*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_norm_i16_f16:
670*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_norm_u16_f16:
671*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_exp_f16:
672*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_floor_f16:
673*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fract_f16:
674*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_frexp_exp_i16_f16:
675*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_frexp_mant_f16:
676*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_log_f16:
677*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_not_b16:
678*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_rcp_f16:
679*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_rndne_f16:
680*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_rsq_f16:
681*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sin_f16:
682*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sqrt_f16:
683*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_trunc_f16:
684*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_swap_b16:
685*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mov_b16: return 0x1 | 0x8;
686*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_f16:
687*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fmaak_f16:
688*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fmac_f16:
689*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fmamk_f16:
690*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_ldexp_f16:
691*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_f16:
692*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_f16:
693*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_f16:
694*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_f16:
695*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subrev_f16:
696*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_and_b16:
697*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_or_b16:
698*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_xor_b16: return 0x3 | 0x8;
699*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_f32_f16:
700*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_i32_i16:
701*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_u32_u16: return 0x1;
702*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_class_f16:
703*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_eq_f16:
704*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_eq_i16:
705*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_eq_u16:
706*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_ge_f16:
707*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_ge_i16:
708*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_ge_u16:
709*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_gt_f16:
710*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_gt_i16:
711*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_gt_u16:
712*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_le_f16:
713*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_le_i16:
714*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_le_u16:
715*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_lg_f16:
716*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_lg_i16:
717*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_lg_u16:
718*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_lt_f16:
719*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_lt_i16:
720*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_lt_u16:
721*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_neq_f16:
722*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_nge_f16:
723*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_ngt_f16:
724*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_nle_f16:
725*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_nlg_f16:
726*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_nlt_f16:
727*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_o_f16:
728*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_u_f16:
729*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_class_f16:
730*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_eq_f16:
731*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_eq_i16:
732*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_eq_u16:
733*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_ge_f16:
734*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_ge_i16:
735*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_ge_u16:
736*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_gt_f16:
737*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_gt_i16:
738*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_gt_u16:
739*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_le_f16:
740*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_le_i16:
741*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_le_u16:
742*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_lg_f16:
743*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_lg_i16:
744*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_lg_u16:
745*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_lt_f16:
746*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_lt_i16:
747*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_lt_u16:
748*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_neq_f16:
749*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_nge_f16:
750*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_ngt_f16:
751*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_nle_f16:
752*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_nlg_f16:
753*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_nlt_f16:
754*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_o_f16:
755*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmpx_u_f16: return 0x3;
756*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cvt_f16_f32:
757*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sat_pk_u8_i16: return 0x8;
758*61046927SAndroid Build Coastguard Worker    default: return 0x0;
759*61046927SAndroid Build Coastguard Worker    }
760*61046927SAndroid Build Coastguard Worker }
761*61046927SAndroid Build Coastguard Worker 
762*61046927SAndroid Build Coastguard Worker uint32_t
get_reduction_identity(ReduceOp op,unsigned idx)763*61046927SAndroid Build Coastguard Worker get_reduction_identity(ReduceOp op, unsigned idx)
764*61046927SAndroid Build Coastguard Worker {
765*61046927SAndroid Build Coastguard Worker    switch (op) {
766*61046927SAndroid Build Coastguard Worker    case iadd8:
767*61046927SAndroid Build Coastguard Worker    case iadd16:
768*61046927SAndroid Build Coastguard Worker    case iadd32:
769*61046927SAndroid Build Coastguard Worker    case iadd64:
770*61046927SAndroid Build Coastguard Worker    case fadd16:
771*61046927SAndroid Build Coastguard Worker    case fadd32:
772*61046927SAndroid Build Coastguard Worker    case fadd64:
773*61046927SAndroid Build Coastguard Worker    case ior8:
774*61046927SAndroid Build Coastguard Worker    case ior16:
775*61046927SAndroid Build Coastguard Worker    case ior32:
776*61046927SAndroid Build Coastguard Worker    case ior64:
777*61046927SAndroid Build Coastguard Worker    case ixor8:
778*61046927SAndroid Build Coastguard Worker    case ixor16:
779*61046927SAndroid Build Coastguard Worker    case ixor32:
780*61046927SAndroid Build Coastguard Worker    case ixor64:
781*61046927SAndroid Build Coastguard Worker    case umax8:
782*61046927SAndroid Build Coastguard Worker    case umax16:
783*61046927SAndroid Build Coastguard Worker    case umax32:
784*61046927SAndroid Build Coastguard Worker    case umax64: return 0;
785*61046927SAndroid Build Coastguard Worker    case imul8:
786*61046927SAndroid Build Coastguard Worker    case imul16:
787*61046927SAndroid Build Coastguard Worker    case imul32:
788*61046927SAndroid Build Coastguard Worker    case imul64: return idx ? 0 : 1;
789*61046927SAndroid Build Coastguard Worker    case fmul16: return 0x3c00u;                /* 1.0 */
790*61046927SAndroid Build Coastguard Worker    case fmul32: return 0x3f800000u;            /* 1.0 */
791*61046927SAndroid Build Coastguard Worker    case fmul64: return idx ? 0x3ff00000u : 0u; /* 1.0 */
792*61046927SAndroid Build Coastguard Worker    case imin8: return INT8_MAX;
793*61046927SAndroid Build Coastguard Worker    case imin16: return INT16_MAX;
794*61046927SAndroid Build Coastguard Worker    case imin32: return INT32_MAX;
795*61046927SAndroid Build Coastguard Worker    case imin64: return idx ? 0x7fffffffu : 0xffffffffu;
796*61046927SAndroid Build Coastguard Worker    case imax8: return INT8_MIN;
797*61046927SAndroid Build Coastguard Worker    case imax16: return INT16_MIN;
798*61046927SAndroid Build Coastguard Worker    case imax32: return INT32_MIN;
799*61046927SAndroid Build Coastguard Worker    case imax64: return idx ? 0x80000000u : 0;
800*61046927SAndroid Build Coastguard Worker    case umin8:
801*61046927SAndroid Build Coastguard Worker    case umin16:
802*61046927SAndroid Build Coastguard Worker    case iand8:
803*61046927SAndroid Build Coastguard Worker    case iand16: return 0xffffffffu;
804*61046927SAndroid Build Coastguard Worker    case umin32:
805*61046927SAndroid Build Coastguard Worker    case umin64:
806*61046927SAndroid Build Coastguard Worker    case iand32:
807*61046927SAndroid Build Coastguard Worker    case iand64: return 0xffffffffu;
808*61046927SAndroid Build Coastguard Worker    case fmin16: return 0x7c00u;                /* infinity */
809*61046927SAndroid Build Coastguard Worker    case fmin32: return 0x7f800000u;            /* infinity */
810*61046927SAndroid Build Coastguard Worker    case fmin64: return idx ? 0x7ff00000u : 0u; /* infinity */
811*61046927SAndroid Build Coastguard Worker    case fmax16: return 0xfc00u;                /* negative infinity */
812*61046927SAndroid Build Coastguard Worker    case fmax32: return 0xff800000u;            /* negative infinity */
813*61046927SAndroid Build Coastguard Worker    case fmax64: return idx ? 0xfff00000u : 0u; /* negative infinity */
814*61046927SAndroid Build Coastguard Worker    default: unreachable("Invalid reduction operation"); break;
815*61046927SAndroid Build Coastguard Worker    }
816*61046927SAndroid Build Coastguard Worker    return 0;
817*61046927SAndroid Build Coastguard Worker }
818*61046927SAndroid Build Coastguard Worker 
819*61046927SAndroid Build Coastguard Worker unsigned
get_operand_size(aco_ptr<Instruction> & instr,unsigned index)820*61046927SAndroid Build Coastguard Worker get_operand_size(aco_ptr<Instruction>& instr, unsigned index)
821*61046927SAndroid Build Coastguard Worker {
822*61046927SAndroid Build Coastguard Worker    if (instr->isPseudo())
823*61046927SAndroid Build Coastguard Worker       return instr->operands[index].bytes() * 8u;
824*61046927SAndroid Build Coastguard Worker    else if (instr->opcode == aco_opcode::v_mad_u64_u32 ||
825*61046927SAndroid Build Coastguard Worker             instr->opcode == aco_opcode::v_mad_i64_i32)
826*61046927SAndroid Build Coastguard Worker       return index == 2 ? 64 : 32;
827*61046927SAndroid Build Coastguard Worker    else if (instr->opcode == aco_opcode::v_fma_mix_f32 ||
828*61046927SAndroid Build Coastguard Worker             instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
829*61046927SAndroid Build Coastguard Worker             instr->opcode == aco_opcode::v_fma_mixhi_f16)
830*61046927SAndroid Build Coastguard Worker       return instr->valu().opsel_hi[index] ? 16 : 32;
831*61046927SAndroid Build Coastguard Worker    else if (instr->opcode == aco_opcode::v_interp_p10_f16_f32_inreg ||
832*61046927SAndroid Build Coastguard Worker             instr->opcode == aco_opcode::v_interp_p10_rtz_f16_f32_inreg)
833*61046927SAndroid Build Coastguard Worker       return index == 1 ? 32 : 16;
834*61046927SAndroid Build Coastguard Worker    else if (instr->opcode == aco_opcode::v_interp_p2_f16_f32_inreg ||
835*61046927SAndroid Build Coastguard Worker             instr->opcode == aco_opcode::v_interp_p2_rtz_f16_f32_inreg)
836*61046927SAndroid Build Coastguard Worker       return index == 0 ? 16 : 32;
837*61046927SAndroid Build Coastguard Worker    else if (instr->isVALU() || instr->isSALU())
838*61046927SAndroid Build Coastguard Worker       return instr_info.operand_size[(int)instr->opcode];
839*61046927SAndroid Build Coastguard Worker    else
840*61046927SAndroid Build Coastguard Worker       return 0;
841*61046927SAndroid Build Coastguard Worker }
842*61046927SAndroid Build Coastguard Worker 
843*61046927SAndroid Build Coastguard Worker bool
needs_exec_mask(const Instruction * instr)844*61046927SAndroid Build Coastguard Worker needs_exec_mask(const Instruction* instr)
845*61046927SAndroid Build Coastguard Worker {
846*61046927SAndroid Build Coastguard Worker    if (instr->isVALU()) {
847*61046927SAndroid Build Coastguard Worker       return instr->opcode != aco_opcode::v_readlane_b32 &&
848*61046927SAndroid Build Coastguard Worker              instr->opcode != aco_opcode::v_readlane_b32_e64 &&
849*61046927SAndroid Build Coastguard Worker              instr->opcode != aco_opcode::v_writelane_b32 &&
850*61046927SAndroid Build Coastguard Worker              instr->opcode != aco_opcode::v_writelane_b32_e64;
851*61046927SAndroid Build Coastguard Worker    }
852*61046927SAndroid Build Coastguard Worker 
853*61046927SAndroid Build Coastguard Worker    if (instr->isVMEM() || instr->isFlatLike())
854*61046927SAndroid Build Coastguard Worker       return true;
855*61046927SAndroid Build Coastguard Worker 
856*61046927SAndroid Build Coastguard Worker    if (instr->isSALU() || instr->isBranch() || instr->isSMEM() || instr->isBarrier())
857*61046927SAndroid Build Coastguard Worker       return instr->reads_exec();
858*61046927SAndroid Build Coastguard Worker 
859*61046927SAndroid Build Coastguard Worker    if (instr->isPseudo()) {
860*61046927SAndroid Build Coastguard Worker       switch (instr->opcode) {
861*61046927SAndroid Build Coastguard Worker       case aco_opcode::p_create_vector:
862*61046927SAndroid Build Coastguard Worker       case aco_opcode::p_extract_vector:
863*61046927SAndroid Build Coastguard Worker       case aco_opcode::p_split_vector:
864*61046927SAndroid Build Coastguard Worker       case aco_opcode::p_phi:
865*61046927SAndroid Build Coastguard Worker       case aco_opcode::p_parallelcopy:
866*61046927SAndroid Build Coastguard Worker          for (Definition def : instr->definitions) {
867*61046927SAndroid Build Coastguard Worker             if (def.getTemp().type() == RegType::vgpr)
868*61046927SAndroid Build Coastguard Worker                return true;
869*61046927SAndroid Build Coastguard Worker          }
870*61046927SAndroid Build Coastguard Worker          return instr->reads_exec();
871*61046927SAndroid Build Coastguard Worker       case aco_opcode::p_spill:
872*61046927SAndroid Build Coastguard Worker       case aco_opcode::p_reload:
873*61046927SAndroid Build Coastguard Worker       case aco_opcode::p_end_linear_vgpr:
874*61046927SAndroid Build Coastguard Worker       case aco_opcode::p_logical_start:
875*61046927SAndroid Build Coastguard Worker       case aco_opcode::p_logical_end:
876*61046927SAndroid Build Coastguard Worker       case aco_opcode::p_startpgm:
877*61046927SAndroid Build Coastguard Worker       case aco_opcode::p_end_wqm:
878*61046927SAndroid Build Coastguard Worker       case aco_opcode::p_init_scratch: return instr->reads_exec();
879*61046927SAndroid Build Coastguard Worker       case aco_opcode::p_start_linear_vgpr: return instr->operands.size();
880*61046927SAndroid Build Coastguard Worker       default: break;
881*61046927SAndroid Build Coastguard Worker       }
882*61046927SAndroid Build Coastguard Worker    }
883*61046927SAndroid Build Coastguard Worker 
884*61046927SAndroid Build Coastguard Worker    return true;
885*61046927SAndroid Build Coastguard Worker }
886*61046927SAndroid Build Coastguard Worker 
887*61046927SAndroid Build Coastguard Worker struct CmpInfo {
888*61046927SAndroid Build Coastguard Worker    aco_opcode swapped;
889*61046927SAndroid Build Coastguard Worker    aco_opcode inverse;
890*61046927SAndroid Build Coastguard Worker    aco_opcode vcmpx;
891*61046927SAndroid Build Coastguard Worker };
892*61046927SAndroid Build Coastguard Worker 
893*61046927SAndroid Build Coastguard Worker static ALWAYS_INLINE bool
get_cmp_info(aco_opcode op,CmpInfo * info)894*61046927SAndroid Build Coastguard Worker get_cmp_info(aco_opcode op, CmpInfo* info)
895*61046927SAndroid Build Coastguard Worker {
896*61046927SAndroid Build Coastguard Worker    info->swapped = aco_opcode::num_opcodes;
897*61046927SAndroid Build Coastguard Worker    info->inverse = aco_opcode::num_opcodes;
898*61046927SAndroid Build Coastguard Worker    info->vcmpx = aco_opcode::num_opcodes;
899*61046927SAndroid Build Coastguard Worker    switch (op) {
900*61046927SAndroid Build Coastguard Worker       // clang-format off
901*61046927SAndroid Build Coastguard Worker #define CMP2(ord, unord, ord_swap, unord_swap, sz)                                                 \
902*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_##ord##_f##sz:                                                           \
903*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_n##unord##_f##sz:                                                        \
904*61046927SAndroid Build Coastguard Worker       info->swapped = op == aco_opcode::v_cmp_##ord##_f##sz ? aco_opcode::v_cmp_##ord_swap##_f##sz \
905*61046927SAndroid Build Coastguard Worker                                                       : aco_opcode::v_cmp_n##unord_swap##_f##sz;   \
906*61046927SAndroid Build Coastguard Worker       info->inverse = op == aco_opcode::v_cmp_n##unord##_f##sz ? aco_opcode::v_cmp_##unord##_f##sz \
907*61046927SAndroid Build Coastguard Worker                                                                : aco_opcode::v_cmp_n##ord##_f##sz; \
908*61046927SAndroid Build Coastguard Worker       info->vcmpx = op == aco_opcode::v_cmp_##ord##_f##sz ? aco_opcode::v_cmpx_##ord##_f##sz       \
909*61046927SAndroid Build Coastguard Worker                                                           : aco_opcode::v_cmpx_n##unord##_f##sz;   \
910*61046927SAndroid Build Coastguard Worker       return true;
911*61046927SAndroid Build Coastguard Worker #define CMP(ord, unord, ord_swap, unord_swap)                                                      \
912*61046927SAndroid Build Coastguard Worker    CMP2(ord, unord, ord_swap, unord_swap, 16)                                                      \
913*61046927SAndroid Build Coastguard Worker    CMP2(ord, unord, ord_swap, unord_swap, 32)                                                      \
914*61046927SAndroid Build Coastguard Worker    CMP2(ord, unord, ord_swap, unord_swap, 64)
915*61046927SAndroid Build Coastguard Worker       CMP(lt, /*n*/ge, gt, /*n*/le)
916*61046927SAndroid Build Coastguard Worker       CMP(eq, /*n*/lg, eq, /*n*/lg)
917*61046927SAndroid Build Coastguard Worker       CMP(le, /*n*/gt, ge, /*n*/lt)
918*61046927SAndroid Build Coastguard Worker       CMP(gt, /*n*/le, lt, /*n*/ge)
919*61046927SAndroid Build Coastguard Worker       CMP(lg, /*n*/eq, lg, /*n*/eq)
920*61046927SAndroid Build Coastguard Worker       CMP(ge, /*n*/lt, le, /*n*/gt)
921*61046927SAndroid Build Coastguard Worker #undef CMP
922*61046927SAndroid Build Coastguard Worker #undef CMP2
923*61046927SAndroid Build Coastguard Worker #define ORD_TEST(sz)                                                                               \
924*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_u_f##sz:                                                                 \
925*61046927SAndroid Build Coastguard Worker       info->swapped = aco_opcode::v_cmp_u_f##sz;                                                   \
926*61046927SAndroid Build Coastguard Worker       info->inverse = aco_opcode::v_cmp_o_f##sz;                                                   \
927*61046927SAndroid Build Coastguard Worker       info->vcmpx = aco_opcode::v_cmpx_u_f##sz;                                                    \
928*61046927SAndroid Build Coastguard Worker       return true;                                                                                 \
929*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_o_f##sz:                                                                 \
930*61046927SAndroid Build Coastguard Worker       info->swapped = aco_opcode::v_cmp_o_f##sz;                                                   \
931*61046927SAndroid Build Coastguard Worker       info->inverse = aco_opcode::v_cmp_u_f##sz;                                                   \
932*61046927SAndroid Build Coastguard Worker       info->vcmpx = aco_opcode::v_cmpx_o_f##sz;                                                    \
933*61046927SAndroid Build Coastguard Worker       return true;
934*61046927SAndroid Build Coastguard Worker       ORD_TEST(16)
935*61046927SAndroid Build Coastguard Worker       ORD_TEST(32)
936*61046927SAndroid Build Coastguard Worker       ORD_TEST(64)
937*61046927SAndroid Build Coastguard Worker #undef ORD_TEST
938*61046927SAndroid Build Coastguard Worker #define CMPI2(op, swap, inv, type, sz)                                                             \
939*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_##op##_##type##sz:                                                       \
940*61046927SAndroid Build Coastguard Worker       info->swapped = aco_opcode::v_cmp_##swap##_##type##sz;                                       \
941*61046927SAndroid Build Coastguard Worker       info->inverse = aco_opcode::v_cmp_##inv##_##type##sz;                                        \
942*61046927SAndroid Build Coastguard Worker       info->vcmpx = aco_opcode::v_cmpx_##op##_##type##sz;                                          \
943*61046927SAndroid Build Coastguard Worker       return true;
944*61046927SAndroid Build Coastguard Worker #define CMPI(op, swap, inv)                                                                        \
945*61046927SAndroid Build Coastguard Worker    CMPI2(op, swap, inv, i, 16)                                                                     \
946*61046927SAndroid Build Coastguard Worker    CMPI2(op, swap, inv, u, 16)                                                                     \
947*61046927SAndroid Build Coastguard Worker    CMPI2(op, swap, inv, i, 32)                                                                     \
948*61046927SAndroid Build Coastguard Worker    CMPI2(op, swap, inv, u, 32)                                                                     \
949*61046927SAndroid Build Coastguard Worker    CMPI2(op, swap, inv, i, 64)                                                                     \
950*61046927SAndroid Build Coastguard Worker    CMPI2(op, swap, inv, u, 64)
951*61046927SAndroid Build Coastguard Worker       CMPI(lt, gt, ge)
952*61046927SAndroid Build Coastguard Worker       CMPI(eq, eq, lg)
953*61046927SAndroid Build Coastguard Worker       CMPI(le, ge, gt)
954*61046927SAndroid Build Coastguard Worker       CMPI(gt, lt, le)
955*61046927SAndroid Build Coastguard Worker       CMPI(lg, lg, eq)
956*61046927SAndroid Build Coastguard Worker       CMPI(ge, le, lt)
957*61046927SAndroid Build Coastguard Worker #undef CMPI
958*61046927SAndroid Build Coastguard Worker #undef CMPI2
959*61046927SAndroid Build Coastguard Worker #define CMPCLASS(sz)                                                                               \
960*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_cmp_class_f##sz:                                                             \
961*61046927SAndroid Build Coastguard Worker       info->vcmpx = aco_opcode::v_cmpx_class_f##sz;                                                \
962*61046927SAndroid Build Coastguard Worker       return true;
963*61046927SAndroid Build Coastguard Worker       CMPCLASS(16)
964*61046927SAndroid Build Coastguard Worker       CMPCLASS(32)
965*61046927SAndroid Build Coastguard Worker       CMPCLASS(64)
966*61046927SAndroid Build Coastguard Worker #undef CMPCLASS
967*61046927SAndroid Build Coastguard Worker       // clang-format on
968*61046927SAndroid Build Coastguard Worker    default: return false;
969*61046927SAndroid Build Coastguard Worker    }
970*61046927SAndroid Build Coastguard Worker }
971*61046927SAndroid Build Coastguard Worker 
972*61046927SAndroid Build Coastguard Worker aco_opcode
get_vcmp_inverse(aco_opcode op)973*61046927SAndroid Build Coastguard Worker get_vcmp_inverse(aco_opcode op)
974*61046927SAndroid Build Coastguard Worker {
975*61046927SAndroid Build Coastguard Worker    CmpInfo info;
976*61046927SAndroid Build Coastguard Worker    return get_cmp_info(op, &info) ? info.inverse : aco_opcode::num_opcodes;
977*61046927SAndroid Build Coastguard Worker }
978*61046927SAndroid Build Coastguard Worker 
979*61046927SAndroid Build Coastguard Worker aco_opcode
get_vcmp_swapped(aco_opcode op)980*61046927SAndroid Build Coastguard Worker get_vcmp_swapped(aco_opcode op)
981*61046927SAndroid Build Coastguard Worker {
982*61046927SAndroid Build Coastguard Worker    CmpInfo info;
983*61046927SAndroid Build Coastguard Worker    return get_cmp_info(op, &info) ? info.swapped : aco_opcode::num_opcodes;
984*61046927SAndroid Build Coastguard Worker }
985*61046927SAndroid Build Coastguard Worker 
986*61046927SAndroid Build Coastguard Worker aco_opcode
get_vcmpx(aco_opcode op)987*61046927SAndroid Build Coastguard Worker get_vcmpx(aco_opcode op)
988*61046927SAndroid Build Coastguard Worker {
989*61046927SAndroid Build Coastguard Worker    CmpInfo info;
990*61046927SAndroid Build Coastguard Worker    return get_cmp_info(op, &info) ? info.vcmpx : aco_opcode::num_opcodes;
991*61046927SAndroid Build Coastguard Worker }
992*61046927SAndroid Build Coastguard Worker 
993*61046927SAndroid Build Coastguard Worker bool
is_cmpx(aco_opcode op)994*61046927SAndroid Build Coastguard Worker is_cmpx(aco_opcode op)
995*61046927SAndroid Build Coastguard Worker {
996*61046927SAndroid Build Coastguard Worker    CmpInfo info;
997*61046927SAndroid Build Coastguard Worker    return !get_cmp_info(op, &info);
998*61046927SAndroid Build Coastguard Worker }
999*61046927SAndroid Build Coastguard Worker 
1000*61046927SAndroid Build Coastguard Worker bool
can_swap_operands(aco_ptr<Instruction> & instr,aco_opcode * new_op,unsigned idx0,unsigned idx1)1001*61046927SAndroid Build Coastguard Worker can_swap_operands(aco_ptr<Instruction>& instr, aco_opcode* new_op, unsigned idx0, unsigned idx1)
1002*61046927SAndroid Build Coastguard Worker {
1003*61046927SAndroid Build Coastguard Worker    if (idx0 == idx1) {
1004*61046927SAndroid Build Coastguard Worker       *new_op = instr->opcode;
1005*61046927SAndroid Build Coastguard Worker       return true;
1006*61046927SAndroid Build Coastguard Worker    }
1007*61046927SAndroid Build Coastguard Worker 
1008*61046927SAndroid Build Coastguard Worker    if (idx0 > idx1)
1009*61046927SAndroid Build Coastguard Worker       std::swap(idx0, idx1);
1010*61046927SAndroid Build Coastguard Worker 
1011*61046927SAndroid Build Coastguard Worker    if (instr->isDPP())
1012*61046927SAndroid Build Coastguard Worker       return false;
1013*61046927SAndroid Build Coastguard Worker 
1014*61046927SAndroid Build Coastguard Worker    if (!instr->isVOP3() && !instr->isVOP3P() && !instr->operands[0].isOfType(RegType::vgpr))
1015*61046927SAndroid Build Coastguard Worker       return false;
1016*61046927SAndroid Build Coastguard Worker 
1017*61046927SAndroid Build Coastguard Worker    if (instr->isVOPC()) {
1018*61046927SAndroid Build Coastguard Worker       CmpInfo info;
1019*61046927SAndroid Build Coastguard Worker       if (get_cmp_info(instr->opcode, &info) && info.swapped != aco_opcode::num_opcodes) {
1020*61046927SAndroid Build Coastguard Worker          *new_op = info.swapped;
1021*61046927SAndroid Build Coastguard Worker          return true;
1022*61046927SAndroid Build Coastguard Worker       }
1023*61046927SAndroid Build Coastguard Worker    }
1024*61046927SAndroid Build Coastguard Worker 
1025*61046927SAndroid Build Coastguard Worker    /* opcodes not relevant for DPP or SGPRs optimizations are not included. */
1026*61046927SAndroid Build Coastguard Worker    switch (instr->opcode) {
1027*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_med3_f32: return false; /* order matters for clamp+GFX8+denorm ftz. */
1028*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_u32:
1029*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_co_u32:
1030*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_co_u32_e64:
1031*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_i32:
1032*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_i16:
1033*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_u16_e64:
1034*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add3_u32:
1035*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_f16:
1036*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_f32:
1037*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_i32_i24:
1038*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_hi_i32_i24:
1039*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_u32_u24:
1040*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_hi_u32_u24:
1041*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_lo_u16:
1042*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_lo_u16_e64:
1043*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_f16:
1044*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_f32:
1045*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mul_legacy_f32:
1046*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_or_b32:
1047*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_and_b32:
1048*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_xor_b32:
1049*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_xnor_b32:
1050*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_xor3_b32:
1051*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_or3_b32:
1052*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_and_b16:
1053*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_or_b16:
1054*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_xor_b16:
1055*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max3_f32:
1056*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min3_f32:
1057*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max3_f16:
1058*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min3_f16:
1059*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_med3_f16:
1060*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max3_u32:
1061*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min3_u32:
1062*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_med3_u32:
1063*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max3_i32:
1064*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min3_i32:
1065*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_med3_i32:
1066*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max3_u16:
1067*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min3_u16:
1068*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_med3_u16:
1069*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max3_i16:
1070*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min3_i16:
1071*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_med3_i16:
1072*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_f16:
1073*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_f32:
1074*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_f16:
1075*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_f32:
1076*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_i32:
1077*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_i32:
1078*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_u32:
1079*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_u32:
1080*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_i16:
1081*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_i16:
1082*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_u16:
1083*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_u16:
1084*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_i16_e64:
1085*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_i16_e64:
1086*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_max_u16_e64:
1087*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_min_u16_e64: *new_op = instr->opcode; return true;
1088*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_f16: *new_op = aco_opcode::v_subrev_f16; return true;
1089*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_f32: *new_op = aco_opcode::v_subrev_f32; return true;
1090*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_co_u32: *new_op = aco_opcode::v_subrev_co_u32; return true;
1091*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_u16: *new_op = aco_opcode::v_subrev_u16; return true;
1092*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_u32: *new_op = aco_opcode::v_subrev_u32; return true;
1093*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sub_co_u32_e64: *new_op = aco_opcode::v_subrev_co_u32_e64; return true;
1094*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subrev_f16: *new_op = aco_opcode::v_sub_f16; return true;
1095*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subrev_f32: *new_op = aco_opcode::v_sub_f32; return true;
1096*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subrev_co_u32: *new_op = aco_opcode::v_sub_co_u32; return true;
1097*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subrev_u16: *new_op = aco_opcode::v_sub_u16; return true;
1098*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subrev_u32: *new_op = aco_opcode::v_sub_u32; return true;
1099*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subrev_co_u32_e64: *new_op = aco_opcode::v_sub_co_u32_e64; return true;
1100*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_addc_co_u32:
1101*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mad_i32_i24:
1102*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mad_u32_u24:
1103*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_lerp_u8:
1104*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sad_u8:
1105*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sad_hi_u8:
1106*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sad_u16:
1107*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_sad_u32:
1108*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_xad_u32:
1109*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_add_lshl_u32:
1110*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_and_or_b32:
1111*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mad_u16:
1112*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mad_i16:
1113*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mad_u32_u16:
1114*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mad_i32_i16:
1115*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_maxmin_f32:
1116*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_minmax_f32:
1117*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_maxmin_f16:
1118*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_minmax_f16:
1119*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_maxmin_u32:
1120*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_minmax_u32:
1121*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_maxmin_i32:
1122*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_minmax_i32:
1123*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fma_f32:
1124*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fma_legacy_f32:
1125*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fmac_f32:
1126*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fmac_legacy_f32:
1127*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mac_f32:
1128*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mac_legacy_f32:
1129*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fma_f16:
1130*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fmac_f16:
1131*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_mac_f16:
1132*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_dot4c_i32_i8:
1133*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_dot2c_f32_f16:
1134*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_dot2_f32_f16:
1135*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_dot2_f32_bf16:
1136*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_dot2_f16_f16:
1137*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_dot2_bf16_bf16:
1138*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fma_mix_f32:
1139*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fma_mixlo_f16:
1140*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_fma_mixhi_f16:
1141*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_pk_fmac_f16: {
1142*61046927SAndroid Build Coastguard Worker       if (idx1 == 2)
1143*61046927SAndroid Build Coastguard Worker          return false;
1144*61046927SAndroid Build Coastguard Worker       *new_op = instr->opcode;
1145*61046927SAndroid Build Coastguard Worker       return true;
1146*61046927SAndroid Build Coastguard Worker    }
1147*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subb_co_u32: {
1148*61046927SAndroid Build Coastguard Worker       if (idx1 == 2)
1149*61046927SAndroid Build Coastguard Worker          return false;
1150*61046927SAndroid Build Coastguard Worker       *new_op = aco_opcode::v_subbrev_co_u32;
1151*61046927SAndroid Build Coastguard Worker       return true;
1152*61046927SAndroid Build Coastguard Worker    }
1153*61046927SAndroid Build Coastguard Worker    case aco_opcode::v_subbrev_co_u32: {
1154*61046927SAndroid Build Coastguard Worker       if (idx1 == 2)
1155*61046927SAndroid Build Coastguard Worker          return false;
1156*61046927SAndroid Build Coastguard Worker       *new_op = aco_opcode::v_subb_co_u32;
1157*61046927SAndroid Build Coastguard Worker       return true;
1158*61046927SAndroid Build Coastguard Worker    }
1159*61046927SAndroid Build Coastguard Worker    default: return false;
1160*61046927SAndroid Build Coastguard Worker    }
1161*61046927SAndroid Build Coastguard Worker }
1162*61046927SAndroid Build Coastguard Worker 
wait_imm()1163*61046927SAndroid Build Coastguard Worker wait_imm::wait_imm()
1164*61046927SAndroid Build Coastguard Worker     : exp(unset_counter), lgkm(unset_counter), vm(unset_counter), vs(unset_counter),
1165*61046927SAndroid Build Coastguard Worker       sample(unset_counter), bvh(unset_counter), km(unset_counter)
1166*61046927SAndroid Build Coastguard Worker {}
wait_imm(uint16_t vm_,uint16_t exp_,uint16_t lgkm_,uint16_t vs_)1167*61046927SAndroid Build Coastguard Worker wait_imm::wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_)
1168*61046927SAndroid Build Coastguard Worker     : exp(exp_), lgkm(lgkm_), vm(vm_), vs(vs_), sample(unset_counter), bvh(unset_counter),
1169*61046927SAndroid Build Coastguard Worker       km(unset_counter)
1170*61046927SAndroid Build Coastguard Worker {}
1171*61046927SAndroid Build Coastguard Worker 
1172*61046927SAndroid Build Coastguard Worker uint16_t
pack(enum amd_gfx_level gfx_level) const1173*61046927SAndroid Build Coastguard Worker wait_imm::pack(enum amd_gfx_level gfx_level) const
1174*61046927SAndroid Build Coastguard Worker {
1175*61046927SAndroid Build Coastguard Worker    uint16_t imm = 0;
1176*61046927SAndroid Build Coastguard Worker    assert(exp == unset_counter || exp <= 0x7);
1177*61046927SAndroid Build Coastguard Worker    if (gfx_level >= GFX11) {
1178*61046927SAndroid Build Coastguard Worker       assert(lgkm == unset_counter || lgkm <= 0x3f);
1179*61046927SAndroid Build Coastguard Worker       assert(vm == unset_counter || vm <= 0x3f);
1180*61046927SAndroid Build Coastguard Worker       imm = ((vm & 0x3f) << 10) | ((lgkm & 0x3f) << 4) | (exp & 0x7);
1181*61046927SAndroid Build Coastguard Worker    } else if (gfx_level >= GFX10) {
1182*61046927SAndroid Build Coastguard Worker       assert(lgkm == unset_counter || lgkm <= 0x3f);
1183*61046927SAndroid Build Coastguard Worker       assert(vm == unset_counter || vm <= 0x3f);
1184*61046927SAndroid Build Coastguard Worker       imm = ((vm & 0x30) << 10) | ((lgkm & 0x3f) << 8) | ((exp & 0x7) << 4) | (vm & 0xf);
1185*61046927SAndroid Build Coastguard Worker    } else if (gfx_level >= GFX9) {
1186*61046927SAndroid Build Coastguard Worker       assert(lgkm == unset_counter || lgkm <= 0xf);
1187*61046927SAndroid Build Coastguard Worker       assert(vm == unset_counter || vm <= 0x3f);
1188*61046927SAndroid Build Coastguard Worker       imm = ((vm & 0x30) << 10) | ((lgkm & 0xf) << 8) | ((exp & 0x7) << 4) | (vm & 0xf);
1189*61046927SAndroid Build Coastguard Worker    } else {
1190*61046927SAndroid Build Coastguard Worker       assert(lgkm == unset_counter || lgkm <= 0xf);
1191*61046927SAndroid Build Coastguard Worker       assert(vm == unset_counter || vm <= 0xf);
1192*61046927SAndroid Build Coastguard Worker       imm = ((lgkm & 0xf) << 8) | ((exp & 0x7) << 4) | (vm & 0xf);
1193*61046927SAndroid Build Coastguard Worker    }
1194*61046927SAndroid Build Coastguard Worker    if (gfx_level < GFX9 && vm == wait_imm::unset_counter)
1195*61046927SAndroid Build Coastguard Worker       imm |= 0xc000; /* should have no effect on pre-GFX9 and now we won't have to worry about the
1196*61046927SAndroid Build Coastguard Worker                         architecture when interpreting the immediate */
1197*61046927SAndroid Build Coastguard Worker    if (gfx_level < GFX10 && lgkm == wait_imm::unset_counter)
1198*61046927SAndroid Build Coastguard Worker       imm |= 0x3000; /* should have no effect on pre-GFX10 and now we won't have to worry about the
1199*61046927SAndroid Build Coastguard Worker                         architecture when interpreting the immediate */
1200*61046927SAndroid Build Coastguard Worker    return imm;
1201*61046927SAndroid Build Coastguard Worker }
1202*61046927SAndroid Build Coastguard Worker 
1203*61046927SAndroid Build Coastguard Worker wait_imm
max(enum amd_gfx_level gfx_level)1204*61046927SAndroid Build Coastguard Worker wait_imm::max(enum amd_gfx_level gfx_level)
1205*61046927SAndroid Build Coastguard Worker {
1206*61046927SAndroid Build Coastguard Worker    wait_imm imm;
1207*61046927SAndroid Build Coastguard Worker    imm.vm = gfx_level >= GFX9 ? 63 : 15;
1208*61046927SAndroid Build Coastguard Worker    imm.exp = 7;
1209*61046927SAndroid Build Coastguard Worker    imm.lgkm = gfx_level >= GFX10 ? 63 : 15;
1210*61046927SAndroid Build Coastguard Worker    imm.vs = gfx_level >= GFX10 ? 63 : 0;
1211*61046927SAndroid Build Coastguard Worker    imm.sample = gfx_level >= GFX12 ? 63 : 0;
1212*61046927SAndroid Build Coastguard Worker    imm.bvh = gfx_level >= GFX12 ? 7 : 0;
1213*61046927SAndroid Build Coastguard Worker    imm.km = gfx_level >= GFX12 ? 31 : 0;
1214*61046927SAndroid Build Coastguard Worker    return imm;
1215*61046927SAndroid Build Coastguard Worker }
1216*61046927SAndroid Build Coastguard Worker 
1217*61046927SAndroid Build Coastguard Worker bool
unpack(enum amd_gfx_level gfx_level,const Instruction * instr)1218*61046927SAndroid Build Coastguard Worker wait_imm::unpack(enum amd_gfx_level gfx_level, const Instruction* instr)
1219*61046927SAndroid Build Coastguard Worker {
1220*61046927SAndroid Build Coastguard Worker    if (!instr->isSALU() || (!instr->operands.empty() && instr->operands[0].physReg() != sgpr_null))
1221*61046927SAndroid Build Coastguard Worker       return false;
1222*61046927SAndroid Build Coastguard Worker 
1223*61046927SAndroid Build Coastguard Worker    aco_opcode op = instr->opcode;
1224*61046927SAndroid Build Coastguard Worker    uint16_t packed = instr->salu().imm;
1225*61046927SAndroid Build Coastguard Worker 
1226*61046927SAndroid Build Coastguard Worker    if (op == aco_opcode::s_wait_loadcnt) {
1227*61046927SAndroid Build Coastguard Worker       vm = std::min<uint8_t>(vm, packed);
1228*61046927SAndroid Build Coastguard Worker    } else if (op == aco_opcode::s_wait_storecnt) {
1229*61046927SAndroid Build Coastguard Worker       vs = std::min<uint8_t>(vs, packed);
1230*61046927SAndroid Build Coastguard Worker    } else if (op == aco_opcode::s_wait_samplecnt) {
1231*61046927SAndroid Build Coastguard Worker       sample = std::min<uint8_t>(sample, packed);
1232*61046927SAndroid Build Coastguard Worker    } else if (op == aco_opcode::s_wait_bvhcnt) {
1233*61046927SAndroid Build Coastguard Worker       bvh = std::min<uint8_t>(bvh, packed);
1234*61046927SAndroid Build Coastguard Worker    } else if (op == aco_opcode::s_wait_expcnt) {
1235*61046927SAndroid Build Coastguard Worker       exp = std::min<uint8_t>(exp, packed);
1236*61046927SAndroid Build Coastguard Worker    } else if (op == aco_opcode::s_wait_dscnt) {
1237*61046927SAndroid Build Coastguard Worker       lgkm = std::min<uint8_t>(lgkm, packed);
1238*61046927SAndroid Build Coastguard Worker    } else if (op == aco_opcode::s_wait_kmcnt) {
1239*61046927SAndroid Build Coastguard Worker       km = std::min<uint8_t>(km, packed);
1240*61046927SAndroid Build Coastguard Worker    } else if (op == aco_opcode::s_wait_loadcnt_dscnt) {
1241*61046927SAndroid Build Coastguard Worker       uint32_t vm2 = (packed >> 8) & 0x3f;
1242*61046927SAndroid Build Coastguard Worker       uint32_t ds = packed & 0x3f;
1243*61046927SAndroid Build Coastguard Worker       vm = std::min<uint8_t>(vm, vm2 == 0x3f ? wait_imm::unset_counter : vm2);
1244*61046927SAndroid Build Coastguard Worker       lgkm = std::min<uint8_t>(lgkm, ds == 0x3f ? wait_imm::unset_counter : ds);
1245*61046927SAndroid Build Coastguard Worker    } else if (op == aco_opcode::s_wait_storecnt_dscnt) {
1246*61046927SAndroid Build Coastguard Worker       uint32_t vs2 = (packed >> 8) & 0x3f;
1247*61046927SAndroid Build Coastguard Worker       uint32_t ds = packed & 0x3f;
1248*61046927SAndroid Build Coastguard Worker       vs = std::min<uint8_t>(vs, vs2 == 0x3f ? wait_imm::unset_counter : vs2);
1249*61046927SAndroid Build Coastguard Worker       lgkm = std::min<uint8_t>(lgkm, ds == 0x3f ? wait_imm::unset_counter : ds);
1250*61046927SAndroid Build Coastguard Worker    } else if (op == aco_opcode::s_waitcnt_expcnt) {
1251*61046927SAndroid Build Coastguard Worker       exp = std::min<uint8_t>(exp, packed);
1252*61046927SAndroid Build Coastguard Worker    } else if (op == aco_opcode::s_waitcnt_lgkmcnt) {
1253*61046927SAndroid Build Coastguard Worker       lgkm = std::min<uint8_t>(lgkm, packed);
1254*61046927SAndroid Build Coastguard Worker    } else if (op == aco_opcode::s_waitcnt_vmcnt) {
1255*61046927SAndroid Build Coastguard Worker       vm = std::min<uint8_t>(vm, packed);
1256*61046927SAndroid Build Coastguard Worker    } else if (op == aco_opcode::s_waitcnt_vscnt) {
1257*61046927SAndroid Build Coastguard Worker       vs = std::min<uint8_t>(vs, packed);
1258*61046927SAndroid Build Coastguard Worker    } else if (op == aco_opcode::s_waitcnt) {
1259*61046927SAndroid Build Coastguard Worker       uint8_t vm2, lgkm2, exp2;
1260*61046927SAndroid Build Coastguard Worker       if (gfx_level >= GFX11) {
1261*61046927SAndroid Build Coastguard Worker          vm2 = (packed >> 10) & 0x3f;
1262*61046927SAndroid Build Coastguard Worker          lgkm2 = (packed >> 4) & 0x3f;
1263*61046927SAndroid Build Coastguard Worker          exp2 = packed & 0x7;
1264*61046927SAndroid Build Coastguard Worker       } else {
1265*61046927SAndroid Build Coastguard Worker          vm2 = packed & 0xf;
1266*61046927SAndroid Build Coastguard Worker          if (gfx_level >= GFX9)
1267*61046927SAndroid Build Coastguard Worker             vm2 |= (packed >> 10) & 0x30;
1268*61046927SAndroid Build Coastguard Worker 
1269*61046927SAndroid Build Coastguard Worker          exp2 = (packed >> 4) & 0x7;
1270*61046927SAndroid Build Coastguard Worker 
1271*61046927SAndroid Build Coastguard Worker          lgkm2 = (packed >> 8) & 0xf;
1272*61046927SAndroid Build Coastguard Worker          if (gfx_level >= GFX10)
1273*61046927SAndroid Build Coastguard Worker             lgkm2 |= (packed >> 8) & 0x30;
1274*61046927SAndroid Build Coastguard Worker       }
1275*61046927SAndroid Build Coastguard Worker 
1276*61046927SAndroid Build Coastguard Worker       if (vm2 == (gfx_level >= GFX9 ? 0x3f : 0xf))
1277*61046927SAndroid Build Coastguard Worker          vm2 = wait_imm::unset_counter;
1278*61046927SAndroid Build Coastguard Worker       if (exp2 == 0x7)
1279*61046927SAndroid Build Coastguard Worker          exp2 = wait_imm::unset_counter;
1280*61046927SAndroid Build Coastguard Worker       if (lgkm2 == (gfx_level >= GFX10 ? 0x3f : 0xf))
1281*61046927SAndroid Build Coastguard Worker          lgkm2 = wait_imm::unset_counter;
1282*61046927SAndroid Build Coastguard Worker 
1283*61046927SAndroid Build Coastguard Worker       vm = std::min(vm, vm2);
1284*61046927SAndroid Build Coastguard Worker       exp = std::min(exp, exp2);
1285*61046927SAndroid Build Coastguard Worker       lgkm = std::min(lgkm, lgkm2);
1286*61046927SAndroid Build Coastguard Worker    } else {
1287*61046927SAndroid Build Coastguard Worker       return false;
1288*61046927SAndroid Build Coastguard Worker    }
1289*61046927SAndroid Build Coastguard Worker    return true;
1290*61046927SAndroid Build Coastguard Worker }
1291*61046927SAndroid Build Coastguard Worker 
1292*61046927SAndroid Build Coastguard Worker bool
combine(const wait_imm & other)1293*61046927SAndroid Build Coastguard Worker wait_imm::combine(const wait_imm& other)
1294*61046927SAndroid Build Coastguard Worker {
1295*61046927SAndroid Build Coastguard Worker    bool changed = false;
1296*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < wait_type_num; i++) {
1297*61046927SAndroid Build Coastguard Worker       if (other[i] < (*this)[i])
1298*61046927SAndroid Build Coastguard Worker          changed = true;
1299*61046927SAndroid Build Coastguard Worker       (*this)[i] = std::min((*this)[i], other[i]);
1300*61046927SAndroid Build Coastguard Worker    }
1301*61046927SAndroid Build Coastguard Worker    return changed;
1302*61046927SAndroid Build Coastguard Worker }
1303*61046927SAndroid Build Coastguard Worker 
1304*61046927SAndroid Build Coastguard Worker bool
empty() const1305*61046927SAndroid Build Coastguard Worker wait_imm::empty() const
1306*61046927SAndroid Build Coastguard Worker {
1307*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < wait_type_num; i++) {
1308*61046927SAndroid Build Coastguard Worker       if ((*this)[i] != unset_counter)
1309*61046927SAndroid Build Coastguard Worker          return false;
1310*61046927SAndroid Build Coastguard Worker    }
1311*61046927SAndroid Build Coastguard Worker    return true;
1312*61046927SAndroid Build Coastguard Worker }
1313*61046927SAndroid Build Coastguard Worker 
1314*61046927SAndroid Build Coastguard Worker void
print(FILE * output) const1315*61046927SAndroid Build Coastguard Worker wait_imm::print(FILE* output) const
1316*61046927SAndroid Build Coastguard Worker {
1317*61046927SAndroid Build Coastguard Worker    const char* names[wait_type_num];
1318*61046927SAndroid Build Coastguard Worker    names[wait_type_exp] = "exp";
1319*61046927SAndroid Build Coastguard Worker    names[wait_type_vm] = "vm";
1320*61046927SAndroid Build Coastguard Worker    names[wait_type_lgkm] = "lgkm";
1321*61046927SAndroid Build Coastguard Worker    names[wait_type_vs] = "vs";
1322*61046927SAndroid Build Coastguard Worker    names[wait_type_sample] = "sample";
1323*61046927SAndroid Build Coastguard Worker    names[wait_type_bvh] = "bvh";
1324*61046927SAndroid Build Coastguard Worker    names[wait_type_km] = "km";
1325*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < wait_type_num; i++) {
1326*61046927SAndroid Build Coastguard Worker       if ((*this)[i] != unset_counter)
1327*61046927SAndroid Build Coastguard Worker          fprintf(output, "%s: %u\n", names[i], (*this)[i]);
1328*61046927SAndroid Build Coastguard Worker    }
1329*61046927SAndroid Build Coastguard Worker }
1330*61046927SAndroid Build Coastguard Worker 
1331*61046927SAndroid Build Coastguard Worker bool
should_form_clause(const Instruction * a,const Instruction * b)1332*61046927SAndroid Build Coastguard Worker should_form_clause(const Instruction* a, const Instruction* b)
1333*61046927SAndroid Build Coastguard Worker {
1334*61046927SAndroid Build Coastguard Worker    if (a->definitions.empty() != b->definitions.empty())
1335*61046927SAndroid Build Coastguard Worker       return false;
1336*61046927SAndroid Build Coastguard Worker 
1337*61046927SAndroid Build Coastguard Worker    if (a->format != b->format)
1338*61046927SAndroid Build Coastguard Worker       return false;
1339*61046927SAndroid Build Coastguard Worker 
1340*61046927SAndroid Build Coastguard Worker    if (a->operands.empty() || b->operands.empty())
1341*61046927SAndroid Build Coastguard Worker       return false;
1342*61046927SAndroid Build Coastguard Worker 
1343*61046927SAndroid Build Coastguard Worker    /* Assume loads which don't use descriptors might load from similar addresses. */
1344*61046927SAndroid Build Coastguard Worker    if (a->isFlatLike() || a->accessesLDS())
1345*61046927SAndroid Build Coastguard Worker       return true;
1346*61046927SAndroid Build Coastguard Worker    if (a->isSMEM() && a->operands[0].bytes() == 8 && b->operands[0].bytes() == 8)
1347*61046927SAndroid Build Coastguard Worker       return true;
1348*61046927SAndroid Build Coastguard Worker 
1349*61046927SAndroid Build Coastguard Worker    /* If they load from the same descriptor, assume they might load from similar
1350*61046927SAndroid Build Coastguard Worker     * addresses.
1351*61046927SAndroid Build Coastguard Worker     */
1352*61046927SAndroid Build Coastguard Worker    if (a->isVMEM() || a->isSMEM())
1353*61046927SAndroid Build Coastguard Worker       return a->operands[0].tempId() == b->operands[0].tempId();
1354*61046927SAndroid Build Coastguard Worker 
1355*61046927SAndroid Build Coastguard Worker    if (a->isEXP() && b->isEXP())
1356*61046927SAndroid Build Coastguard Worker       return true;
1357*61046927SAndroid Build Coastguard Worker 
1358*61046927SAndroid Build Coastguard Worker    return false;
1359*61046927SAndroid Build Coastguard Worker }
1360*61046927SAndroid Build Coastguard Worker 
1361*61046927SAndroid Build Coastguard Worker int
get_op_fixed_to_def(Instruction * instr)1362*61046927SAndroid Build Coastguard Worker get_op_fixed_to_def(Instruction* instr)
1363*61046927SAndroid Build Coastguard Worker {
1364*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::v_interp_p2_f32 || instr->opcode == aco_opcode::v_mac_f32 ||
1365*61046927SAndroid Build Coastguard Worker        instr->opcode == aco_opcode::v_fmac_f32 || instr->opcode == aco_opcode::v_mac_f16 ||
1366*61046927SAndroid Build Coastguard Worker        instr->opcode == aco_opcode::v_fmac_f16 || instr->opcode == aco_opcode::v_mac_legacy_f32 ||
1367*61046927SAndroid Build Coastguard Worker        instr->opcode == aco_opcode::v_fmac_legacy_f32 ||
1368*61046927SAndroid Build Coastguard Worker        instr->opcode == aco_opcode::v_pk_fmac_f16 || instr->opcode == aco_opcode::v_writelane_b32 ||
1369*61046927SAndroid Build Coastguard Worker        instr->opcode == aco_opcode::v_writelane_b32_e64 ||
1370*61046927SAndroid Build Coastguard Worker        instr->opcode == aco_opcode::v_dot4c_i32_i8 || instr->opcode == aco_opcode::s_fmac_f32 ||
1371*61046927SAndroid Build Coastguard Worker        instr->opcode == aco_opcode::s_fmac_f16) {
1372*61046927SAndroid Build Coastguard Worker       return 2;
1373*61046927SAndroid Build Coastguard Worker    } else if (instr->opcode == aco_opcode::s_addk_i32 || instr->opcode == aco_opcode::s_mulk_i32 ||
1374*61046927SAndroid Build Coastguard Worker               instr->opcode == aco_opcode::s_cmovk_i32) {
1375*61046927SAndroid Build Coastguard Worker       return 0;
1376*61046927SAndroid Build Coastguard Worker    } else if (instr->isMUBUF() && instr->definitions.size() == 1 && instr->operands.size() == 4) {
1377*61046927SAndroid Build Coastguard Worker       return 3;
1378*61046927SAndroid Build Coastguard Worker    } else if (instr->isMIMG() && instr->definitions.size() == 1 &&
1379*61046927SAndroid Build Coastguard Worker               !instr->operands[2].isUndefined()) {
1380*61046927SAndroid Build Coastguard Worker       return 2;
1381*61046927SAndroid Build Coastguard Worker    }
1382*61046927SAndroid Build Coastguard Worker    return -1;
1383*61046927SAndroid Build Coastguard Worker }
1384*61046927SAndroid Build Coastguard Worker 
1385*61046927SAndroid Build Coastguard Worker uint8_t
get_vmem_type(enum amd_gfx_level gfx_level,Instruction * instr)1386*61046927SAndroid Build Coastguard Worker get_vmem_type(enum amd_gfx_level gfx_level, Instruction* instr)
1387*61046927SAndroid Build Coastguard Worker {
1388*61046927SAndroid Build Coastguard Worker    if (instr->opcode == aco_opcode::image_bvh64_intersect_ray)
1389*61046927SAndroid Build Coastguard Worker       return vmem_bvh;
1390*61046927SAndroid Build Coastguard Worker    else if (gfx_level >= GFX12 && instr->opcode == aco_opcode::image_msaa_load)
1391*61046927SAndroid Build Coastguard Worker       return vmem_sampler;
1392*61046927SAndroid Build Coastguard Worker    else if (instr->isMIMG() && !instr->operands[1].isUndefined() &&
1393*61046927SAndroid Build Coastguard Worker             instr->operands[1].regClass() == s4)
1394*61046927SAndroid Build Coastguard Worker       return vmem_sampler;
1395*61046927SAndroid Build Coastguard Worker    else if (instr->isVMEM() || instr->isScratch() || instr->isGlobal())
1396*61046927SAndroid Build Coastguard Worker       return vmem_nosampler;
1397*61046927SAndroid Build Coastguard Worker    return 0;
1398*61046927SAndroid Build Coastguard Worker }
1399*61046927SAndroid Build Coastguard Worker 
1400*61046927SAndroid Build Coastguard Worker unsigned
parse_vdst_wait(Instruction * instr)1401*61046927SAndroid Build Coastguard Worker parse_vdst_wait(Instruction* instr)
1402*61046927SAndroid Build Coastguard Worker {
1403*61046927SAndroid Build Coastguard Worker    if (instr->isVMEM() || instr->isFlatLike() || instr->isDS() || instr->isEXP())
1404*61046927SAndroid Build Coastguard Worker       return 0;
1405*61046927SAndroid Build Coastguard Worker    else if (instr->isLDSDIR())
1406*61046927SAndroid Build Coastguard Worker       return instr->ldsdir().wait_vdst;
1407*61046927SAndroid Build Coastguard Worker    else if (instr->opcode == aco_opcode::s_waitcnt_depctr)
1408*61046927SAndroid Build Coastguard Worker       return (instr->salu().imm >> 12) & 0xf;
1409*61046927SAndroid Build Coastguard Worker    else
1410*61046927SAndroid Build Coastguard Worker       return 15;
1411*61046927SAndroid Build Coastguard Worker }
1412*61046927SAndroid Build Coastguard Worker 
1413*61046927SAndroid Build Coastguard Worker bool
dealloc_vgprs(Program * program)1414*61046927SAndroid Build Coastguard Worker dealloc_vgprs(Program* program)
1415*61046927SAndroid Build Coastguard Worker {
1416*61046927SAndroid Build Coastguard Worker    if (program->gfx_level < GFX11)
1417*61046927SAndroid Build Coastguard Worker       return false;
1418*61046927SAndroid Build Coastguard Worker 
1419*61046927SAndroid Build Coastguard Worker    /* sendmsg(dealloc_vgprs) releases scratch, so this isn't safe if there is a in-progress scratch
1420*61046927SAndroid Build Coastguard Worker     * store. */
1421*61046927SAndroid Build Coastguard Worker    if (uses_scratch(program))
1422*61046927SAndroid Build Coastguard Worker       return false;
1423*61046927SAndroid Build Coastguard Worker 
1424*61046927SAndroid Build Coastguard Worker    /* If we insert the sendmsg on GFX11.5, the export priority workaround will require us to insert
1425*61046927SAndroid Build Coastguard Worker     * a wait after exports. There might still be pending VMEM stores for PS parameter exports,
1426*61046927SAndroid Build Coastguard Worker     * except NGG lowering usually inserts a memory barrier. This means there is unlikely to be any
1427*61046927SAndroid Build Coastguard Worker     * pending VMEM stores or exports if we insert the sendmsg for these stages. */
1428*61046927SAndroid Build Coastguard Worker    if (program->gfx_level == GFX11_5 && (program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER ||
1429*61046927SAndroid Build Coastguard Worker                                          program->stage.hw == AC_HW_PIXEL_SHADER))
1430*61046927SAndroid Build Coastguard Worker       return false;
1431*61046927SAndroid Build Coastguard Worker 
1432*61046927SAndroid Build Coastguard Worker    Block& block = program->blocks.back();
1433*61046927SAndroid Build Coastguard Worker 
1434*61046927SAndroid Build Coastguard Worker    /* don't bother checking if there is a pending VMEM store or export: there almost always is */
1435*61046927SAndroid Build Coastguard Worker    Builder bld(program);
1436*61046927SAndroid Build Coastguard Worker    if (!block.instructions.empty() && block.instructions.back()->opcode == aco_opcode::s_endpgm) {
1437*61046927SAndroid Build Coastguard Worker       bld.reset(&block.instructions, block.instructions.begin() + (block.instructions.size() - 1));
1438*61046927SAndroid Build Coastguard Worker       /* Due to a hazard, an s_nop is needed before "s_sendmsg sendmsg_dealloc_vgprs". */
1439*61046927SAndroid Build Coastguard Worker       bld.sopp(aco_opcode::s_nop, 0);
1440*61046927SAndroid Build Coastguard Worker       bld.sopp(aco_opcode::s_sendmsg, sendmsg_dealloc_vgprs);
1441*61046927SAndroid Build Coastguard Worker    }
1442*61046927SAndroid Build Coastguard Worker 
1443*61046927SAndroid Build Coastguard Worker    return true;
1444*61046927SAndroid Build Coastguard Worker }
1445*61046927SAndroid Build Coastguard Worker 
1446*61046927SAndroid Build Coastguard Worker bool
isTrans() const1447*61046927SAndroid Build Coastguard Worker Instruction::isTrans() const noexcept
1448*61046927SAndroid Build Coastguard Worker {
1449*61046927SAndroid Build Coastguard Worker    return instr_info.classes[(int)opcode] == instr_class::valu_transcendental32 ||
1450*61046927SAndroid Build Coastguard Worker           instr_info.classes[(int)opcode] == instr_class::valu_double_transcendental ||
1451*61046927SAndroid Build Coastguard Worker           instr_info.classes[(int)opcode] == instr_class::valu_pseudo_scalar_trans;
1452*61046927SAndroid Build Coastguard Worker }
1453*61046927SAndroid Build Coastguard Worker 
1454*61046927SAndroid Build Coastguard Worker size_t
get_instr_data_size(Format format)1455*61046927SAndroid Build Coastguard Worker get_instr_data_size(Format format)
1456*61046927SAndroid Build Coastguard Worker {
1457*61046927SAndroid Build Coastguard Worker    switch (format) {
1458*61046927SAndroid Build Coastguard Worker    case Format::SOP1:
1459*61046927SAndroid Build Coastguard Worker    case Format::SOP2:
1460*61046927SAndroid Build Coastguard Worker    case Format::SOPC:
1461*61046927SAndroid Build Coastguard Worker    case Format::SOPK:
1462*61046927SAndroid Build Coastguard Worker    case Format::SOPP: return sizeof(SALU_instruction);
1463*61046927SAndroid Build Coastguard Worker    case Format::SMEM: return sizeof(SMEM_instruction);
1464*61046927SAndroid Build Coastguard Worker    case Format::PSEUDO: return sizeof(Pseudo_instruction);
1465*61046927SAndroid Build Coastguard Worker    case Format::PSEUDO_BARRIER: return sizeof(Pseudo_barrier_instruction);
1466*61046927SAndroid Build Coastguard Worker    case Format::PSEUDO_REDUCTION: return sizeof(Pseudo_reduction_instruction);
1467*61046927SAndroid Build Coastguard Worker    case Format::PSEUDO_BRANCH: return sizeof(Pseudo_branch_instruction);
1468*61046927SAndroid Build Coastguard Worker    case Format::DS: return sizeof(DS_instruction);
1469*61046927SAndroid Build Coastguard Worker    case Format::FLAT:
1470*61046927SAndroid Build Coastguard Worker    case Format::GLOBAL:
1471*61046927SAndroid Build Coastguard Worker    case Format::SCRATCH: return sizeof(FLAT_instruction);
1472*61046927SAndroid Build Coastguard Worker    case Format::LDSDIR: return sizeof(LDSDIR_instruction);
1473*61046927SAndroid Build Coastguard Worker    case Format::MTBUF: return sizeof(MTBUF_instruction);
1474*61046927SAndroid Build Coastguard Worker    case Format::MUBUF: return sizeof(MUBUF_instruction);
1475*61046927SAndroid Build Coastguard Worker    case Format::MIMG: return sizeof(MIMG_instruction);
1476*61046927SAndroid Build Coastguard Worker    case Format::VOPD: return sizeof(VOPD_instruction);
1477*61046927SAndroid Build Coastguard Worker    case Format::VINTERP_INREG: return sizeof(VINTERP_inreg_instruction);
1478*61046927SAndroid Build Coastguard Worker    case Format::VINTRP: return sizeof(VINTRP_instruction);
1479*61046927SAndroid Build Coastguard Worker    case Format::EXP: return sizeof(Export_instruction);
1480*61046927SAndroid Build Coastguard Worker    default:
1481*61046927SAndroid Build Coastguard Worker       if ((uint16_t)format & (uint16_t)Format::DPP16)
1482*61046927SAndroid Build Coastguard Worker          return sizeof(DPP16_instruction);
1483*61046927SAndroid Build Coastguard Worker       else if ((uint16_t)format & (uint16_t)Format::DPP8)
1484*61046927SAndroid Build Coastguard Worker          return sizeof(DPP8_instruction);
1485*61046927SAndroid Build Coastguard Worker       else if ((uint16_t)format & (uint16_t)Format::SDWA)
1486*61046927SAndroid Build Coastguard Worker          return sizeof(SDWA_instruction);
1487*61046927SAndroid Build Coastguard Worker       else
1488*61046927SAndroid Build Coastguard Worker          return sizeof(VALU_instruction);
1489*61046927SAndroid Build Coastguard Worker    }
1490*61046927SAndroid Build Coastguard Worker }
1491*61046927SAndroid Build Coastguard Worker 
1492*61046927SAndroid Build Coastguard Worker Instruction*
create_instruction(aco_opcode opcode,Format format,uint32_t num_operands,uint32_t num_definitions)1493*61046927SAndroid Build Coastguard Worker create_instruction(aco_opcode opcode, Format format, uint32_t num_operands,
1494*61046927SAndroid Build Coastguard Worker                    uint32_t num_definitions)
1495*61046927SAndroid Build Coastguard Worker {
1496*61046927SAndroid Build Coastguard Worker    size_t size = get_instr_data_size(format);
1497*61046927SAndroid Build Coastguard Worker    size_t total_size = size + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
1498*61046927SAndroid Build Coastguard Worker 
1499*61046927SAndroid Build Coastguard Worker    void* data = instruction_buffer->allocate(total_size, alignof(uint32_t));
1500*61046927SAndroid Build Coastguard Worker    memset(data, 0, total_size);
1501*61046927SAndroid Build Coastguard Worker    Instruction* inst = (Instruction*)data;
1502*61046927SAndroid Build Coastguard Worker 
1503*61046927SAndroid Build Coastguard Worker    inst->opcode = opcode;
1504*61046927SAndroid Build Coastguard Worker    inst->format = format;
1505*61046927SAndroid Build Coastguard Worker 
1506*61046927SAndroid Build Coastguard Worker    uint16_t operands_offset = size - offsetof(Instruction, operands);
1507*61046927SAndroid Build Coastguard Worker    inst->operands = aco::span<Operand>(operands_offset, num_operands);
1508*61046927SAndroid Build Coastguard Worker    uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions;
1509*61046927SAndroid Build Coastguard Worker    inst->definitions = aco::span<Definition>(definitions_offset, num_definitions);
1510*61046927SAndroid Build Coastguard Worker 
1511*61046927SAndroid Build Coastguard Worker    return inst;
1512*61046927SAndroid Build Coastguard Worker }
1513*61046927SAndroid Build Coastguard Worker 
1514*61046927SAndroid Build Coastguard Worker } // namespace aco
1515