xref: /aosp_15_r20/external/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker //===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker //                     The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker /// The pass tries to use the 32-bit encoding for instructions when possible.
9*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
10*9880d681SAndroid Build Coastguard Worker //
11*9880d681SAndroid Build Coastguard Worker 
12*9880d681SAndroid Build Coastguard Worker #include "AMDGPU.h"
13*9880d681SAndroid Build Coastguard Worker #include "AMDGPUMCInstLower.h"
14*9880d681SAndroid Build Coastguard Worker #include "AMDGPUSubtarget.h"
15*9880d681SAndroid Build Coastguard Worker #include "SIInstrInfo.h"
16*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/Statistic.h"
17*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunctionPass.h"
18*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineInstrBuilder.h"
19*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineRegisterInfo.h"
20*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/Constants.h"
21*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/Function.h"
22*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/LLVMContext.h"
23*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h"
24*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/raw_ostream.h"
25*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetMachine.h"
26*9880d681SAndroid Build Coastguard Worker 
27*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "si-shrink-instructions"
28*9880d681SAndroid Build Coastguard Worker 
29*9880d681SAndroid Build Coastguard Worker STATISTIC(NumInstructionsShrunk,
30*9880d681SAndroid Build Coastguard Worker           "Number of 64-bit instruction reduced to 32-bit.");
31*9880d681SAndroid Build Coastguard Worker STATISTIC(NumLiteralConstantsFolded,
32*9880d681SAndroid Build Coastguard Worker           "Number of literal constants folded into 32-bit instructions.");
33*9880d681SAndroid Build Coastguard Worker 
34*9880d681SAndroid Build Coastguard Worker using namespace llvm;
35*9880d681SAndroid Build Coastguard Worker 
36*9880d681SAndroid Build Coastguard Worker namespace {
37*9880d681SAndroid Build Coastguard Worker 
38*9880d681SAndroid Build Coastguard Worker class SIShrinkInstructions : public MachineFunctionPass {
39*9880d681SAndroid Build Coastguard Worker public:
40*9880d681SAndroid Build Coastguard Worker   static char ID;
41*9880d681SAndroid Build Coastguard Worker 
42*9880d681SAndroid Build Coastguard Worker public:
SIShrinkInstructions()43*9880d681SAndroid Build Coastguard Worker   SIShrinkInstructions() : MachineFunctionPass(ID) {
44*9880d681SAndroid Build Coastguard Worker   }
45*9880d681SAndroid Build Coastguard Worker 
46*9880d681SAndroid Build Coastguard Worker   bool runOnMachineFunction(MachineFunction &MF) override;
47*9880d681SAndroid Build Coastguard Worker 
getPassName() const48*9880d681SAndroid Build Coastguard Worker   const char *getPassName() const override {
49*9880d681SAndroid Build Coastguard Worker     return "SI Shrink Instructions";
50*9880d681SAndroid Build Coastguard Worker   }
51*9880d681SAndroid Build Coastguard Worker 
getAnalysisUsage(AnalysisUsage & AU) const52*9880d681SAndroid Build Coastguard Worker   void getAnalysisUsage(AnalysisUsage &AU) const override {
53*9880d681SAndroid Build Coastguard Worker     AU.setPreservesCFG();
54*9880d681SAndroid Build Coastguard Worker     MachineFunctionPass::getAnalysisUsage(AU);
55*9880d681SAndroid Build Coastguard Worker   }
56*9880d681SAndroid Build Coastguard Worker };
57*9880d681SAndroid Build Coastguard Worker 
58*9880d681SAndroid Build Coastguard Worker } // End anonymous namespace.
59*9880d681SAndroid Build Coastguard Worker 
60*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS(SIShrinkInstructions, DEBUG_TYPE,
61*9880d681SAndroid Build Coastguard Worker                 "SI Shrink Instructions", false, false)
62*9880d681SAndroid Build Coastguard Worker 
63*9880d681SAndroid Build Coastguard Worker char SIShrinkInstructions::ID = 0;
64*9880d681SAndroid Build Coastguard Worker 
createSIShrinkInstructionsPass()65*9880d681SAndroid Build Coastguard Worker FunctionPass *llvm::createSIShrinkInstructionsPass() {
66*9880d681SAndroid Build Coastguard Worker   return new SIShrinkInstructions();
67*9880d681SAndroid Build Coastguard Worker }
68*9880d681SAndroid Build Coastguard Worker 
isVGPR(const MachineOperand * MO,const SIRegisterInfo & TRI,const MachineRegisterInfo & MRI)69*9880d681SAndroid Build Coastguard Worker static bool isVGPR(const MachineOperand *MO, const SIRegisterInfo &TRI,
70*9880d681SAndroid Build Coastguard Worker                    const MachineRegisterInfo &MRI) {
71*9880d681SAndroid Build Coastguard Worker   if (!MO->isReg())
72*9880d681SAndroid Build Coastguard Worker     return false;
73*9880d681SAndroid Build Coastguard Worker 
74*9880d681SAndroid Build Coastguard Worker   if (TargetRegisterInfo::isVirtualRegister(MO->getReg()))
75*9880d681SAndroid Build Coastguard Worker     return TRI.hasVGPRs(MRI.getRegClass(MO->getReg()));
76*9880d681SAndroid Build Coastguard Worker 
77*9880d681SAndroid Build Coastguard Worker   return TRI.hasVGPRs(TRI.getPhysRegClass(MO->getReg()));
78*9880d681SAndroid Build Coastguard Worker }
79*9880d681SAndroid Build Coastguard Worker 
canShrink(MachineInstr & MI,const SIInstrInfo * TII,const SIRegisterInfo & TRI,const MachineRegisterInfo & MRI)80*9880d681SAndroid Build Coastguard Worker static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
81*9880d681SAndroid Build Coastguard Worker                       const SIRegisterInfo &TRI,
82*9880d681SAndroid Build Coastguard Worker                       const MachineRegisterInfo &MRI) {
83*9880d681SAndroid Build Coastguard Worker 
84*9880d681SAndroid Build Coastguard Worker   const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
85*9880d681SAndroid Build Coastguard Worker   // Can't shrink instruction with three operands.
86*9880d681SAndroid Build Coastguard Worker   // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
87*9880d681SAndroid Build Coastguard Worker   // a special case for it.  It can only be shrunk if the third operand
88*9880d681SAndroid Build Coastguard Worker   // is vcc.  We should handle this the same way we handle vopc, by addding
89*9880d681SAndroid Build Coastguard Worker   // a register allocation hint pre-regalloc and then do the shrining
90*9880d681SAndroid Build Coastguard Worker   // post-regalloc.
91*9880d681SAndroid Build Coastguard Worker   if (Src2) {
92*9880d681SAndroid Build Coastguard Worker     switch (MI.getOpcode()) {
93*9880d681SAndroid Build Coastguard Worker       default: return false;
94*9880d681SAndroid Build Coastguard Worker 
95*9880d681SAndroid Build Coastguard Worker       case AMDGPU::V_MAC_F32_e64:
96*9880d681SAndroid Build Coastguard Worker         if (!isVGPR(Src2, TRI, MRI) ||
97*9880d681SAndroid Build Coastguard Worker             TII->hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
98*9880d681SAndroid Build Coastguard Worker           return false;
99*9880d681SAndroid Build Coastguard Worker         break;
100*9880d681SAndroid Build Coastguard Worker 
101*9880d681SAndroid Build Coastguard Worker       case AMDGPU::V_CNDMASK_B32_e64:
102*9880d681SAndroid Build Coastguard Worker         break;
103*9880d681SAndroid Build Coastguard Worker     }
104*9880d681SAndroid Build Coastguard Worker   }
105*9880d681SAndroid Build Coastguard Worker 
106*9880d681SAndroid Build Coastguard Worker   const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
107*9880d681SAndroid Build Coastguard Worker   const MachineOperand *Src1Mod =
108*9880d681SAndroid Build Coastguard Worker       TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
109*9880d681SAndroid Build Coastguard Worker 
110*9880d681SAndroid Build Coastguard Worker   if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0)))
111*9880d681SAndroid Build Coastguard Worker     return false;
112*9880d681SAndroid Build Coastguard Worker 
113*9880d681SAndroid Build Coastguard Worker   // We don't need to check src0, all input types are legal, so just make sure
114*9880d681SAndroid Build Coastguard Worker   // src0 isn't using any modifiers.
115*9880d681SAndroid Build Coastguard Worker   if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
116*9880d681SAndroid Build Coastguard Worker     return false;
117*9880d681SAndroid Build Coastguard Worker 
118*9880d681SAndroid Build Coastguard Worker   // Check output modifiers
119*9880d681SAndroid Build Coastguard Worker   if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
120*9880d681SAndroid Build Coastguard Worker     return false;
121*9880d681SAndroid Build Coastguard Worker 
122*9880d681SAndroid Build Coastguard Worker   return !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp);
123*9880d681SAndroid Build Coastguard Worker }
124*9880d681SAndroid Build Coastguard Worker 
125*9880d681SAndroid Build Coastguard Worker /// \brief This function checks \p MI for operands defined by a move immediate
126*9880d681SAndroid Build Coastguard Worker /// instruction and then folds the literal constant into the instruction if it
127*9880d681SAndroid Build Coastguard Worker /// can.  This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction
128*9880d681SAndroid Build Coastguard Worker /// and will only fold literal constants if we are still in SSA.
foldImmediates(MachineInstr & MI,const SIInstrInfo * TII,MachineRegisterInfo & MRI,bool TryToCommute=true)129*9880d681SAndroid Build Coastguard Worker static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
130*9880d681SAndroid Build Coastguard Worker                            MachineRegisterInfo &MRI, bool TryToCommute = true) {
131*9880d681SAndroid Build Coastguard Worker 
132*9880d681SAndroid Build Coastguard Worker   if (!MRI.isSSA())
133*9880d681SAndroid Build Coastguard Worker     return;
134*9880d681SAndroid Build Coastguard Worker 
135*9880d681SAndroid Build Coastguard Worker   assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
136*9880d681SAndroid Build Coastguard Worker 
137*9880d681SAndroid Build Coastguard Worker   const SIRegisterInfo &TRI = TII->getRegisterInfo();
138*9880d681SAndroid Build Coastguard Worker   int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
139*9880d681SAndroid Build Coastguard Worker   MachineOperand &Src0 = MI.getOperand(Src0Idx);
140*9880d681SAndroid Build Coastguard Worker 
141*9880d681SAndroid Build Coastguard Worker   // Only one literal constant is allowed per instruction, so if src0 is a
142*9880d681SAndroid Build Coastguard Worker   // literal constant then we can't do any folding.
143*9880d681SAndroid Build Coastguard Worker   if (Src0.isImm() &&
144*9880d681SAndroid Build Coastguard Worker       TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
145*9880d681SAndroid Build Coastguard Worker     return;
146*9880d681SAndroid Build Coastguard Worker 
147*9880d681SAndroid Build Coastguard Worker   // Literal constants and SGPRs can only be used in Src0, so if Src0 is an
148*9880d681SAndroid Build Coastguard Worker   // SGPR, we cannot commute the instruction, so we can't fold any literal
149*9880d681SAndroid Build Coastguard Worker   // constants.
150*9880d681SAndroid Build Coastguard Worker   if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI))
151*9880d681SAndroid Build Coastguard Worker     return;
152*9880d681SAndroid Build Coastguard Worker 
153*9880d681SAndroid Build Coastguard Worker   // Try to fold Src0
154*9880d681SAndroid Build Coastguard Worker   if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) {
155*9880d681SAndroid Build Coastguard Worker     unsigned Reg = Src0.getReg();
156*9880d681SAndroid Build Coastguard Worker     MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
157*9880d681SAndroid Build Coastguard Worker     if (Def && Def->isMoveImmediate()) {
158*9880d681SAndroid Build Coastguard Worker       MachineOperand &MovSrc = Def->getOperand(1);
159*9880d681SAndroid Build Coastguard Worker       bool ConstantFolded = false;
160*9880d681SAndroid Build Coastguard Worker 
161*9880d681SAndroid Build Coastguard Worker       if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
162*9880d681SAndroid Build Coastguard Worker         Src0.ChangeToImmediate(MovSrc.getImm());
163*9880d681SAndroid Build Coastguard Worker         ConstantFolded = true;
164*9880d681SAndroid Build Coastguard Worker       }
165*9880d681SAndroid Build Coastguard Worker       if (ConstantFolded) {
166*9880d681SAndroid Build Coastguard Worker         if (MRI.use_empty(Reg))
167*9880d681SAndroid Build Coastguard Worker           Def->eraseFromParent();
168*9880d681SAndroid Build Coastguard Worker         ++NumLiteralConstantsFolded;
169*9880d681SAndroid Build Coastguard Worker         return;
170*9880d681SAndroid Build Coastguard Worker       }
171*9880d681SAndroid Build Coastguard Worker     }
172*9880d681SAndroid Build Coastguard Worker   }
173*9880d681SAndroid Build Coastguard Worker 
174*9880d681SAndroid Build Coastguard Worker   // We have failed to fold src0, so commute the instruction and try again.
175*9880d681SAndroid Build Coastguard Worker   if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(MI))
176*9880d681SAndroid Build Coastguard Worker     foldImmediates(MI, TII, MRI, false);
177*9880d681SAndroid Build Coastguard Worker 
178*9880d681SAndroid Build Coastguard Worker }
179*9880d681SAndroid Build Coastguard Worker 
180*9880d681SAndroid Build Coastguard Worker // Copy MachineOperand with all flags except setting it as implicit.
copyFlagsToImplicitVCC(MachineInstr & MI,const MachineOperand & Orig)181*9880d681SAndroid Build Coastguard Worker static void copyFlagsToImplicitVCC(MachineInstr &MI,
182*9880d681SAndroid Build Coastguard Worker                                    const MachineOperand &Orig) {
183*9880d681SAndroid Build Coastguard Worker 
184*9880d681SAndroid Build Coastguard Worker   for (MachineOperand &Use : MI.implicit_operands()) {
185*9880d681SAndroid Build Coastguard Worker     if (Use.getReg() == AMDGPU::VCC) {
186*9880d681SAndroid Build Coastguard Worker       Use.setIsUndef(Orig.isUndef());
187*9880d681SAndroid Build Coastguard Worker       Use.setIsKill(Orig.isKill());
188*9880d681SAndroid Build Coastguard Worker       return;
189*9880d681SAndroid Build Coastguard Worker     }
190*9880d681SAndroid Build Coastguard Worker   }
191*9880d681SAndroid Build Coastguard Worker }
192*9880d681SAndroid Build Coastguard Worker 
isKImmOperand(const SIInstrInfo * TII,const MachineOperand & Src)193*9880d681SAndroid Build Coastguard Worker static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
194*9880d681SAndroid Build Coastguard Worker   return isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
195*9880d681SAndroid Build Coastguard Worker }
196*9880d681SAndroid Build Coastguard Worker 
runOnMachineFunction(MachineFunction & MF)197*9880d681SAndroid Build Coastguard Worker bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
198*9880d681SAndroid Build Coastguard Worker   if (skipFunction(*MF.getFunction()))
199*9880d681SAndroid Build Coastguard Worker     return false;
200*9880d681SAndroid Build Coastguard Worker 
201*9880d681SAndroid Build Coastguard Worker   MachineRegisterInfo &MRI = MF.getRegInfo();
202*9880d681SAndroid Build Coastguard Worker   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
203*9880d681SAndroid Build Coastguard Worker   const SIInstrInfo *TII = ST.getInstrInfo();
204*9880d681SAndroid Build Coastguard Worker   const SIRegisterInfo &TRI = TII->getRegisterInfo();
205*9880d681SAndroid Build Coastguard Worker 
206*9880d681SAndroid Build Coastguard Worker   std::vector<unsigned> I1Defs;
207*9880d681SAndroid Build Coastguard Worker 
208*9880d681SAndroid Build Coastguard Worker   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
209*9880d681SAndroid Build Coastguard Worker                                                   BI != BE; ++BI) {
210*9880d681SAndroid Build Coastguard Worker 
211*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock &MBB = *BI;
212*9880d681SAndroid Build Coastguard Worker     MachineBasicBlock::iterator I, Next;
213*9880d681SAndroid Build Coastguard Worker     for (I = MBB.begin(); I != MBB.end(); I = Next) {
214*9880d681SAndroid Build Coastguard Worker       Next = std::next(I);
215*9880d681SAndroid Build Coastguard Worker       MachineInstr &MI = *I;
216*9880d681SAndroid Build Coastguard Worker 
217*9880d681SAndroid Build Coastguard Worker       if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
218*9880d681SAndroid Build Coastguard Worker         // If this has a literal constant source that is the same as the
219*9880d681SAndroid Build Coastguard Worker         // reversed bits of an inline immediate, replace with a bitreverse of
220*9880d681SAndroid Build Coastguard Worker         // that constant. This saves 4 bytes in the common case of materializing
221*9880d681SAndroid Build Coastguard Worker         // sign bits.
222*9880d681SAndroid Build Coastguard Worker 
223*9880d681SAndroid Build Coastguard Worker         // Test if we are after regalloc. We only want to do this after any
224*9880d681SAndroid Build Coastguard Worker         // optimizations happen because this will confuse them.
225*9880d681SAndroid Build Coastguard Worker         // XXX - not exactly a check for post-regalloc run.
226*9880d681SAndroid Build Coastguard Worker         MachineOperand &Src = MI.getOperand(1);
227*9880d681SAndroid Build Coastguard Worker         if (Src.isImm() &&
228*9880d681SAndroid Build Coastguard Worker             TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
229*9880d681SAndroid Build Coastguard Worker           int64_t Imm = Src.getImm();
230*9880d681SAndroid Build Coastguard Worker           if (isInt<32>(Imm) && !TII->isInlineConstant(Src, 4)) {
231*9880d681SAndroid Build Coastguard Worker             int32_t ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Imm));
232*9880d681SAndroid Build Coastguard Worker             if (ReverseImm >= -16 && ReverseImm <= 64) {
233*9880d681SAndroid Build Coastguard Worker               MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
234*9880d681SAndroid Build Coastguard Worker               Src.setImm(ReverseImm);
235*9880d681SAndroid Build Coastguard Worker               continue;
236*9880d681SAndroid Build Coastguard Worker             }
237*9880d681SAndroid Build Coastguard Worker           }
238*9880d681SAndroid Build Coastguard Worker         }
239*9880d681SAndroid Build Coastguard Worker       }
240*9880d681SAndroid Build Coastguard Worker 
241*9880d681SAndroid Build Coastguard Worker       // Combine adjacent s_nops to use the immediate operand encoding how long
242*9880d681SAndroid Build Coastguard Worker       // to wait.
243*9880d681SAndroid Build Coastguard Worker       //
244*9880d681SAndroid Build Coastguard Worker       // s_nop N
245*9880d681SAndroid Build Coastguard Worker       // s_nop M
246*9880d681SAndroid Build Coastguard Worker       //  =>
247*9880d681SAndroid Build Coastguard Worker       // s_nop (N + M)
248*9880d681SAndroid Build Coastguard Worker       if (MI.getOpcode() == AMDGPU::S_NOP &&
249*9880d681SAndroid Build Coastguard Worker           Next != MBB.end() &&
250*9880d681SAndroid Build Coastguard Worker           (*Next).getOpcode() == AMDGPU::S_NOP) {
251*9880d681SAndroid Build Coastguard Worker 
252*9880d681SAndroid Build Coastguard Worker         MachineInstr &NextMI = *Next;
253*9880d681SAndroid Build Coastguard Worker         // The instruction encodes the amount to wait with an offset of 1,
254*9880d681SAndroid Build Coastguard Worker         // i.e. 0 is wait 1 cycle. Convert both to cycles and then convert back
255*9880d681SAndroid Build Coastguard Worker         // after adding.
256*9880d681SAndroid Build Coastguard Worker         uint8_t Nop0 = MI.getOperand(0).getImm() + 1;
257*9880d681SAndroid Build Coastguard Worker         uint8_t Nop1 = NextMI.getOperand(0).getImm() + 1;
258*9880d681SAndroid Build Coastguard Worker 
259*9880d681SAndroid Build Coastguard Worker         // Make sure we don't overflow the bounds.
260*9880d681SAndroid Build Coastguard Worker         if (Nop0 + Nop1 <= 8) {
261*9880d681SAndroid Build Coastguard Worker           NextMI.getOperand(0).setImm(Nop0 + Nop1 - 1);
262*9880d681SAndroid Build Coastguard Worker           MI.eraseFromParent();
263*9880d681SAndroid Build Coastguard Worker         }
264*9880d681SAndroid Build Coastguard Worker 
265*9880d681SAndroid Build Coastguard Worker         continue;
266*9880d681SAndroid Build Coastguard Worker       }
267*9880d681SAndroid Build Coastguard Worker 
268*9880d681SAndroid Build Coastguard Worker       // FIXME: We also need to consider movs of constant operands since
269*9880d681SAndroid Build Coastguard Worker       // immediate operands are not folded if they have more than one use, and
270*9880d681SAndroid Build Coastguard Worker       // the operand folding pass is unaware if the immediate will be free since
271*9880d681SAndroid Build Coastguard Worker       // it won't know if the src == dest constraint will end up being
272*9880d681SAndroid Build Coastguard Worker       // satisfied.
273*9880d681SAndroid Build Coastguard Worker       if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
274*9880d681SAndroid Build Coastguard Worker           MI.getOpcode() == AMDGPU::S_MUL_I32) {
275*9880d681SAndroid Build Coastguard Worker         const MachineOperand &Dest = MI.getOperand(0);
276*9880d681SAndroid Build Coastguard Worker         const MachineOperand &Src0 = MI.getOperand(1);
277*9880d681SAndroid Build Coastguard Worker         const MachineOperand &Src1 = MI.getOperand(2);
278*9880d681SAndroid Build Coastguard Worker 
279*9880d681SAndroid Build Coastguard Worker         // FIXME: This could work better if hints worked with subregisters. If
280*9880d681SAndroid Build Coastguard Worker         // we have a vector add of a constant, we usually don't get the correct
281*9880d681SAndroid Build Coastguard Worker         // allocation due to the subregister usage.
282*9880d681SAndroid Build Coastguard Worker         if (TargetRegisterInfo::isVirtualRegister(Dest.getReg()) &&
283*9880d681SAndroid Build Coastguard Worker             Src0.isReg()) {
284*9880d681SAndroid Build Coastguard Worker           MRI.setRegAllocationHint(Dest.getReg(), 0, Src0.getReg());
285*9880d681SAndroid Build Coastguard Worker           continue;
286*9880d681SAndroid Build Coastguard Worker         }
287*9880d681SAndroid Build Coastguard Worker 
288*9880d681SAndroid Build Coastguard Worker         if (Src0.isReg() && Src0.getReg() == Dest.getReg()) {
289*9880d681SAndroid Build Coastguard Worker           if (Src1.isImm() && isKImmOperand(TII, Src1)) {
290*9880d681SAndroid Build Coastguard Worker             unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
291*9880d681SAndroid Build Coastguard Worker               AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
292*9880d681SAndroid Build Coastguard Worker 
293*9880d681SAndroid Build Coastguard Worker             MI.setDesc(TII->get(Opc));
294*9880d681SAndroid Build Coastguard Worker             MI.tieOperands(0, 1);
295*9880d681SAndroid Build Coastguard Worker           }
296*9880d681SAndroid Build Coastguard Worker         }
297*9880d681SAndroid Build Coastguard Worker       }
298*9880d681SAndroid Build Coastguard Worker 
299*9880d681SAndroid Build Coastguard Worker       // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
300*9880d681SAndroid Build Coastguard Worker       if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
301*9880d681SAndroid Build Coastguard Worker         const MachineOperand &Src = MI.getOperand(1);
302*9880d681SAndroid Build Coastguard Worker 
303*9880d681SAndroid Build Coastguard Worker         if (Src.isImm() && isKImmOperand(TII, Src))
304*9880d681SAndroid Build Coastguard Worker           MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
305*9880d681SAndroid Build Coastguard Worker 
306*9880d681SAndroid Build Coastguard Worker         continue;
307*9880d681SAndroid Build Coastguard Worker       }
308*9880d681SAndroid Build Coastguard Worker 
309*9880d681SAndroid Build Coastguard Worker       if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
310*9880d681SAndroid Build Coastguard Worker         continue;
311*9880d681SAndroid Build Coastguard Worker 
312*9880d681SAndroid Build Coastguard Worker       if (!canShrink(MI, TII, TRI, MRI)) {
313*9880d681SAndroid Build Coastguard Worker         // Try commuting the instruction and see if that enables us to shrink
314*9880d681SAndroid Build Coastguard Worker         // it.
315*9880d681SAndroid Build Coastguard Worker         if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
316*9880d681SAndroid Build Coastguard Worker             !canShrink(MI, TII, TRI, MRI))
317*9880d681SAndroid Build Coastguard Worker           continue;
318*9880d681SAndroid Build Coastguard Worker       }
319*9880d681SAndroid Build Coastguard Worker 
320*9880d681SAndroid Build Coastguard Worker       // getVOPe32 could be -1 here if we started with an instruction that had
321*9880d681SAndroid Build Coastguard Worker       // a 32-bit encoding and then commuted it to an instruction that did not.
322*9880d681SAndroid Build Coastguard Worker       if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
323*9880d681SAndroid Build Coastguard Worker         continue;
324*9880d681SAndroid Build Coastguard Worker 
325*9880d681SAndroid Build Coastguard Worker       int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
326*9880d681SAndroid Build Coastguard Worker 
327*9880d681SAndroid Build Coastguard Worker       if (TII->isVOPC(Op32)) {
328*9880d681SAndroid Build Coastguard Worker         unsigned DstReg = MI.getOperand(0).getReg();
329*9880d681SAndroid Build Coastguard Worker         if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
330*9880d681SAndroid Build Coastguard Worker           // VOPC instructions can only write to the VCC register. We can't
331*9880d681SAndroid Build Coastguard Worker           // force them to use VCC here, because this is only one register and
332*9880d681SAndroid Build Coastguard Worker           // cannot deal with sequences which would require multiple copies of
333*9880d681SAndroid Build Coastguard Worker           // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
334*9880d681SAndroid Build Coastguard Worker           //
335*9880d681SAndroid Build Coastguard Worker           // So, instead of forcing the instruction to write to VCC, we provide
336*9880d681SAndroid Build Coastguard Worker           // a hint to the register allocator to use VCC and then we we will run
337*9880d681SAndroid Build Coastguard Worker           // this pass again after RA and shrink it if it outputs to VCC.
338*9880d681SAndroid Build Coastguard Worker           MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
339*9880d681SAndroid Build Coastguard Worker           continue;
340*9880d681SAndroid Build Coastguard Worker         }
341*9880d681SAndroid Build Coastguard Worker         if (DstReg != AMDGPU::VCC)
342*9880d681SAndroid Build Coastguard Worker           continue;
343*9880d681SAndroid Build Coastguard Worker       }
344*9880d681SAndroid Build Coastguard Worker 
345*9880d681SAndroid Build Coastguard Worker       if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
346*9880d681SAndroid Build Coastguard Worker         // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
347*9880d681SAndroid Build Coastguard Worker         // instructions.
348*9880d681SAndroid Build Coastguard Worker         const MachineOperand *Src2 =
349*9880d681SAndroid Build Coastguard Worker             TII->getNamedOperand(MI, AMDGPU::OpName::src2);
350*9880d681SAndroid Build Coastguard Worker         if (!Src2->isReg())
351*9880d681SAndroid Build Coastguard Worker           continue;
352*9880d681SAndroid Build Coastguard Worker         unsigned SReg = Src2->getReg();
353*9880d681SAndroid Build Coastguard Worker         if (TargetRegisterInfo::isVirtualRegister(SReg)) {
354*9880d681SAndroid Build Coastguard Worker           MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
355*9880d681SAndroid Build Coastguard Worker           continue;
356*9880d681SAndroid Build Coastguard Worker         }
357*9880d681SAndroid Build Coastguard Worker         if (SReg != AMDGPU::VCC)
358*9880d681SAndroid Build Coastguard Worker           continue;
359*9880d681SAndroid Build Coastguard Worker       }
360*9880d681SAndroid Build Coastguard Worker 
361*9880d681SAndroid Build Coastguard Worker       // We can shrink this instruction
362*9880d681SAndroid Build Coastguard Worker       DEBUG(dbgs() << "Shrinking " << MI);
363*9880d681SAndroid Build Coastguard Worker 
364*9880d681SAndroid Build Coastguard Worker       MachineInstrBuilder Inst32 =
365*9880d681SAndroid Build Coastguard Worker           BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
366*9880d681SAndroid Build Coastguard Worker 
367*9880d681SAndroid Build Coastguard Worker       // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
368*9880d681SAndroid Build Coastguard Worker       // For VOPC instructions, this is replaced by an implicit def of vcc.
369*9880d681SAndroid Build Coastguard Worker       int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
370*9880d681SAndroid Build Coastguard Worker       if (Op32DstIdx != -1) {
371*9880d681SAndroid Build Coastguard Worker         // dst
372*9880d681SAndroid Build Coastguard Worker         Inst32.addOperand(MI.getOperand(0));
373*9880d681SAndroid Build Coastguard Worker       } else {
374*9880d681SAndroid Build Coastguard Worker         assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
375*9880d681SAndroid Build Coastguard Worker                "Unexpected case");
376*9880d681SAndroid Build Coastguard Worker       }
377*9880d681SAndroid Build Coastguard Worker 
378*9880d681SAndroid Build Coastguard Worker 
379*9880d681SAndroid Build Coastguard Worker       Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
380*9880d681SAndroid Build Coastguard Worker 
381*9880d681SAndroid Build Coastguard Worker       const MachineOperand *Src1 =
382*9880d681SAndroid Build Coastguard Worker           TII->getNamedOperand(MI, AMDGPU::OpName::src1);
383*9880d681SAndroid Build Coastguard Worker       if (Src1)
384*9880d681SAndroid Build Coastguard Worker         Inst32.addOperand(*Src1);
385*9880d681SAndroid Build Coastguard Worker 
386*9880d681SAndroid Build Coastguard Worker       const MachineOperand *Src2 =
387*9880d681SAndroid Build Coastguard Worker         TII->getNamedOperand(MI, AMDGPU::OpName::src2);
388*9880d681SAndroid Build Coastguard Worker       if (Src2) {
389*9880d681SAndroid Build Coastguard Worker         int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
390*9880d681SAndroid Build Coastguard Worker         if (Op32Src2Idx != -1) {
391*9880d681SAndroid Build Coastguard Worker           Inst32.addOperand(*Src2);
392*9880d681SAndroid Build Coastguard Worker         } else {
393*9880d681SAndroid Build Coastguard Worker           // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
394*9880d681SAndroid Build Coastguard Worker           // replaced with an implicit read of vcc. This was already added
395*9880d681SAndroid Build Coastguard Worker           // during the initial BuildMI, so find it to preserve the flags.
396*9880d681SAndroid Build Coastguard Worker           copyFlagsToImplicitVCC(*Inst32, *Src2);
397*9880d681SAndroid Build Coastguard Worker         }
398*9880d681SAndroid Build Coastguard Worker       }
399*9880d681SAndroid Build Coastguard Worker 
400*9880d681SAndroid Build Coastguard Worker       ++NumInstructionsShrunk;
401*9880d681SAndroid Build Coastguard Worker       MI.eraseFromParent();
402*9880d681SAndroid Build Coastguard Worker 
403*9880d681SAndroid Build Coastguard Worker       foldImmediates(*Inst32, TII, MRI);
404*9880d681SAndroid Build Coastguard Worker       DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
405*9880d681SAndroid Build Coastguard Worker 
406*9880d681SAndroid Build Coastguard Worker 
407*9880d681SAndroid Build Coastguard Worker     }
408*9880d681SAndroid Build Coastguard Worker   }
409*9880d681SAndroid Build Coastguard Worker   return false;
410*9880d681SAndroid Build Coastguard Worker }
411