1*9880d681SAndroid Build Coastguard Worker //===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker // The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker /// The pass tries to use the 32-bit encoding for instructions when possible.
9*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
10*9880d681SAndroid Build Coastguard Worker //
11*9880d681SAndroid Build Coastguard Worker
12*9880d681SAndroid Build Coastguard Worker #include "AMDGPU.h"
13*9880d681SAndroid Build Coastguard Worker #include "AMDGPUMCInstLower.h"
14*9880d681SAndroid Build Coastguard Worker #include "AMDGPUSubtarget.h"
15*9880d681SAndroid Build Coastguard Worker #include "SIInstrInfo.h"
16*9880d681SAndroid Build Coastguard Worker #include "llvm/ADT/Statistic.h"
17*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunctionPass.h"
18*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineInstrBuilder.h"
19*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineRegisterInfo.h"
20*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/Constants.h"
21*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/Function.h"
22*9880d681SAndroid Build Coastguard Worker #include "llvm/IR/LLVMContext.h"
23*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/Debug.h"
24*9880d681SAndroid Build Coastguard Worker #include "llvm/Support/raw_ostream.h"
25*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetMachine.h"
26*9880d681SAndroid Build Coastguard Worker
27*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "si-shrink-instructions"
28*9880d681SAndroid Build Coastguard Worker
29*9880d681SAndroid Build Coastguard Worker STATISTIC(NumInstructionsShrunk,
30*9880d681SAndroid Build Coastguard Worker "Number of 64-bit instruction reduced to 32-bit.");
31*9880d681SAndroid Build Coastguard Worker STATISTIC(NumLiteralConstantsFolded,
32*9880d681SAndroid Build Coastguard Worker "Number of literal constants folded into 32-bit instructions.");
33*9880d681SAndroid Build Coastguard Worker
34*9880d681SAndroid Build Coastguard Worker using namespace llvm;
35*9880d681SAndroid Build Coastguard Worker
36*9880d681SAndroid Build Coastguard Worker namespace {
37*9880d681SAndroid Build Coastguard Worker
38*9880d681SAndroid Build Coastguard Worker class SIShrinkInstructions : public MachineFunctionPass {
39*9880d681SAndroid Build Coastguard Worker public:
40*9880d681SAndroid Build Coastguard Worker static char ID;
41*9880d681SAndroid Build Coastguard Worker
42*9880d681SAndroid Build Coastguard Worker public:
SIShrinkInstructions()43*9880d681SAndroid Build Coastguard Worker SIShrinkInstructions() : MachineFunctionPass(ID) {
44*9880d681SAndroid Build Coastguard Worker }
45*9880d681SAndroid Build Coastguard Worker
46*9880d681SAndroid Build Coastguard Worker bool runOnMachineFunction(MachineFunction &MF) override;
47*9880d681SAndroid Build Coastguard Worker
getPassName() const48*9880d681SAndroid Build Coastguard Worker const char *getPassName() const override {
49*9880d681SAndroid Build Coastguard Worker return "SI Shrink Instructions";
50*9880d681SAndroid Build Coastguard Worker }
51*9880d681SAndroid Build Coastguard Worker
getAnalysisUsage(AnalysisUsage & AU) const52*9880d681SAndroid Build Coastguard Worker void getAnalysisUsage(AnalysisUsage &AU) const override {
53*9880d681SAndroid Build Coastguard Worker AU.setPreservesCFG();
54*9880d681SAndroid Build Coastguard Worker MachineFunctionPass::getAnalysisUsage(AU);
55*9880d681SAndroid Build Coastguard Worker }
56*9880d681SAndroid Build Coastguard Worker };
57*9880d681SAndroid Build Coastguard Worker
58*9880d681SAndroid Build Coastguard Worker } // End anonymous namespace.
59*9880d681SAndroid Build Coastguard Worker
60*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS(SIShrinkInstructions, DEBUG_TYPE,
61*9880d681SAndroid Build Coastguard Worker "SI Shrink Instructions", false, false)
62*9880d681SAndroid Build Coastguard Worker
63*9880d681SAndroid Build Coastguard Worker char SIShrinkInstructions::ID = 0;
64*9880d681SAndroid Build Coastguard Worker
createSIShrinkInstructionsPass()65*9880d681SAndroid Build Coastguard Worker FunctionPass *llvm::createSIShrinkInstructionsPass() {
66*9880d681SAndroid Build Coastguard Worker return new SIShrinkInstructions();
67*9880d681SAndroid Build Coastguard Worker }
68*9880d681SAndroid Build Coastguard Worker
isVGPR(const MachineOperand * MO,const SIRegisterInfo & TRI,const MachineRegisterInfo & MRI)69*9880d681SAndroid Build Coastguard Worker static bool isVGPR(const MachineOperand *MO, const SIRegisterInfo &TRI,
70*9880d681SAndroid Build Coastguard Worker const MachineRegisterInfo &MRI) {
71*9880d681SAndroid Build Coastguard Worker if (!MO->isReg())
72*9880d681SAndroid Build Coastguard Worker return false;
73*9880d681SAndroid Build Coastguard Worker
74*9880d681SAndroid Build Coastguard Worker if (TargetRegisterInfo::isVirtualRegister(MO->getReg()))
75*9880d681SAndroid Build Coastguard Worker return TRI.hasVGPRs(MRI.getRegClass(MO->getReg()));
76*9880d681SAndroid Build Coastguard Worker
77*9880d681SAndroid Build Coastguard Worker return TRI.hasVGPRs(TRI.getPhysRegClass(MO->getReg()));
78*9880d681SAndroid Build Coastguard Worker }
79*9880d681SAndroid Build Coastguard Worker
canShrink(MachineInstr & MI,const SIInstrInfo * TII,const SIRegisterInfo & TRI,const MachineRegisterInfo & MRI)80*9880d681SAndroid Build Coastguard Worker static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
81*9880d681SAndroid Build Coastguard Worker const SIRegisterInfo &TRI,
82*9880d681SAndroid Build Coastguard Worker const MachineRegisterInfo &MRI) {
83*9880d681SAndroid Build Coastguard Worker
84*9880d681SAndroid Build Coastguard Worker const MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
85*9880d681SAndroid Build Coastguard Worker // Can't shrink instruction with three operands.
86*9880d681SAndroid Build Coastguard Worker // FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
87*9880d681SAndroid Build Coastguard Worker // a special case for it. It can only be shrunk if the third operand
88*9880d681SAndroid Build Coastguard Worker // is vcc. We should handle this the same way we handle vopc, by addding
89*9880d681SAndroid Build Coastguard Worker // a register allocation hint pre-regalloc and then do the shrining
90*9880d681SAndroid Build Coastguard Worker // post-regalloc.
91*9880d681SAndroid Build Coastguard Worker if (Src2) {
92*9880d681SAndroid Build Coastguard Worker switch (MI.getOpcode()) {
93*9880d681SAndroid Build Coastguard Worker default: return false;
94*9880d681SAndroid Build Coastguard Worker
95*9880d681SAndroid Build Coastguard Worker case AMDGPU::V_MAC_F32_e64:
96*9880d681SAndroid Build Coastguard Worker if (!isVGPR(Src2, TRI, MRI) ||
97*9880d681SAndroid Build Coastguard Worker TII->hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
98*9880d681SAndroid Build Coastguard Worker return false;
99*9880d681SAndroid Build Coastguard Worker break;
100*9880d681SAndroid Build Coastguard Worker
101*9880d681SAndroid Build Coastguard Worker case AMDGPU::V_CNDMASK_B32_e64:
102*9880d681SAndroid Build Coastguard Worker break;
103*9880d681SAndroid Build Coastguard Worker }
104*9880d681SAndroid Build Coastguard Worker }
105*9880d681SAndroid Build Coastguard Worker
106*9880d681SAndroid Build Coastguard Worker const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
107*9880d681SAndroid Build Coastguard Worker const MachineOperand *Src1Mod =
108*9880d681SAndroid Build Coastguard Worker TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
109*9880d681SAndroid Build Coastguard Worker
110*9880d681SAndroid Build Coastguard Worker if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0)))
111*9880d681SAndroid Build Coastguard Worker return false;
112*9880d681SAndroid Build Coastguard Worker
113*9880d681SAndroid Build Coastguard Worker // We don't need to check src0, all input types are legal, so just make sure
114*9880d681SAndroid Build Coastguard Worker // src0 isn't using any modifiers.
115*9880d681SAndroid Build Coastguard Worker if (TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers))
116*9880d681SAndroid Build Coastguard Worker return false;
117*9880d681SAndroid Build Coastguard Worker
118*9880d681SAndroid Build Coastguard Worker // Check output modifiers
119*9880d681SAndroid Build Coastguard Worker if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
120*9880d681SAndroid Build Coastguard Worker return false;
121*9880d681SAndroid Build Coastguard Worker
122*9880d681SAndroid Build Coastguard Worker return !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp);
123*9880d681SAndroid Build Coastguard Worker }
124*9880d681SAndroid Build Coastguard Worker
125*9880d681SAndroid Build Coastguard Worker /// \brief This function checks \p MI for operands defined by a move immediate
126*9880d681SAndroid Build Coastguard Worker /// instruction and then folds the literal constant into the instruction if it
127*9880d681SAndroid Build Coastguard Worker /// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction
128*9880d681SAndroid Build Coastguard Worker /// and will only fold literal constants if we are still in SSA.
foldImmediates(MachineInstr & MI,const SIInstrInfo * TII,MachineRegisterInfo & MRI,bool TryToCommute=true)129*9880d681SAndroid Build Coastguard Worker static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
130*9880d681SAndroid Build Coastguard Worker MachineRegisterInfo &MRI, bool TryToCommute = true) {
131*9880d681SAndroid Build Coastguard Worker
132*9880d681SAndroid Build Coastguard Worker if (!MRI.isSSA())
133*9880d681SAndroid Build Coastguard Worker return;
134*9880d681SAndroid Build Coastguard Worker
135*9880d681SAndroid Build Coastguard Worker assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
136*9880d681SAndroid Build Coastguard Worker
137*9880d681SAndroid Build Coastguard Worker const SIRegisterInfo &TRI = TII->getRegisterInfo();
138*9880d681SAndroid Build Coastguard Worker int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
139*9880d681SAndroid Build Coastguard Worker MachineOperand &Src0 = MI.getOperand(Src0Idx);
140*9880d681SAndroid Build Coastguard Worker
141*9880d681SAndroid Build Coastguard Worker // Only one literal constant is allowed per instruction, so if src0 is a
142*9880d681SAndroid Build Coastguard Worker // literal constant then we can't do any folding.
143*9880d681SAndroid Build Coastguard Worker if (Src0.isImm() &&
144*9880d681SAndroid Build Coastguard Worker TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
145*9880d681SAndroid Build Coastguard Worker return;
146*9880d681SAndroid Build Coastguard Worker
147*9880d681SAndroid Build Coastguard Worker // Literal constants and SGPRs can only be used in Src0, so if Src0 is an
148*9880d681SAndroid Build Coastguard Worker // SGPR, we cannot commute the instruction, so we can't fold any literal
149*9880d681SAndroid Build Coastguard Worker // constants.
150*9880d681SAndroid Build Coastguard Worker if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI))
151*9880d681SAndroid Build Coastguard Worker return;
152*9880d681SAndroid Build Coastguard Worker
153*9880d681SAndroid Build Coastguard Worker // Try to fold Src0
154*9880d681SAndroid Build Coastguard Worker if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) {
155*9880d681SAndroid Build Coastguard Worker unsigned Reg = Src0.getReg();
156*9880d681SAndroid Build Coastguard Worker MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
157*9880d681SAndroid Build Coastguard Worker if (Def && Def->isMoveImmediate()) {
158*9880d681SAndroid Build Coastguard Worker MachineOperand &MovSrc = Def->getOperand(1);
159*9880d681SAndroid Build Coastguard Worker bool ConstantFolded = false;
160*9880d681SAndroid Build Coastguard Worker
161*9880d681SAndroid Build Coastguard Worker if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
162*9880d681SAndroid Build Coastguard Worker Src0.ChangeToImmediate(MovSrc.getImm());
163*9880d681SAndroid Build Coastguard Worker ConstantFolded = true;
164*9880d681SAndroid Build Coastguard Worker }
165*9880d681SAndroid Build Coastguard Worker if (ConstantFolded) {
166*9880d681SAndroid Build Coastguard Worker if (MRI.use_empty(Reg))
167*9880d681SAndroid Build Coastguard Worker Def->eraseFromParent();
168*9880d681SAndroid Build Coastguard Worker ++NumLiteralConstantsFolded;
169*9880d681SAndroid Build Coastguard Worker return;
170*9880d681SAndroid Build Coastguard Worker }
171*9880d681SAndroid Build Coastguard Worker }
172*9880d681SAndroid Build Coastguard Worker }
173*9880d681SAndroid Build Coastguard Worker
174*9880d681SAndroid Build Coastguard Worker // We have failed to fold src0, so commute the instruction and try again.
175*9880d681SAndroid Build Coastguard Worker if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(MI))
176*9880d681SAndroid Build Coastguard Worker foldImmediates(MI, TII, MRI, false);
177*9880d681SAndroid Build Coastguard Worker
178*9880d681SAndroid Build Coastguard Worker }
179*9880d681SAndroid Build Coastguard Worker
180*9880d681SAndroid Build Coastguard Worker // Copy MachineOperand with all flags except setting it as implicit.
copyFlagsToImplicitVCC(MachineInstr & MI,const MachineOperand & Orig)181*9880d681SAndroid Build Coastguard Worker static void copyFlagsToImplicitVCC(MachineInstr &MI,
182*9880d681SAndroid Build Coastguard Worker const MachineOperand &Orig) {
183*9880d681SAndroid Build Coastguard Worker
184*9880d681SAndroid Build Coastguard Worker for (MachineOperand &Use : MI.implicit_operands()) {
185*9880d681SAndroid Build Coastguard Worker if (Use.getReg() == AMDGPU::VCC) {
186*9880d681SAndroid Build Coastguard Worker Use.setIsUndef(Orig.isUndef());
187*9880d681SAndroid Build Coastguard Worker Use.setIsKill(Orig.isKill());
188*9880d681SAndroid Build Coastguard Worker return;
189*9880d681SAndroid Build Coastguard Worker }
190*9880d681SAndroid Build Coastguard Worker }
191*9880d681SAndroid Build Coastguard Worker }
192*9880d681SAndroid Build Coastguard Worker
isKImmOperand(const SIInstrInfo * TII,const MachineOperand & Src)193*9880d681SAndroid Build Coastguard Worker static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
194*9880d681SAndroid Build Coastguard Worker return isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
195*9880d681SAndroid Build Coastguard Worker }
196*9880d681SAndroid Build Coastguard Worker
runOnMachineFunction(MachineFunction & MF)197*9880d681SAndroid Build Coastguard Worker bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
198*9880d681SAndroid Build Coastguard Worker if (skipFunction(*MF.getFunction()))
199*9880d681SAndroid Build Coastguard Worker return false;
200*9880d681SAndroid Build Coastguard Worker
201*9880d681SAndroid Build Coastguard Worker MachineRegisterInfo &MRI = MF.getRegInfo();
202*9880d681SAndroid Build Coastguard Worker const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
203*9880d681SAndroid Build Coastguard Worker const SIInstrInfo *TII = ST.getInstrInfo();
204*9880d681SAndroid Build Coastguard Worker const SIRegisterInfo &TRI = TII->getRegisterInfo();
205*9880d681SAndroid Build Coastguard Worker
206*9880d681SAndroid Build Coastguard Worker std::vector<unsigned> I1Defs;
207*9880d681SAndroid Build Coastguard Worker
208*9880d681SAndroid Build Coastguard Worker for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
209*9880d681SAndroid Build Coastguard Worker BI != BE; ++BI) {
210*9880d681SAndroid Build Coastguard Worker
211*9880d681SAndroid Build Coastguard Worker MachineBasicBlock &MBB = *BI;
212*9880d681SAndroid Build Coastguard Worker MachineBasicBlock::iterator I, Next;
213*9880d681SAndroid Build Coastguard Worker for (I = MBB.begin(); I != MBB.end(); I = Next) {
214*9880d681SAndroid Build Coastguard Worker Next = std::next(I);
215*9880d681SAndroid Build Coastguard Worker MachineInstr &MI = *I;
216*9880d681SAndroid Build Coastguard Worker
217*9880d681SAndroid Build Coastguard Worker if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
218*9880d681SAndroid Build Coastguard Worker // If this has a literal constant source that is the same as the
219*9880d681SAndroid Build Coastguard Worker // reversed bits of an inline immediate, replace with a bitreverse of
220*9880d681SAndroid Build Coastguard Worker // that constant. This saves 4 bytes in the common case of materializing
221*9880d681SAndroid Build Coastguard Worker // sign bits.
222*9880d681SAndroid Build Coastguard Worker
223*9880d681SAndroid Build Coastguard Worker // Test if we are after regalloc. We only want to do this after any
224*9880d681SAndroid Build Coastguard Worker // optimizations happen because this will confuse them.
225*9880d681SAndroid Build Coastguard Worker // XXX - not exactly a check for post-regalloc run.
226*9880d681SAndroid Build Coastguard Worker MachineOperand &Src = MI.getOperand(1);
227*9880d681SAndroid Build Coastguard Worker if (Src.isImm() &&
228*9880d681SAndroid Build Coastguard Worker TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
229*9880d681SAndroid Build Coastguard Worker int64_t Imm = Src.getImm();
230*9880d681SAndroid Build Coastguard Worker if (isInt<32>(Imm) && !TII->isInlineConstant(Src, 4)) {
231*9880d681SAndroid Build Coastguard Worker int32_t ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Imm));
232*9880d681SAndroid Build Coastguard Worker if (ReverseImm >= -16 && ReverseImm <= 64) {
233*9880d681SAndroid Build Coastguard Worker MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
234*9880d681SAndroid Build Coastguard Worker Src.setImm(ReverseImm);
235*9880d681SAndroid Build Coastguard Worker continue;
236*9880d681SAndroid Build Coastguard Worker }
237*9880d681SAndroid Build Coastguard Worker }
238*9880d681SAndroid Build Coastguard Worker }
239*9880d681SAndroid Build Coastguard Worker }
240*9880d681SAndroid Build Coastguard Worker
241*9880d681SAndroid Build Coastguard Worker // Combine adjacent s_nops to use the immediate operand encoding how long
242*9880d681SAndroid Build Coastguard Worker // to wait.
243*9880d681SAndroid Build Coastguard Worker //
244*9880d681SAndroid Build Coastguard Worker // s_nop N
245*9880d681SAndroid Build Coastguard Worker // s_nop M
246*9880d681SAndroid Build Coastguard Worker // =>
247*9880d681SAndroid Build Coastguard Worker // s_nop (N + M)
248*9880d681SAndroid Build Coastguard Worker if (MI.getOpcode() == AMDGPU::S_NOP &&
249*9880d681SAndroid Build Coastguard Worker Next != MBB.end() &&
250*9880d681SAndroid Build Coastguard Worker (*Next).getOpcode() == AMDGPU::S_NOP) {
251*9880d681SAndroid Build Coastguard Worker
252*9880d681SAndroid Build Coastguard Worker MachineInstr &NextMI = *Next;
253*9880d681SAndroid Build Coastguard Worker // The instruction encodes the amount to wait with an offset of 1,
254*9880d681SAndroid Build Coastguard Worker // i.e. 0 is wait 1 cycle. Convert both to cycles and then convert back
255*9880d681SAndroid Build Coastguard Worker // after adding.
256*9880d681SAndroid Build Coastguard Worker uint8_t Nop0 = MI.getOperand(0).getImm() + 1;
257*9880d681SAndroid Build Coastguard Worker uint8_t Nop1 = NextMI.getOperand(0).getImm() + 1;
258*9880d681SAndroid Build Coastguard Worker
259*9880d681SAndroid Build Coastguard Worker // Make sure we don't overflow the bounds.
260*9880d681SAndroid Build Coastguard Worker if (Nop0 + Nop1 <= 8) {
261*9880d681SAndroid Build Coastguard Worker NextMI.getOperand(0).setImm(Nop0 + Nop1 - 1);
262*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
263*9880d681SAndroid Build Coastguard Worker }
264*9880d681SAndroid Build Coastguard Worker
265*9880d681SAndroid Build Coastguard Worker continue;
266*9880d681SAndroid Build Coastguard Worker }
267*9880d681SAndroid Build Coastguard Worker
268*9880d681SAndroid Build Coastguard Worker // FIXME: We also need to consider movs of constant operands since
269*9880d681SAndroid Build Coastguard Worker // immediate operands are not folded if they have more than one use, and
270*9880d681SAndroid Build Coastguard Worker // the operand folding pass is unaware if the immediate will be free since
271*9880d681SAndroid Build Coastguard Worker // it won't know if the src == dest constraint will end up being
272*9880d681SAndroid Build Coastguard Worker // satisfied.
273*9880d681SAndroid Build Coastguard Worker if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
274*9880d681SAndroid Build Coastguard Worker MI.getOpcode() == AMDGPU::S_MUL_I32) {
275*9880d681SAndroid Build Coastguard Worker const MachineOperand &Dest = MI.getOperand(0);
276*9880d681SAndroid Build Coastguard Worker const MachineOperand &Src0 = MI.getOperand(1);
277*9880d681SAndroid Build Coastguard Worker const MachineOperand &Src1 = MI.getOperand(2);
278*9880d681SAndroid Build Coastguard Worker
279*9880d681SAndroid Build Coastguard Worker // FIXME: This could work better if hints worked with subregisters. If
280*9880d681SAndroid Build Coastguard Worker // we have a vector add of a constant, we usually don't get the correct
281*9880d681SAndroid Build Coastguard Worker // allocation due to the subregister usage.
282*9880d681SAndroid Build Coastguard Worker if (TargetRegisterInfo::isVirtualRegister(Dest.getReg()) &&
283*9880d681SAndroid Build Coastguard Worker Src0.isReg()) {
284*9880d681SAndroid Build Coastguard Worker MRI.setRegAllocationHint(Dest.getReg(), 0, Src0.getReg());
285*9880d681SAndroid Build Coastguard Worker continue;
286*9880d681SAndroid Build Coastguard Worker }
287*9880d681SAndroid Build Coastguard Worker
288*9880d681SAndroid Build Coastguard Worker if (Src0.isReg() && Src0.getReg() == Dest.getReg()) {
289*9880d681SAndroid Build Coastguard Worker if (Src1.isImm() && isKImmOperand(TII, Src1)) {
290*9880d681SAndroid Build Coastguard Worker unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
291*9880d681SAndroid Build Coastguard Worker AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
292*9880d681SAndroid Build Coastguard Worker
293*9880d681SAndroid Build Coastguard Worker MI.setDesc(TII->get(Opc));
294*9880d681SAndroid Build Coastguard Worker MI.tieOperands(0, 1);
295*9880d681SAndroid Build Coastguard Worker }
296*9880d681SAndroid Build Coastguard Worker }
297*9880d681SAndroid Build Coastguard Worker }
298*9880d681SAndroid Build Coastguard Worker
299*9880d681SAndroid Build Coastguard Worker // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
300*9880d681SAndroid Build Coastguard Worker if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
301*9880d681SAndroid Build Coastguard Worker const MachineOperand &Src = MI.getOperand(1);
302*9880d681SAndroid Build Coastguard Worker
303*9880d681SAndroid Build Coastguard Worker if (Src.isImm() && isKImmOperand(TII, Src))
304*9880d681SAndroid Build Coastguard Worker MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
305*9880d681SAndroid Build Coastguard Worker
306*9880d681SAndroid Build Coastguard Worker continue;
307*9880d681SAndroid Build Coastguard Worker }
308*9880d681SAndroid Build Coastguard Worker
309*9880d681SAndroid Build Coastguard Worker if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
310*9880d681SAndroid Build Coastguard Worker continue;
311*9880d681SAndroid Build Coastguard Worker
312*9880d681SAndroid Build Coastguard Worker if (!canShrink(MI, TII, TRI, MRI)) {
313*9880d681SAndroid Build Coastguard Worker // Try commuting the instruction and see if that enables us to shrink
314*9880d681SAndroid Build Coastguard Worker // it.
315*9880d681SAndroid Build Coastguard Worker if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
316*9880d681SAndroid Build Coastguard Worker !canShrink(MI, TII, TRI, MRI))
317*9880d681SAndroid Build Coastguard Worker continue;
318*9880d681SAndroid Build Coastguard Worker }
319*9880d681SAndroid Build Coastguard Worker
320*9880d681SAndroid Build Coastguard Worker // getVOPe32 could be -1 here if we started with an instruction that had
321*9880d681SAndroid Build Coastguard Worker // a 32-bit encoding and then commuted it to an instruction that did not.
322*9880d681SAndroid Build Coastguard Worker if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
323*9880d681SAndroid Build Coastguard Worker continue;
324*9880d681SAndroid Build Coastguard Worker
325*9880d681SAndroid Build Coastguard Worker int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
326*9880d681SAndroid Build Coastguard Worker
327*9880d681SAndroid Build Coastguard Worker if (TII->isVOPC(Op32)) {
328*9880d681SAndroid Build Coastguard Worker unsigned DstReg = MI.getOperand(0).getReg();
329*9880d681SAndroid Build Coastguard Worker if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
330*9880d681SAndroid Build Coastguard Worker // VOPC instructions can only write to the VCC register. We can't
331*9880d681SAndroid Build Coastguard Worker // force them to use VCC here, because this is only one register and
332*9880d681SAndroid Build Coastguard Worker // cannot deal with sequences which would require multiple copies of
333*9880d681SAndroid Build Coastguard Worker // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
334*9880d681SAndroid Build Coastguard Worker //
335*9880d681SAndroid Build Coastguard Worker // So, instead of forcing the instruction to write to VCC, we provide
336*9880d681SAndroid Build Coastguard Worker // a hint to the register allocator to use VCC and then we we will run
337*9880d681SAndroid Build Coastguard Worker // this pass again after RA and shrink it if it outputs to VCC.
338*9880d681SAndroid Build Coastguard Worker MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
339*9880d681SAndroid Build Coastguard Worker continue;
340*9880d681SAndroid Build Coastguard Worker }
341*9880d681SAndroid Build Coastguard Worker if (DstReg != AMDGPU::VCC)
342*9880d681SAndroid Build Coastguard Worker continue;
343*9880d681SAndroid Build Coastguard Worker }
344*9880d681SAndroid Build Coastguard Worker
345*9880d681SAndroid Build Coastguard Worker if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
346*9880d681SAndroid Build Coastguard Worker // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
347*9880d681SAndroid Build Coastguard Worker // instructions.
348*9880d681SAndroid Build Coastguard Worker const MachineOperand *Src2 =
349*9880d681SAndroid Build Coastguard Worker TII->getNamedOperand(MI, AMDGPU::OpName::src2);
350*9880d681SAndroid Build Coastguard Worker if (!Src2->isReg())
351*9880d681SAndroid Build Coastguard Worker continue;
352*9880d681SAndroid Build Coastguard Worker unsigned SReg = Src2->getReg();
353*9880d681SAndroid Build Coastguard Worker if (TargetRegisterInfo::isVirtualRegister(SReg)) {
354*9880d681SAndroid Build Coastguard Worker MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
355*9880d681SAndroid Build Coastguard Worker continue;
356*9880d681SAndroid Build Coastguard Worker }
357*9880d681SAndroid Build Coastguard Worker if (SReg != AMDGPU::VCC)
358*9880d681SAndroid Build Coastguard Worker continue;
359*9880d681SAndroid Build Coastguard Worker }
360*9880d681SAndroid Build Coastguard Worker
361*9880d681SAndroid Build Coastguard Worker // We can shrink this instruction
362*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "Shrinking " << MI);
363*9880d681SAndroid Build Coastguard Worker
364*9880d681SAndroid Build Coastguard Worker MachineInstrBuilder Inst32 =
365*9880d681SAndroid Build Coastguard Worker BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
366*9880d681SAndroid Build Coastguard Worker
367*9880d681SAndroid Build Coastguard Worker // Add the dst operand if the 32-bit encoding also has an explicit $vdst.
368*9880d681SAndroid Build Coastguard Worker // For VOPC instructions, this is replaced by an implicit def of vcc.
369*9880d681SAndroid Build Coastguard Worker int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::vdst);
370*9880d681SAndroid Build Coastguard Worker if (Op32DstIdx != -1) {
371*9880d681SAndroid Build Coastguard Worker // dst
372*9880d681SAndroid Build Coastguard Worker Inst32.addOperand(MI.getOperand(0));
373*9880d681SAndroid Build Coastguard Worker } else {
374*9880d681SAndroid Build Coastguard Worker assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
375*9880d681SAndroid Build Coastguard Worker "Unexpected case");
376*9880d681SAndroid Build Coastguard Worker }
377*9880d681SAndroid Build Coastguard Worker
378*9880d681SAndroid Build Coastguard Worker
379*9880d681SAndroid Build Coastguard Worker Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
380*9880d681SAndroid Build Coastguard Worker
381*9880d681SAndroid Build Coastguard Worker const MachineOperand *Src1 =
382*9880d681SAndroid Build Coastguard Worker TII->getNamedOperand(MI, AMDGPU::OpName::src1);
383*9880d681SAndroid Build Coastguard Worker if (Src1)
384*9880d681SAndroid Build Coastguard Worker Inst32.addOperand(*Src1);
385*9880d681SAndroid Build Coastguard Worker
386*9880d681SAndroid Build Coastguard Worker const MachineOperand *Src2 =
387*9880d681SAndroid Build Coastguard Worker TII->getNamedOperand(MI, AMDGPU::OpName::src2);
388*9880d681SAndroid Build Coastguard Worker if (Src2) {
389*9880d681SAndroid Build Coastguard Worker int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
390*9880d681SAndroid Build Coastguard Worker if (Op32Src2Idx != -1) {
391*9880d681SAndroid Build Coastguard Worker Inst32.addOperand(*Src2);
392*9880d681SAndroid Build Coastguard Worker } else {
393*9880d681SAndroid Build Coastguard Worker // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
394*9880d681SAndroid Build Coastguard Worker // replaced with an implicit read of vcc. This was already added
395*9880d681SAndroid Build Coastguard Worker // during the initial BuildMI, so find it to preserve the flags.
396*9880d681SAndroid Build Coastguard Worker copyFlagsToImplicitVCC(*Inst32, *Src2);
397*9880d681SAndroid Build Coastguard Worker }
398*9880d681SAndroid Build Coastguard Worker }
399*9880d681SAndroid Build Coastguard Worker
400*9880d681SAndroid Build Coastguard Worker ++NumInstructionsShrunk;
401*9880d681SAndroid Build Coastguard Worker MI.eraseFromParent();
402*9880d681SAndroid Build Coastguard Worker
403*9880d681SAndroid Build Coastguard Worker foldImmediates(*Inst32, TII, MRI);
404*9880d681SAndroid Build Coastguard Worker DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
405*9880d681SAndroid Build Coastguard Worker
406*9880d681SAndroid Build Coastguard Worker
407*9880d681SAndroid Build Coastguard Worker }
408*9880d681SAndroid Build Coastguard Worker }
409*9880d681SAndroid Build Coastguard Worker return false;
410*9880d681SAndroid Build Coastguard Worker }
411