//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
2*9880d681SAndroid Build Coastguard Worker //
3*9880d681SAndroid Build Coastguard Worker // The LLVM Compiler Infrastructure
4*9880d681SAndroid Build Coastguard Worker //
5*9880d681SAndroid Build Coastguard Worker // This file is distributed under the University of Illinois Open Source
6*9880d681SAndroid Build Coastguard Worker // License. See LICENSE.TXT for details.
7*9880d681SAndroid Build Coastguard Worker //
8*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
9*9880d681SAndroid Build Coastguard Worker //
// In NVPTX, NVPTXFrameLowering emits the following instructions at the
// beginning of a MachineFunction.
12*9880d681SAndroid Build Coastguard Worker //
13*9880d681SAndroid Build Coastguard Worker // mov %SPL, %depot
14*9880d681SAndroid Build Coastguard Worker // cvta.local %SP, %SPL
15*9880d681SAndroid Build Coastguard Worker //
// Because a frame index is a generic address and alloca can only return a
// generic pointer, without this pass the instructions producing an alloca'ed
// address will be based on %SP. NVPTXLowerAlloca helps by replacing stores and
// loads on this address with their .local versions, but this may introduce a
// lot of cvta.to.local instructions. Performance can be improved if we avoid
// casting the address back and forth and directly calculate the local address
// based on %SPL. This peephole pass optimizes these cases.
//
// For example, it will transform the following pattern
25*9880d681SAndroid Build Coastguard Worker // %vreg0<def> = LEA_ADDRi64 %VRFrame, 4
26*9880d681SAndroid Build Coastguard Worker // %vreg1<def> = cvta_to_local_yes_64 %vreg0
27*9880d681SAndroid Build Coastguard Worker //
28*9880d681SAndroid Build Coastguard Worker // into
29*9880d681SAndroid Build Coastguard Worker // %vreg1<def> = LEA_ADDRi64 %VRFrameLocal, 4
30*9880d681SAndroid Build Coastguard Worker //
31*9880d681SAndroid Build Coastguard Worker // %VRFrameLocal is the virtual register name of %SPL
32*9880d681SAndroid Build Coastguard Worker //
33*9880d681SAndroid Build Coastguard Worker //===----------------------------------------------------------------------===//
34*9880d681SAndroid Build Coastguard Worker
35*9880d681SAndroid Build Coastguard Worker #include "NVPTX.h"
36*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineFunctionPass.h"
37*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineInstrBuilder.h"
38*9880d681SAndroid Build Coastguard Worker #include "llvm/CodeGen/MachineRegisterInfo.h"
39*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetRegisterInfo.h"
40*9880d681SAndroid Build Coastguard Worker #include "llvm/Target/TargetInstrInfo.h"
41*9880d681SAndroid Build Coastguard Worker
42*9880d681SAndroid Build Coastguard Worker using namespace llvm;
43*9880d681SAndroid Build Coastguard Worker
44*9880d681SAndroid Build Coastguard Worker #define DEBUG_TYPE "nvptx-peephole"
45*9880d681SAndroid Build Coastguard Worker
46*9880d681SAndroid Build Coastguard Worker namespace llvm {
47*9880d681SAndroid Build Coastguard Worker void initializeNVPTXPeepholePass(PassRegistry &);
48*9880d681SAndroid Build Coastguard Worker }
49*9880d681SAndroid Build Coastguard Worker
50*9880d681SAndroid Build Coastguard Worker namespace {
51*9880d681SAndroid Build Coastguard Worker struct NVPTXPeephole : public MachineFunctionPass {
52*9880d681SAndroid Build Coastguard Worker public:
53*9880d681SAndroid Build Coastguard Worker static char ID;
NVPTXPeephole__anone21ae23f0111::NVPTXPeephole54*9880d681SAndroid Build Coastguard Worker NVPTXPeephole() : MachineFunctionPass(ID) {
55*9880d681SAndroid Build Coastguard Worker initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
56*9880d681SAndroid Build Coastguard Worker }
57*9880d681SAndroid Build Coastguard Worker
58*9880d681SAndroid Build Coastguard Worker bool runOnMachineFunction(MachineFunction &MF) override;
59*9880d681SAndroid Build Coastguard Worker
getPassName__anone21ae23f0111::NVPTXPeephole60*9880d681SAndroid Build Coastguard Worker const char *getPassName() const override {
61*9880d681SAndroid Build Coastguard Worker return "NVPTX optimize redundant cvta.to.local instruction";
62*9880d681SAndroid Build Coastguard Worker }
63*9880d681SAndroid Build Coastguard Worker
getAnalysisUsage__anone21ae23f0111::NVPTXPeephole64*9880d681SAndroid Build Coastguard Worker void getAnalysisUsage(AnalysisUsage &AU) const override {
65*9880d681SAndroid Build Coastguard Worker MachineFunctionPass::getAnalysisUsage(AU);
66*9880d681SAndroid Build Coastguard Worker }
67*9880d681SAndroid Build Coastguard Worker };
68*9880d681SAndroid Build Coastguard Worker }
69*9880d681SAndroid Build Coastguard Worker
70*9880d681SAndroid Build Coastguard Worker char NVPTXPeephole::ID = 0;
71*9880d681SAndroid Build Coastguard Worker
72*9880d681SAndroid Build Coastguard Worker INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
73*9880d681SAndroid Build Coastguard Worker
isCVTAToLocalCombinationCandidate(MachineInstr & Root)74*9880d681SAndroid Build Coastguard Worker static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
75*9880d681SAndroid Build Coastguard Worker auto &MBB = *Root.getParent();
76*9880d681SAndroid Build Coastguard Worker auto &MF = *MBB.getParent();
77*9880d681SAndroid Build Coastguard Worker // Check current instruction is cvta.to.local
78*9880d681SAndroid Build Coastguard Worker if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 &&
79*9880d681SAndroid Build Coastguard Worker Root.getOpcode() != NVPTX::cvta_to_local_yes)
80*9880d681SAndroid Build Coastguard Worker return false;
81*9880d681SAndroid Build Coastguard Worker
82*9880d681SAndroid Build Coastguard Worker auto &Op = Root.getOperand(1);
83*9880d681SAndroid Build Coastguard Worker const auto &MRI = MF.getRegInfo();
84*9880d681SAndroid Build Coastguard Worker MachineInstr *GenericAddrDef = nullptr;
85*9880d681SAndroid Build Coastguard Worker if (Op.isReg() && TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
86*9880d681SAndroid Build Coastguard Worker GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
87*9880d681SAndroid Build Coastguard Worker }
88*9880d681SAndroid Build Coastguard Worker
89*9880d681SAndroid Build Coastguard Worker // Check the register operand is uniquely defined by LEA_ADDRi instruction
90*9880d681SAndroid Build Coastguard Worker if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
91*9880d681SAndroid Build Coastguard Worker (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
92*9880d681SAndroid Build Coastguard Worker GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
93*9880d681SAndroid Build Coastguard Worker return false;
94*9880d681SAndroid Build Coastguard Worker }
95*9880d681SAndroid Build Coastguard Worker
96*9880d681SAndroid Build Coastguard Worker // Check the LEA_ADDRi operand is Frame index
97*9880d681SAndroid Build Coastguard Worker auto &BaseAddrOp = GenericAddrDef->getOperand(1);
98*9880d681SAndroid Build Coastguard Worker if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) {
99*9880d681SAndroid Build Coastguard Worker return true;
100*9880d681SAndroid Build Coastguard Worker }
101*9880d681SAndroid Build Coastguard Worker
102*9880d681SAndroid Build Coastguard Worker return false;
103*9880d681SAndroid Build Coastguard Worker }
104*9880d681SAndroid Build Coastguard Worker
CombineCVTAToLocal(MachineInstr & Root)105*9880d681SAndroid Build Coastguard Worker static void CombineCVTAToLocal(MachineInstr &Root) {
106*9880d681SAndroid Build Coastguard Worker auto &MBB = *Root.getParent();
107*9880d681SAndroid Build Coastguard Worker auto &MF = *MBB.getParent();
108*9880d681SAndroid Build Coastguard Worker const auto &MRI = MF.getRegInfo();
109*9880d681SAndroid Build Coastguard Worker const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
110*9880d681SAndroid Build Coastguard Worker auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
111*9880d681SAndroid Build Coastguard Worker
112*9880d681SAndroid Build Coastguard Worker MachineInstrBuilder MIB =
113*9880d681SAndroid Build Coastguard Worker BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
114*9880d681SAndroid Build Coastguard Worker Root.getOperand(0).getReg())
115*9880d681SAndroid Build Coastguard Worker .addReg(NVPTX::VRFrameLocal)
116*9880d681SAndroid Build Coastguard Worker .addOperand(Prev.getOperand(2));
117*9880d681SAndroid Build Coastguard Worker
118*9880d681SAndroid Build Coastguard Worker MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
119*9880d681SAndroid Build Coastguard Worker
120*9880d681SAndroid Build Coastguard Worker // Check if MRI has only one non dbg use, which is Root
121*9880d681SAndroid Build Coastguard Worker if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
122*9880d681SAndroid Build Coastguard Worker Prev.eraseFromParentAndMarkDBGValuesForRemoval();
123*9880d681SAndroid Build Coastguard Worker }
124*9880d681SAndroid Build Coastguard Worker Root.eraseFromParentAndMarkDBGValuesForRemoval();
125*9880d681SAndroid Build Coastguard Worker }
126*9880d681SAndroid Build Coastguard Worker
runOnMachineFunction(MachineFunction & MF)127*9880d681SAndroid Build Coastguard Worker bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
128*9880d681SAndroid Build Coastguard Worker if (skipFunction(*MF.getFunction()))
129*9880d681SAndroid Build Coastguard Worker return false;
130*9880d681SAndroid Build Coastguard Worker
131*9880d681SAndroid Build Coastguard Worker bool Changed = false;
132*9880d681SAndroid Build Coastguard Worker // Loop over all of the basic blocks.
133*9880d681SAndroid Build Coastguard Worker for (auto &MBB : MF) {
134*9880d681SAndroid Build Coastguard Worker // Traverse the basic block.
135*9880d681SAndroid Build Coastguard Worker auto BlockIter = MBB.begin();
136*9880d681SAndroid Build Coastguard Worker
137*9880d681SAndroid Build Coastguard Worker while (BlockIter != MBB.end()) {
138*9880d681SAndroid Build Coastguard Worker auto &MI = *BlockIter++;
139*9880d681SAndroid Build Coastguard Worker if (isCVTAToLocalCombinationCandidate(MI)) {
140*9880d681SAndroid Build Coastguard Worker CombineCVTAToLocal(MI);
141*9880d681SAndroid Build Coastguard Worker Changed = true;
142*9880d681SAndroid Build Coastguard Worker }
143*9880d681SAndroid Build Coastguard Worker } // Instruction
144*9880d681SAndroid Build Coastguard Worker } // Basic Block
145*9880d681SAndroid Build Coastguard Worker
146*9880d681SAndroid Build Coastguard Worker // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
147*9880d681SAndroid Build Coastguard Worker const auto &MRI = MF.getRegInfo();
148*9880d681SAndroid Build Coastguard Worker if (MRI.use_empty(NVPTX::VRFrame)) {
149*9880d681SAndroid Build Coastguard Worker if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) {
150*9880d681SAndroid Build Coastguard Worker MI->eraseFromParentAndMarkDBGValuesForRemoval();
151*9880d681SAndroid Build Coastguard Worker }
152*9880d681SAndroid Build Coastguard Worker }
153*9880d681SAndroid Build Coastguard Worker
154*9880d681SAndroid Build Coastguard Worker return Changed;
155*9880d681SAndroid Build Coastguard Worker }
156*9880d681SAndroid Build Coastguard Worker
createNVPTXPeephole()157*9880d681SAndroid Build Coastguard Worker MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }
158