1 //===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file Pass to preconfigure the shape of physical tile registers.
10 /// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
11 /// walks each instruction of the basic block in reverse order. Any tile
12 /// register that lives out of the basic block is spilled and reloaded
13 /// before its users. It also checks the dependency of the shape to ensure
14 /// the shape is defined before ldtilecfg.
15 //
16 //===----------------------------------------------------------------------===//
17
18 #include "X86.h"
19 #include "X86InstrBuilder.h"
20 #include "X86MachineFunctionInfo.h"
21 #include "X86RegisterInfo.h"
22 #include "X86Subtarget.h"
23 #include "llvm/ADT/DepthFirstIterator.h"
24 #include "llvm/ADT/PostOrderIterator.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/CodeGen/MachineFrameInfo.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstr.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/Passes.h"
31 #include "llvm/CodeGen/TargetInstrInfo.h"
32 #include "llvm/CodeGen/TargetRegisterInfo.h"
33 #include "llvm/InitializePasses.h"
34 #include "llvm/Support/Debug.h"
35
36 using namespace llvm;
37
38 #define DEBUG_TYPE "fastpretileconfig"
39
40 STATISTIC(NumStores, "Number of stores added");
41 STATISTIC(NumLoads, "Number of loads added");
42
43 namespace {
44
45 class X86FastPreTileConfig : public MachineFunctionPass {
46 MachineFunction *MF = nullptr;
47 const X86Subtarget *ST = nullptr;
48 const TargetInstrInfo *TII = nullptr;
49 MachineRegisterInfo *MRI = nullptr;
50 X86MachineFunctionInfo *X86FI = nullptr;
51 MachineFrameInfo *MFI = nullptr;
52 const TargetRegisterInfo *TRI = nullptr;
53 MachineBasicBlock *MBB = nullptr;
54 int CfgSS = -1;
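// Row/column shape registers and spill-slot address created for a tile PHI
// while it is being converted (see convertPHI).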
55 struct PHIInfo {
56 Register Row;
57 Register Col;
58 Register StackAddr;
59 };
60 DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;
61
62 /// Maps virtual regs to the frame index where these values are spilled.
63 IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
64
65 /// Has a bit set for each tile virtual register for which it was determined
66 /// that it is alive across blocks.
67 BitVector MayLiveAcrossBlocks;
68
69 int getStackSpaceFor(Register VirtReg);
70 void InitializeTileConfigStackSpace();
71 bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
72 void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
73 void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
74 MachineOperand *RowMO, MachineOperand *ColMO);
75 void canonicalizePHIs(MachineBasicBlock &MBB);
76 void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
77 void convertPHIs(MachineBasicBlock &MBB);
78 bool configBasicBlock(MachineBasicBlock &MBB);
79
80 public:
81 X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}
82
83 /// Return the pass name.
84 StringRef getPassName() const override {
85 return "Fast Tile Register Preconfigure";
86 }
87
88 /// Perform tile register configuration.
89 bool runOnMachineFunction(MachineFunction &MFunc) override;
90
91 static char ID;
92 };
93
94 } // end anonymous namespace
95
96 char X86FastPreTileConfig::ID = 0;
97
98 INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
99 "Fast Tile Register Preconfigure", false, false)
100 INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
101 "Fast Tile Register Preconfigure", false, false)
102
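/// Return true if instruction A is located at or before B within the single
/// basic block \p MBB. This is a linear scan; B == MBB.end() always counts as
/// dominated.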
103 static bool dominates(MachineBasicBlock &MBB,
104 MachineBasicBlock::const_iterator A,
105 MachineBasicBlock::const_iterator B) {
106 auto MBBEnd = MBB.end();
107 if (B == MBBEnd)
108 return true;
109
110 MachineBasicBlock::const_iterator I = MBB.begin();
111 for (; &*I != A && &*I != B; ++I)
112 ;
113
114 return &*I == A;
115 }
116
117 /// This allocates space for the specified virtual register to be held on the
118 /// stack.
119 int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
120 // Find the location Reg would belong...
121 int SS = StackSlotForVirtReg[VirtReg];
122 // Already has space allocated?
123 if (SS != -1)
124 return SS;
125
126 // Allocate a new stack object for this spill location...
127 const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
128 unsigned Size = TRI->getSpillSize(RC);
129 Align Alignment = TRI->getSpillAlign(RC);
130 int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);
131
132 // Assign the slot.
133 StackSlotForVirtReg[VirtReg] = FrameIdx;
134 return FrameIdx;
135 }
136
137 /// Returns false if \p VirtReg is known to not live out of the current config.
138 /// If \p VirtReg lives out of the current MBB, it must live out of the current
139 /// config.
140 bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
141 if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
142 return true;
143
144 for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
145 if (UseInst.getParent() != MBB) {
146 MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
147 return true;
148 }
149
150 // The use and def are in the same MBB. If the tile register is
151 // reconfigured, it is clobbered and we need to spill and reload the
152 // tile register.
153 if (CfgMI) {
154 if (dominates(*MBB, *CfgMI, UseInst)) {
155 MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
156 return true;
157 }
158 }
159 }
160
161 return false;
162 }
163
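// Zero-initialize the tile config area on the stack with the widest available
// vector stores, then write 1 into the palette byte at offset 0.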
164 void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
165 MachineBasicBlock &MBB = MF->front();
166 MachineInstr *MI = &*MBB.getFirstNonPHI();
167 DebugLoc DL;
168 if (ST->hasAVX512()) {
169 Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
170 BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
171 addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
172 .addReg(Zmm);
173 } else if (ST->hasAVX2()) {
174 Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
175 BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
176 addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
177 .addReg(Ymm);
178 addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
179 32)
180 .addReg(Ymm);
181 } else {
182 assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
183 unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
184 Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
185 BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
186 addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
187 .addReg(Xmm);
188 addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
189 .addReg(Xmm);
190 addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
191 .addReg(Xmm);
192 addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
193 .addReg(Xmm);
194 }
195 // Fill in the palette first.
196 addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
197 .addImm(1);
198 }
199
200 /// Insert spill instruction for \p VirtReg before \p Before.
201 /// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
202 void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
203 Register VirtReg, bool Kill) {
204 LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
205 int FI = getStackSpaceFor(VirtReg);
206 LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');
207
208 const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
209 // Don't need shape information for the tile store, because it is adjacent
210 // to the tile def instruction.
211 TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI,
212 Register());
213 ++NumStores;
214
215 // TODO: update DBG_VALUEs
216 }
217
218 /// Insert reload instruction for \p OrigReg before \p UseMI.
219 void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
220 Register OrigReg, MachineOperand *RowMO,
221 MachineOperand *ColMO) {
222 int FI = getStackSpaceFor(OrigReg);
223 const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
224 Register TileReg;
225 // Fold copy to tileload
226 // BB1:
227 // spill src to s
228 //
229 // BB2:
230 // t = copy src
231 // -->
232 // t = tileload (s)
233 if (UseMI->isCopy())
234 TileReg = UseMI->getOperand(0).getReg();
235 else
236 TileReg = MRI->createVirtualRegister(&RC);
237 // Can't use TII->loadRegFromStackSlot(), because we need the shape
238 // information for reload.
239 // tileloadd (%sp, %idx), %tmm
240 unsigned Opc = X86::PTILELOADDV;
241 Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
242 // FIXME: MBB is not the parent of UseMI.
243 MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
244 TII->get(X86::MOV64ri), StrideReg)
245 .addImm(64);
246 NewMI = addFrameReference(
247 BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
248 .addReg(RowMO->getReg())
249 .addReg(ColMO->getReg()),
250 FI);
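// Operand 5 is the index register of the frame reference built above; rewrite
// it to carry the 64-byte stride.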
251 MachineOperand &MO = NewMI->getOperand(5);
252 MO.setReg(StrideReg);
253 MO.setIsKill(true);
254 RowMO->setIsKill(false);
255 ColMO->setIsKill(false);
256 // Erase copy instruction after it is folded.
257 if (UseMI->isCopy()) {
258 UseMI->eraseFromParent();
259 } else {
260 // Replace the register in the user MI.
261 for (auto &MO : UseMI->operands()) {
262 if (MO.isReg() && MO.getReg() == OrigReg)
263 MO.setReg(TileReg);
264 }
265 }
266
267 ++NumLoads;
268 LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
269 << printReg(TileReg, TRI) << '\n');
270 }
271
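/// Return true if \p MI is a pseudo instruction that defines a tile register,
/// either a virtual TILE-class register or a physical TMM0-TMM7.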
272 static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
273 // The instruction must have 3 operands: tile def, row, col.
274 if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
275 return false;
276 MachineOperand &MO = MI.getOperand(0);
277
278 if (MO.isReg()) {
279 Register Reg = MO.getReg();
280 // FIXME it may be used after Greedy RA and the physical
281 // register is not rewritten yet.
282 if (Reg.isVirtual() &&
283 MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
284 return true;
285 if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
286 return true;
287 }
288
289 return false;
290 }
291
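/// Look through COPYs to the instruction that defines \p TileReg and return
/// its row/column shape operands.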
292 static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
293 MachineInstr *MI = MRI->getVRegDef(TileReg);
294 if (isTileDef(MRI, *MI)) {
295 MachineOperand *RowMO = &MI->getOperand(1);
296 MachineOperand *ColMO = &MI->getOperand(2);
297 return ShapeT(RowMO, ColMO, MRI);
298 } else if (MI->isCopy()) {
299 TileReg = MI->getOperand(1).getReg();
300 return getShape(MRI, TileReg);
301 }
302
303 // The def should not be PHI node, because we walk the MBB in reverse post
304 // order.
305 assert(MI->isPHI() && "Unexpected PHI when get shape.");
306 llvm_unreachable("Unexpected MI when get shape.");
307 }
308
309 // BB0:
310 // spill t0 to s0
311 // BB1:
312 // spill t1 to s1
313 //
314 // BB2:
315 // t = phi [t0, bb0] [t1, bb1]
316 // -->
317 // row = phi [r0, bb0] [r1, bb1]
318 // col = phi [c0, bb0] [c1, bb1]
319 // s = phi [s0, bb0] [s1, bb1]
320 // t = tileload row, col, s
321 // The new instructions are inserted at the end of the phi nodes. The order
322 // of the original phi nodes is not preserved.
323 void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
324 MachineInstr &PHI) {
325 // 1. Create instruction to get stack slot address of each incoming block.
326 // 2. Create PHI node for the stack address.
327 // 3. Create PHI node for shape. If one of the incoming shapes is an
328 // immediate, use the immediate and delete the PHI node.
329 // 4. Create tileload instruction from the stack address.
330 Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
331 MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
332 TII->get(X86::PHI), StackAddrReg);
333 Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
334 MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
335 TII->get(X86::PHI), RowReg);
336 Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
337 MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
338 TII->get(X86::PHI), ColReg);
339 // Record the mapping of phi node and its row/column information.
340 VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};
341
342 for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
343 // Get the 2 incoming value of tile register and MBB.
344 Register InTileReg = PHI.getOperand(I).getReg();
345 // Mark it as live out, so that it will be spilled when visiting
346 // the incoming MBB. Otherwise, since the phi will be deleted, the
347 // spill would be missed when visiting the incoming MBB.
348 MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
349 MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();
350
351 MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
352 MachineBasicBlock::iterator InsertPos;
353 if (TileDefMI->isPHI()) {
354 InsertPos = TileDefMI->getParent()->getFirstNonPHI();
355 if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
356 // def t1
357 // / \
358 // def t2 t3 = phi(t1, t4) <--
359 // \ / |
360 // t4 = phi(t2, t3)-------------
361 //
362 // For each (row, column and stack address) append phi incoming value.
363 // Create r3 = phi(r1, r4)
364 // Create r4 = phi(r2, r3)
365 Register InRowReg = VisitedPHIs[TileDefMI].Row;
366 Register InColReg = VisitedPHIs[TileDefMI].Col;
367 Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
368 RowPHI.addReg(InRowReg).addMBB(InMBB);
369 ColPHI.addReg(InColReg).addMBB(InMBB);
370 AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
371 continue;
372 } else {
373 // Recursively convert PHI to tileload
374 convertPHI(TileDefMI->getParent(), *TileDefMI);
375 // The PHI node is converted to a tileload instruction. Get the stack
376 // address from tileload operands.
377 MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
378 assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
379 Register InRowReg = TileLoad->getOperand(1).getReg();
380 Register InColReg = TileLoad->getOperand(2).getReg();
381 Register InStackAddrReg = TileLoad->getOperand(3).getReg();
382 RowPHI.addReg(InRowReg).addMBB(InMBB);
383 ColPHI.addReg(InColReg).addMBB(InMBB);
384 AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
385 }
386 } else {
387 InsertPos = TileDefMI->getIterator();
388
389 // Fill the incoming operand of row/column phi instruction.
390 ShapeT Shape = getShape(MRI, InTileReg);
391 Shape.getRow()->setIsKill(false);
392 Shape.getCol()->setIsKill(false);
393 RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
394 ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);
395
396 // The incoming tile register lives out of its def BB, so it will be spilled.
397 // Create an MI to get the spill stack slot address for the tile register.
398 int FI = getStackSpaceFor(InTileReg);
399 Register InStackAddrReg =
400 MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
401 addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
402 TII->get(X86::LEA64r), InStackAddrReg)
403 .addFrameIndex(FI),
404 0);
405 AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
406 }
407 }
408
409 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
410 Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
411 BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
412 .addImm(64);
413 Register TileReg = PHI.getOperand(0).getReg();
414 MachineInstr *NewMI = addDirectMem(
415 BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
416 .addReg(RowReg)
417 .addReg(ColReg),
418 StackAddrReg);
419 MachineOperand &MO = NewMI->getOperand(5);
420 MO.setReg(StrideReg);
421 MO.setIsKill(true);
422 PHI.eraseFromParent();
423 VisitedPHIs.erase(&PHI);
424 }
425
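/// Return true if \p MI defines a virtual tile register.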
426 static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
427 MachineOperand &MO = MI.getOperand(0);
428 if (MO.isReg() && MO.getReg().isVirtual() &&
429 MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)
430 return true;
431 return false;
432 }
433
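/// If a tile PHI in \p MBB has an incoming value from this same block that is
/// itself defined by another PHI here, rewrite the operand to use that PHI's
/// own incoming value, so each PHI can be converted independently.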
434 void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
435 SmallVector<MachineInstr *, 8> PHIs;
436
437 for (MachineInstr &MI : MBB) {
438 if (!MI.isPHI())
439 break;
440 if (!isTileRegDef(MRI, MI))
441 continue;
442 PHIs.push_back(&MI);
443 }
444 // Canonicalize the phi nodes first. One tile phi may depend on a previous
445 // phi node. For the case below, we need to convert %t4.
446 //
447 // BB0:
448 // %t3 = phi (t1 BB1, t2 BB0)
449 // %t4 = phi (t5 BB1, t3 BB0)
450 // -->
451 // %t3 = phi (t1 BB1, t2 BB0)
452 // %t4 = phi (t5 BB1, t2 BB0)
453 //
454 while (!PHIs.empty()) {
455 MachineInstr *PHI = PHIs.pop_back_val();
456
457 // Find the operand that is incoming from the same MBB and whose def
458 // is also a phi node.
459 MachineOperand *InMO = nullptr;
460 MachineInstr *DefMI = nullptr;
461 for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
462 Register InTileReg = PHI->getOperand(I).getReg();
463 MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
464 DefMI = MRI->getVRegDef(InTileReg);
465 if (InMBB != &MBB || !DefMI->isPHI())
466 continue;
467
468 InMO = &PHI->getOperand(I);
469 break;
470 }
471 // If we can't find such an operand, do nothing.
472 if (!InMO)
473 continue;
474
475 // The current phi node depends on a previous phi node. Break the
476 // dependency.
477 Register DefTileReg;
478 for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
479 MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
480 if (InMBB != &MBB)
481 continue;
482 DefTileReg = DefMI->getOperand(I).getReg();
483 InMO->setReg(DefTileReg);
484 break;
485 }
486 }
487 }
488
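/// Convert every tile PHI node in \p MBB into a tile load from the spill slot
/// of its incoming values.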
489 void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
490 SmallVector<MachineInstr *, 8> PHIs;
491 for (MachineInstr &MI : MBB) {
492 if (!MI.isPHI())
493 break;
494 if (!isTileRegDef(MRI, MI))
495 continue;
496 PHIs.push_back(&MI);
497 }
498 while (!PHIs.empty()) {
499 MachineInstr *MI = PHIs.pop_back_val();
500 VisitedPHIs.clear();
501 convertPHI(&MBB, *MI);
502 }
503 }
504
505 // PreTileConfig should configure the tile registers on a per-basic-block
506 // basis.
507 bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
508 this->MBB = &MBB;
509 bool Change = false;
510 MachineInstr *LastShapeMI = nullptr;
511 MachineInstr *LastTileCfg = nullptr;
512 bool HasUnconfigTile = false;
513
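// Insert a ldtilecfg that loads the configuration from CfgSS right before
// \p Before, creating the config stack slot on first use.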
514 auto Config = [&](MachineInstr &Before) {
515 if (CfgSS == -1)
516 CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
517 ST->getTileConfigAlignment(), false);
518 LastTileCfg = addFrameReference(
519 BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
520 LastShapeMI = nullptr;
521 Change = true;
522 };
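// Return true if \p MI has any virtual tile register operand.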
523 auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
524 for (const MachineOperand &MO : MI.operands()) {
525 if (!MO.isReg())
526 continue;
527 Register Reg = MO.getReg();
528 if (Reg.isVirtual() &&
529 MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
530 return true;
531 }
532 return false;
533 };
534 for (MachineInstr &MI : reverse(MBB)) {
535 // We have transformed phi node before configuring BB.
536 if (MI.isPHI())
537 break;
538 // Don't collect the shape of a used tile; the tile should be defined
539 // before the tile use. Spill and reload happen if there is only a
540 // tile use after ldtilecfg, so the shape can be collected from the reload.
541 // Take the code below for example: %t is reloaded before the tilestore.
542 // call
543 // ....
544 // tilestore %r, %c, %t
545 // -->
546 // call
547 // ldtilecfg
548 // %t = tileload %r, %c
549 // tilestore %r, %c, %t
550 if (HasTileOperand(MRI, MI))
551 HasUnconfigTile = true;
552 // According to the AMX ABI, all tile registers, including the config
553 // register, are volatile. The caller needs to save/restore the config register.
554 if (MI.isCall() && HasUnconfigTile) {
555 MachineBasicBlock::iterator I;
556 if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
557 I = ++LastShapeMI->getIterator();
558 else
559 I = ++MI.getIterator();
560 Config(*I);
561 HasUnconfigTile = false;
562 continue;
563 }
564 if (!isTileDef(MRI, MI))
565 continue;
566 //
567 //---------------------------------------------------------------------
568 // Don't handle the COPY instruction. If the src and dst of the COPY can be
569 // in the same config, as in the case below, we just check the shape of t0.
570 // def row0
571 // def col0
572 // ldtilecfg
573 // t0 = tilezero(row0, col0)
574 // t1 = copy t0
575 // ...
576 // If the src and dst of the COPY can NOT be in the same config, as in the
577 // case below, a reload is generated before the copy instruction.
578 // def row0
579 // def col0
580 // t0 = tilezero(row0, col0)
581 // spill t0
582 // ...
583 // def row1
584 // def col1
585 // ldtilecfg
586 // t1 = tilezero(row1, col1)
587 // reload t0
588 // t1 = copy t0
589 //---------------------------------------------------------------------
590 //
591 // If MI dominates the last shape def instruction, we need to insert
592 // ldtilecfg after LastShapeMI now. The config doesn't include the
593 // current MI.
594 // def row0
595 // def col0
596 // tilezero(row0, col0) <- MI
597 // def row1
598 // def col1
599 // ldtilecfg <- insert
600 // tilezero(row1, col1)
601 if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
602 Config(*(++LastShapeMI->getIterator()));
603 MachineOperand *RowMO = &MI.getOperand(1);
604 MachineOperand *ColMO = &MI.getOperand(2);
605 MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
606 MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
607 // If the shape is defined in current MBB, check the domination.
608 // FIXME how about loop?
609 if (RowMI->getParent() == &MBB) {
610 if (!LastShapeMI)
611 LastShapeMI = RowMI;
612 else if (dominates(MBB, LastShapeMI, RowMI))
613 LastShapeMI = RowMI;
614 }
615 if (ColMI->getParent() == &MBB) {
616 if (!LastShapeMI)
617 LastShapeMI = ColMI;
618 else if (dominates(MBB, LastShapeMI, ColMI))
619 LastShapeMI = ColMI;
620 }
621 // If a user of the tile lives out of the tilecfg, spill the tile and
622 // reload it before the user.
623 Register TileReg = MI.getOperand(0).getReg();
624 if (mayLiveOut(TileReg, LastTileCfg))
625 spill(++MI.getIterator(), TileReg, false);
626 for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
627 if (UseMI.getParent() == &MBB) {
628 // Reload is only needed if the use crosses the ldtilecfg.
629 if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
630 continue;
631 // Reload before UseMI.
632 reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
633 } else {
634 // Don't reload for a phi instruction; we handle phi reloads separately.
635 // TODO: merge the reload for the same user MBB.
636 if (!UseMI.isPHI())
637 reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
638 }
639 }
640 }
641
642 // Configure tile registers at the head of the MBB
643 if (HasUnconfigTile) {
644 MachineInstr *Before;
645 if (LastShapeMI == nullptr || LastShapeMI->isPHI())
646 Before = &*MBB.getFirstNonPHI();
647 else
648 Before = &*(++LastShapeMI->getIterator());
649
650 Config(*Before);
651 }
652
653 return Change;
654 }
655
656 bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
657 MF = &MFunc;
658 MRI = &MFunc.getRegInfo();
659 ST = &MFunc.getSubtarget<X86Subtarget>();
660 TII = ST->getInstrInfo();
661 X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
662 MFI = &MFunc.getFrameInfo();
663 TRI = ST->getRegisterInfo();
664 CfgSS = -1;
665
666 unsigned NumVirtRegs = MRI->getNumVirtRegs();
667 // Abandon early if there is no tile register to config.
668 bool HasVirtTileReg = false;
669 for (unsigned I = 0, E = NumVirtRegs; I != E; ++I) {
670 Register VirtReg = Register::index2VirtReg(I);
671 if (MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID) {
672 HasVirtTileReg = true;
673 break;
674 }
675 }
676 if (!HasVirtTileReg)
677 return false;
678
679 StackSlotForVirtReg.resize(NumVirtRegs);
680 MayLiveAcrossBlocks.clear();
681 // We will create registers during config. *3 is to make sure
682 // the virtual register number doesn't exceed the size of
683 // the bit vector.
684 MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
685 bool Change = false;
686 assert(MRI->isSSA());
687
688 // Canonicalize the phi node first.
689 for (MachineBasicBlock &MBB : MFunc)
690 canonicalizePHIs(MBB);
691
692 // Loop over all of the basic blocks in reverse post order and insert
693 // ldtilecfg for tile registers. The reverse post order is to facilitate
694 // PHI node conversion.
695 ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
696 for (MachineBasicBlock *MBB : RPOT) {
697 convertPHIs(*MBB);
698 Change |= configBasicBlock(*MBB);
699 }
700
701 if (Change)
702 InitializeTileConfigStackSpace();
703
704 StackSlotForVirtReg.clear();
705 return Change;
706 }
707
708 FunctionPass *llvm::createX86FastPreTileConfigPass() {
709 return new X86FastPreTileConfig();
710 }
711