//===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to preconfig the shape of physical tile registers
/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
/// walks each instruction of the basic block in reverse order. All the tile
/// registers that live out of the basic block are spilled and reloaded
/// before their users. It also checks the dependency of the shape to ensure
/// the shape is defined before ldtilecfg.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionAnalysisManager.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Analysis.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "x86-fast-pre-tile-config"

STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads, "Number of loads added");

namespace {

class X86FastPreTileConfigImpl {
public:
  X86FastPreTileConfigImpl() : StackSlotForVirtReg(-1) {}
  bool runOnMachineFunction(MachineFunction &MF);

private:
  MachineFunction *MF = nullptr;
  const X86Subtarget *ST = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  X86MachineFunctionInfo *X86FI = nullptr;
  MachineFrameInfo *MFI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineBasicBlock *MBB = nullptr;
  int CfgSS = -1;
  struct PHIInfo {
    Register Row;
    Register Col;
    Register StackAddr;
  };
  DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;

  /// Maps virtual regs to the frame index where these values are spilled.
  IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;

  /// Has a bit set for every tile virtual register for which it was
  /// determined that it is alive across blocks.
  BitVector MayLiveAcrossBlocks;

  int getStackSpaceFor(Register VirtReg);
  void InitializeTileConfigStackSpace();
  bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
  void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
  void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
              MachineOperand *RowMO, MachineOperand *ColMO);
  void canonicalizePHIs(MachineBasicBlock &MBB);
  void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
  void convertPHIs(MachineBasicBlock &MBB);
  bool configBasicBlock(MachineBasicBlock &MBB);
};

class X86FastPreTileConfigLegacy : public MachineFunctionPass {
public:
  X86FastPreTileConfigLegacy() : MachineFunctionPass(ID) {}

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Fast Tile Register Preconfigure";
  }

  /// Perform tile register configuration.
  bool runOnMachineFunction(MachineFunction &MFunc) override;

  static char ID;
};

} // end anonymous namespace

char X86FastPreTileConfigLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(X86FastPreTileConfigLegacy, DEBUG_TYPE,
                      "Fast Tile Register Preconfigure", false, false)
INITIALIZE_PASS_END(X86FastPreTileConfigLegacy, DEBUG_TYPE,
                    "Fast Tile Register Preconfigure", false, false)

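// Decide whether instruction A comes before instruction B within a single
// basic block by scanning the block from the beginning; B == MBB.end() is
// treated as "A dominates B".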
static bool dominates(MachineBasicBlock &MBB,
                      MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  auto MBBEnd = MBB.end();
  if (B == MBBEnd)
    return true;

  MachineBasicBlock::const_iterator I = MBB.begin();
  for (; &*I != A && &*I != B; ++I)
    ;

  return &*I == A;
}

/// This allocates space for the specified virtual register to be held on the
/// stack.
int X86FastPreTileConfigImpl::getStackSpaceFor(Register VirtReg) {
  // Find the location Reg would belong...
  int SS = StackSlotForVirtReg[VirtReg];
  // Already has space allocated?
  if (SS != -1)
    return SS;

  // Allocate a new stack object for this spill location...
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);
  int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);

  // Assign the slot.
  StackSlotForVirtReg[VirtReg] = FrameIdx;
  return FrameIdx;
}

/// Returns false if \p VirtReg is known to not live out of the current config.
/// If \p VirtReg lives out of the current MBB, it must live out of the current
/// config.
bool X86FastPreTileConfigImpl::mayLiveOut(Register VirtReg,
                                          MachineInstr *CfgMI) {
  if (MayLiveAcrossBlocks.test(VirtReg.virtRegIndex()))
    return true;

  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
    if (UseInst.getParent() != MBB) {
      MayLiveAcrossBlocks.set(VirtReg.virtRegIndex());
      return true;
    }

    // The use and def are in the same MBB. If the tile register is
    // reconfigured, it is clobbered and we need to spill and reload the
    // tile register.
    if (CfgMI) {
      if (dominates(*MBB, *CfgMI, UseInst)) {
        MayLiveAcrossBlocks.set(VirtReg.virtRegIndex());
        return true;
      }
    }
  }

  return false;
}

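// Zero-initialize the stack slot that backs ldtilecfg in the entry block,
// using the widest vector stores available (ZMM/YMM/XMM stores covering the
// 64-byte config area), then write palette 1 into the first byte.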
void X86FastPreTileConfigImpl::InitializeTileConfigStackSpace() {
  MachineBasicBlock &MBB = MF->front();
  MachineInstr *MI = &*MBB.getFirstNonPHI();
  DebugLoc DL;
  if (ST->hasAVX512()) {
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
        .addReg(Zmm);
  } else if (ST->hasAVX2()) {
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
        .addReg(Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
                      32)
        .addReg(Ymm);
  } else {
    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
    unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
        .addReg(Xmm);
  }
  // Fill in the palette first.
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
      .addImm(1);
}

/// Insert spill instruction for \p VirtReg before \p Before.
/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
void X86FastPreTileConfigImpl::spill(MachineBasicBlock::iterator Before,
                                     Register VirtReg, bool Kill) {
  LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
  int FI = getStackSpaceFor(VirtReg);
  LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');

  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  // Don't need shape information for tile store, because it is adjacent to
  // the tile def instruction.
  TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, Register());
  ++NumStores;

  // TODO: update DBG_VALUEs
}

/// Insert reload instruction for \p OrigReg before \p UseMI.
void X86FastPreTileConfigImpl::reload(MachineBasicBlock::iterator UseMI,
                                      Register OrigReg, MachineOperand *RowMO,
                                      MachineOperand *ColMO) {
  int FI = getStackSpaceFor(OrigReg);
  const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
  Register TileReg;
  // Fold copy to tileload
  // BB1:
  // spill src to s
  //
  // BB2:
  // t = copy src
  // -->
  // t = tileload (s)
  if (UseMI->isCopy())
    TileReg = UseMI->getOperand(0).getReg();
  else
    TileReg = MRI->createVirtualRegister(&RC);
  // Can't use TII->loadRegFromStackSlot(), because we need the shape
  // information for reload.
  // tileloadd (%sp, %idx), %tmm
  unsigned Opc = X86::PTILELOADDV;
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  // FIXME: MBB is not the parent of UseMI.
  MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
                                TII->get(X86::MOV64ri), StrideReg)
                            .addImm(64);
  NewMI = addFrameReference(
      BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
          .addReg(RowMO->getReg())
          .addReg(ColMO->getReg()),
      FI);
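  // addFrameReference() filled operands 3..7 with the frame-index addressing
  // mode (base, scale, index, displacement, segment); operand 5 is the index
  // register, which carries the row stride (64 bytes) for the tile load.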
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  RowMO->setIsKill(false);
  ColMO->setIsKill(false);
  // Erase copy instruction after it is folded.
  if (UseMI->isCopy()) {
    UseMI->eraseFromParent();
  } else {
    // Replace the register in the user MI.
    for (auto &MO : UseMI->operands()) {
      if (MO.isReg() && MO.getReg() == OrigReg)
        MO.setReg(TileReg);
    }
  }

  ++NumLoads;
  LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
                    << printReg(TileReg, TRI) << '\n');
}

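// Return true if Reg is a virtual register of the TILE register class or one
// of the physical tile registers TMM0-TMM7.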
static bool isTileRegister(MachineRegisterInfo *MRI, Register Reg) {
  if (Reg.isVirtual() &&
      (MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)) {
    return true;
  }

  if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
    return true;

  return false;
}

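// Return true if MI is an AMX pseudo whose operand 0 defines a tile register;
// for such defs, operands 1 and 2 carry the row and column shape.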
static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  // The instruction must have 3 operands: tile def, row, col.
  if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
    return false;
  MachineOperand &MO = MI.getOperand(0);

  if (!MO.isReg())
    return false;

  return isTileRegister(MRI, MO.getReg());
}

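// Look up the shape (row/column operands) of TileReg by following its def
// chain through COPYs until the defining AMX pseudo is found.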
static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
  MachineInstr *MI = MRI->getVRegDef(TileReg);
  if (isTileDef(MRI, *MI)) {
    MachineOperand *RowMO = &MI->getOperand(1);
    MachineOperand *ColMO = &MI->getOperand(2);
    return ShapeT(RowMO, ColMO, MRI);
  } else if (MI->isCopy()) {
    TileReg = MI->getOperand(1).getReg();
    return getShape(MRI, TileReg);
  }

  // The def should not be a PHI node, because we walk the MBB in reverse post
  // order.
  assert(MI->isPHI() && "Unexpected PHI when get shape.");
  llvm_unreachable("Unexpected MI when get shape.");
}

// BB0:
// spill t0 to s0
// BB1:
// spill t1 to s1
//
// BB2:
// t = phi [t0, bb0] [t1, bb1]
// -->
// row = phi [r0, bb0] [r1, bb1]
// col = phi [c0, bb0] [c1, bb1]
// s = phi [s0, bb0] [s1, bb1]
// t = tileload row, col, s
// The new instruction is inserted at the end of the phi node. The order
// of the original phi node is not ensured.
void X86FastPreTileConfigImpl::convertPHI(MachineBasicBlock *MBB,
                                          MachineInstr &PHI) {
  // 1. Create instruction to get stack slot address of each incoming block.
  // 2. Create PHI node for the stack address.
  // 3. Create PHI node for shape. If one of the incoming shapes is an
  //    immediate, use the immediate and delete the PHI node.
  // 4. Create tileload instruction from the stack address.
  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                        TII->get(X86::PHI), StackAddrReg);
  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), RowReg);
  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), ColReg);
  // Record the mapping of phi node and its row/column information.
  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};

  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
    // Get the two incoming operands: tile register and MBB.
    Register InTileReg = PHI.getOperand(I).getReg();
    // Mark it as live out, so that it will be spilled when we visit the
    // incoming MBB. Otherwise, since the phi will be deleted, the spill
    // would be missed when visiting the incoming MBB.
    MayLiveAcrossBlocks.set(InTileReg.virtRegIndex());
    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();

    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
    MachineBasicBlock::iterator InsertPos;
    if (TileDefMI->isPHI()) {
      InsertPos = TileDefMI->getParent()->getFirstNonPHI();
      if (auto It = VisitedPHIs.find(TileDefMI);
          It != VisitedPHIs.end()) { // circular phi reference
        //   def t1
        //   /    |
        // def t2     t3 = phi(t1, t4) <--
        //   |    |                      |
        //   t4 = phi(t2, t3)-------------
        //
        // For each (row, column and stack address) append phi incoming value.
        // Create r3 = phi(r1, r4)
        // Create r4 = phi(r2, r3)
        Register InRowReg = It->second.Row;
        Register InColReg = It->second.Col;
        Register InStackAddrReg = It->second.StackAddr;
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
        continue;
      } else {
        // Recursively convert PHI to tileload
        convertPHI(TileDefMI->getParent(), *TileDefMI);
        // The PHI node is converted to a tileload instruction. Get the stack
        // address from the tileload operands.
        MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
        assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
        Register InRowReg = TileLoad->getOperand(1).getReg();
        Register InColReg = TileLoad->getOperand(2).getReg();
        Register InStackAddrReg = TileLoad->getOperand(3).getReg();
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
      }
    } else {
      InsertPos = TileDefMI->getIterator();

      // Fill the incoming operand of row/column phi instruction.
      ShapeT Shape = getShape(MRI, InTileReg);
      Shape.getRow()->setIsKill(false);
      Shape.getCol()->setIsKill(false);
      RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
      ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);

      // The incoming tile register lives out of its def BB, so it will be
      // spilled. Create an MI to get the spill stack slot address for the
      // tile register.
      int FI = getStackSpaceFor(InTileReg);
      Register InStackAddrReg =
          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
      addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
                        TII->get(X86::LEA64r), InStackAddrReg)
                    .addFrameIndex(FI),
                0);
      AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
    }
  }

  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
      .addImm(64);
  Register TileReg = PHI.getOperand(0).getReg();
  MachineInstr *NewMI = addDirectMem(
      BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
          .addReg(RowReg)
          .addReg(ColReg),
      StackAddrReg);
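  // addDirectMem() filled operands 3..7 with a [StackAddrReg] addressing
  // mode; as in reload(), patch operand 5 (the index register) to hold the
  // 64-byte row stride.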
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  PHI.eraseFromParent();
  VisitedPHIs.erase(&PHI);
}

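// Return true if operand 0 of MI defines a virtual tile register.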
static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  MachineOperand &MO = MI.getOperand(0);
  if (MO.isReg() && MO.getReg().isVirtual() &&
      isTileRegister(MRI, MO.getReg()))
    return true;
  return false;
}

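// Rewrite tile PHIs whose same-block incoming value is itself defined by a
// PHI, so that convertPHI does not have to chase chains of PHIs defined in
// the same block; see the example in the body below.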
void X86FastPreTileConfigImpl::canonicalizePHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;

  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  // Canonicalize the phi node first. One tile phi may depend on a previous
  // phi node. For the case below, we need to convert %t4.
  //
  // BB0:
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t3 BB0)
  // -->
  // %t3 = phi (t1 BB1, t2 BB0)
  // %t4 = phi (t5 BB1, t2 BB0)
  //
  while (!PHIs.empty()) {
    MachineInstr *PHI = PHIs.pop_back_val();

    // Find the operand that is incoming from the same MBB and whose def
    // is also a phi node.
    MachineOperand *InMO = nullptr;
    MachineInstr *DefMI = nullptr;
    for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
      Register InTileReg = PHI->getOperand(I).getReg();
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      DefMI = MRI->getVRegDef(InTileReg);
      if (InMBB != &MBB || !DefMI->isPHI())
        continue;

      InMO = &PHI->getOperand(I);
      break;
    }
    // If we can't find such an operand, do nothing.
    if (!InMO)
      continue;

    // The current phi node depends on a previous phi node. Break the
    // dependency.
    Register DefTileReg;
    for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      if (InMBB != &MBB)
        continue;
      DefTileReg = DefMI->getOperand(I).getReg();
      InMO->setReg(DefTileReg);
      break;
    }
  }
}

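// Convert every tile PHI in MBB into a PTILELOADDV that reloads the value
// from the spill slots of its incoming values. The VisitedPHIs map is reset
// for each root PHI, so circular references are only tracked within one
// conversion.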
void X86FastPreTileConfigImpl::convertPHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;
  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  while (!PHIs.empty()) {
    MachineInstr *MI = PHIs.pop_back_val();
    VisitedPHIs.clear();
    convertPHI(&MBB, *MI);
  }
}

// PreTileConfig should configure the tile registers on a per-basic-block
// basis.
bool X86FastPreTileConfigImpl::configBasicBlock(MachineBasicBlock &MBB) {
  this->MBB = &MBB;
  bool Change = false;
  MachineInstr *LastShapeMI = nullptr;
  MachineInstr *LastTileCfg = nullptr;
  bool HasUnconfigTile = false;

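  // Emit a PLDTILECFGV that reloads the tile configuration from the config
  // stack slot immediately before `Before`. The slot is created lazily on
  // first use, and LastShapeMI is reset because a new tile group starts here.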
  auto Config = [&](MachineInstr &Before) {
    if (CfgSS == -1)
      CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
                                     ST->getTileConfigAlignment(), false);
    LastTileCfg = addFrameReference(
        BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
    LastShapeMI = nullptr;
    Change = true;
  };
  auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg.isVirtual() && isTileRegister(MRI, Reg))
        return true;
    }
    return false;
  };
  for (MachineInstr &MI : reverse(MBB)) {
    // We have transformed phi nodes before configuring the BB.
    if (MI.isPHI())
      break;
    // Don't collect the shape of a used tile; the tile should be defined
    // before the tile use. Spill and reload would happen if there is only a
    // tile use after ldtilecfg, so the shape can be collected from the
    // reload. Take the code below as an example. %t would be reloaded before
    // tilestore.
    // call
    // ....
    // tilestore %r, %c, %t
    // -->
    // call
    // ldtilecfg
    // %t = tileload %r, %c
    // tilestore %r, %c, %t
    if (HasTileOperand(MRI, MI))
      HasUnconfigTile = true;
    // According to the AMX ABI, all the tile registers including the config
    // register are volatile. The caller needs to save/restore the config
    // register.
    if (MI.isCall() && HasUnconfigTile) {
      MachineBasicBlock::iterator I;
      if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
        I = ++LastShapeMI->getIterator();
      else {
        // A call can overwrite registers like rax, so ensure the tile config
        // instruction is sunk closer to the first instruction that uses a
        // tile.
        auto UseIt = MI.getIterator();
        while (UseIt != MBB.end()) {
          if (HasTileOperand(MRI, *UseIt))
            break;
          ++UseIt;
        }
        I = UseIt;
      }
      Config(*I);
      HasUnconfigTile = false;
      continue;
    }
    if (!isTileDef(MRI, MI))
      continue;
    //
    //---------------------------------------------------------------------
    // Don't handle COPY instructions. If the src and dst of the COPY can be
    // in the same config, as in the case below, we just check the shape of
    // t0.
    // def row0
    // def col0
    // ldtilecfg
    // t0 = tilezero(row0, col0)
    // t1 = copy t0
    // ...
    // If the src and dst of the COPY can NOT be in the same config, as in
    // the case below, a reload is generated before the copy instruction.
    // def row0
    // def col0
    // t0 = tilezero(row0, col0)
    // spill t0
    // ...
    // def row1
    // def col1
    // ldtilecfg
    // t1 = tilezero(row1, col1)
    // reload t0
    // t1 = copy t0
    //---------------------------------------------------------------------
    //
    // If MI dominates the last shape def instruction, we need to insert
    // ldtilecfg after LastShapeMI now. The config doesn't include the
    // current MI.
    // def row0
    // def col0
    // tilezero(row0, col0) <- MI
    // def row1
    // def col1
    // ldtilecfg <- insert
    // tilezero(row1, col1)
    if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
      Config(*(++LastShapeMI->getIterator()));
    MachineOperand *RowMO = &MI.getOperand(1);
    MachineOperand *ColMO = &MI.getOperand(2);
    MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
    MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
    // If the shape is defined in the current MBB, check the domination.
    // FIXME: how about loops?
    if (RowMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = RowMI;
      else if (dominates(MBB, LastShapeMI, RowMI))
        LastShapeMI = RowMI;
    }
    if (ColMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = ColMI;
      else if (dominates(MBB, LastShapeMI, ColMI))
        LastShapeMI = ColMI;
    }

    // If there is a user that lives out of the tilecfg, spill the tile
    // register and reload it before the user.
    Register TileReg = MI.getOperand(0).getReg();
    if (mayLiveOut(TileReg, LastTileCfg))
      spill(++MI.getIterator(), TileReg, false);
    for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
      if (UseMI.getParent() == &MBB) {
        // Only reload if the use is after the last ldtilecfg, i.e. the tile
        // would have been clobbered by the reconfiguration.
        if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
          continue;
        // Reload before UseMI.
        reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      } else {
        // Don't reload for a phi instruction; we handle phi reloads
        // separately.
        // TODO: merge the reloads for the same user MBB.
        if (!UseMI.isPHI())
          reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      }
    }
  }

  // Configure tile registers at the head of the MBB.
  if (HasUnconfigTile) {
    MachineInstr *Before;
    if (LastShapeMI == nullptr || LastShapeMI->isPHI())
      Before = &*MBB.getFirstNonPHI();
    else
      Before = &*(++LastShapeMI->getIterator());

    Config(*Before);
  }

  return Change;
}

bool X86FastPreTileConfigImpl::runOnMachineFunction(MachineFunction &MFunc) {
  X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
  // Early exit in the common case of non-AMX code.
  if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
    return false;

  MF = &MFunc;
  MRI = &MFunc.getRegInfo();
  ST = &MFunc.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  MFI = &MFunc.getFrameInfo();
  TRI = ST->getRegisterInfo();
  CfgSS = -1;

  unsigned NumVirtRegs = MRI->getNumVirtRegs();

  StackSlotForVirtReg.resize(NumVirtRegs);
  MayLiveAcrossBlocks.clear();
  // We will create registers during config. *3 is to make sure
  // the virtual register number doesn't exceed the size of
  // the bit vector.
  MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
  bool Change = false;
  assert(MRI->isSSA());

  // Canonicalize the phi nodes first.
  for (MachineBasicBlock &MBB : MFunc)
    canonicalizePHIs(MBB);

  // Loop over all of the basic blocks in reverse post order and insert
  // ldtilecfg for tile registers. The reverse post order is to facilitate
  // PHI node conversion.
  ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
  for (MachineBasicBlock *MBB : RPOT) {
    convertPHIs(*MBB);
    Change |= configBasicBlock(*MBB);
  }

  if (Change)
    InitializeTileConfigStackSpace();

  StackSlotForVirtReg.clear();
  return Change;
}

FunctionPass *llvm::createX86FastPreTileConfigLegacyPass() {
  return new X86FastPreTileConfigLegacy();
}

bool X86FastPreTileConfigLegacy::runOnMachineFunction(MachineFunction &MF) {
  X86FastPreTileConfigImpl Impl;
  return Impl.runOnMachineFunction(MF);
}

PreservedAnalyses
X86FastPreTileConfigPass::run(MachineFunction &MF,
                              MachineFunctionAnalysisManager &MFAM) {
  X86FastPreTileConfigImpl Impl;
  bool Changed = Impl.runOnMachineFunction(MF);
  return Changed ? getMachineFunctionPassPreservedAnalyses()
                 : PreservedAnalyses::all();
}