1//===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
/// \file Pass to configure the shapes of AMX physical tile registers.
/// AMX registers need to be configured before use. The ldtilecfg instruction
/// is inserted before the fast register allocation pass; however, at that
/// time we don't know the shape of each physical tile register, because
/// register allocation is not done yet. This pass runs after the register
/// allocation pass. It collects the shape information of each physical tile
/// register and stores the shape in the stack slot that is allocated for
/// loading the config into the tile config register.
17//
18//===----------------------------------------------------------------------===//
19
20#include "X86.h"
21#include "X86InstrBuilder.h"
22#include "X86MachineFunctionInfo.h"
23#include "X86Subtarget.h"
24#include "llvm/CodeGen/MachineFrameInfo.h"
25#include "llvm/CodeGen/MachineFunctionPass.h"
26#include "llvm/CodeGen/MachineInstr.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28#include "llvm/CodeGen/Passes.h"
29#include "llvm/CodeGen/TargetInstrInfo.h"
30#include "llvm/CodeGen/TargetRegisterInfo.h"
31
32using namespace llvm;
33
34#define DEBUG_TYPE "x86-fast-tile-config"
35
36namespace {
37
38class X86FastTileConfigImpl {
39public:
40 bool runOnMachineFunction(MachineFunction &MF);
41
42private:
43 // context
44 MachineFunction *MF = nullptr;
45 const TargetInstrInfo *TII = nullptr;
46 MachineRegisterInfo *MRI = nullptr;
47 const TargetRegisterInfo *TRI = nullptr;
48 X86MachineFunctionInfo *X86FI = nullptr;
49
50 bool configBasicBlock(MachineBasicBlock &MBB);
51};
52
53class X86FastTileConfigLegacy : public MachineFunctionPass {
54public:
55 X86FastTileConfigLegacy() : MachineFunctionPass(ID) {}
56
57 /// Return the pass name.
58 StringRef getPassName() const override {
59 return "Fast Tile Register Configure";
60 }
61
62 void getAnalysisUsage(AnalysisUsage &AU) const override {
63 AU.setPreservesAll();
64 MachineFunctionPass::getAnalysisUsage(AU);
65 }
66
67 /// Perform register allocation.
68 bool runOnMachineFunction(MachineFunction &MFunc) override;
69
70 MachineFunctionProperties getRequiredProperties() const override {
71 return MachineFunctionProperties().setNoPHIs();
72 }
73
74 static char ID;
75};
76
77} // end anonymous namespace
78
79char X86FastTileConfigLegacy::ID = 0;
80
81INITIALIZE_PASS_BEGIN(X86FastTileConfigLegacy, DEBUG_TYPE,
82 "Fast Tile Register Configure", false, false)
83INITIALIZE_PASS_END(X86FastTileConfigLegacy, DEBUG_TYPE,
84 "Fast Tile Register Configure", false, false)
85
86static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
87 // There is no phi instruction after register allocation.
88 assert(MI.isPHI() == false);
89 // The instruction must have 3 operands: tile def, row, col.
90 // It should be AMX pseudo instruction that have shape operand.
91 if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 ||
92 !MI.isPseudo())
93 return false;
94 MachineOperand &MO = MI.getOperand(i: 0);
95
96 if (MO.isReg()) {
97 Register Reg = MO.getReg();
98 // FIXME: It may be used after Greedy RA and the physical
99 // register is not rewritten yet.
100 if (Reg.isVirtual()) {
101 if (MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
102 return true;
103 }
104 if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
105 return true;
106 }
107
108 return false;
109}
110
111static unsigned getTMMIndex(Register Reg) {
112 if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
113 return Reg - X86::TMM0;
114 llvm_unreachable("Invalid Tmm Reg!");
115}
116
117// PreTileConfig should configure the tile registers based on basic
118// block.
119bool X86FastTileConfigImpl::configBasicBlock(MachineBasicBlock &MBB) {
120 bool Change = false;
121 SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos;
122 for (MachineInstr &MI : reverse(C&: MBB)) {
123 if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV)
124 continue;
125 // AMX instructions that define tile register.
126 if (MI.getOpcode() != X86::PLDTILECFGV) {
127 MachineOperand &Row = MI.getOperand(i: 1);
128 unsigned TMMIdx = getTMMIndex(Reg: MI.getOperand(i: 0).getReg());
129 MachineOperand &Col = MI.getOperand(i: 2);
130 ShapeInfos.push_back(Elt: {TMMIdx, ShapeT(&Row, &Col)});
131 } else { // PLDTILECFGV
132 // Rewrite the shape information to memory. Stack slot should have
133 // been initialized to zero in pre config.
134 int SS = MI.getOperand(i: 0).getIndex(); // tile config stack slot.
135 for (auto &ShapeInfo : ShapeInfos) {
136 DebugLoc DL;
137 unsigned TMMIdx = ShapeInfo.first;
138 Register RowReg = ShapeInfo.second.getRow()->getReg();
139 Register ColReg = ShapeInfo.second.getCol()->getReg();
140 // Here is the data format for the tile config.
141 // 0 palette
142 // 1 start_row
143 // 2-15 reserved, must be zero
144 // 16-17 tile0.colsb Tile 0 bytes per row.
145 // 18-19 tile1.colsb Tile 1 bytes per row.
146 // 20-21 tile2.colsb Tile 2 bytes per row.
147 // ... (sequence continues)
148 // 30-31 tile7.colsb Tile 7 bytes per row.
149 // 32-47 reserved, must be zero
150 // 48 tile0.rows Tile 0 rows.
151 // 49 tile1.rows Tile 1 rows.
152 // 50 tile2.rows Tile 2 rows.
153 // ... (sequence continues)
154 // 55 tile7.rows Tile 7 rows.
155 // 56-63 reserved, must be zero
156 int RowOffset = 48 + TMMIdx;
157 int ColOffset = 16 + TMMIdx * 2;
158
159 Register SubRowReg = TRI->getSubReg(Reg: RowReg, Idx: X86::sub_8bit);
160 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: X86::IMPLICIT_DEF), DestReg: SubRowReg);
161 MachineInstrBuilder StoreRow =
162 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: X86::MOV8mr));
163 addFrameReference(MIB: StoreRow, FI: SS, Offset: RowOffset).addReg(RegNo: SubRowReg);
164
165 MachineInstrBuilder StoreCol =
166 BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: X86::MOV16mr));
167 addFrameReference(MIB: StoreCol, FI: SS, Offset: ColOffset).addReg(RegNo: ColReg);
168 }
169 ShapeInfos.clear();
170 Change = true;
171 }
172 }
173
174 return Change;
175}
176
177bool X86FastTileConfigImpl::runOnMachineFunction(MachineFunction &MFunc) {
178 X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
179 // Early exit in the common case of non-AMX code.
180 if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
181 return false;
182
183 MF = &MFunc;
184 MRI = &MFunc.getRegInfo();
185 const TargetSubtargetInfo *ST = &MFunc.getSubtarget<X86Subtarget>();
186 TRI = ST->getRegisterInfo();
187 TII = MFunc.getSubtarget().getInstrInfo();
188 bool Change = false;
189
190 // Loop over all of the basic blocks, eliminating virtual register references
191 for (MachineBasicBlock &MBB : MFunc)
192 Change |= configBasicBlock(MBB);
193
194 return Change;
195}
196
197FunctionPass *llvm::createX86FastTileConfigLegacyPass() {
198 return new X86FastTileConfigLegacy();
199}
200
201bool X86FastTileConfigLegacy::runOnMachineFunction(MachineFunction &MF) {
202 X86FastTileConfigImpl Impl;
203 return Impl.runOnMachineFunction(MFunc&: MF);
204}
205
206PreservedAnalyses
207X86FastTileConfigPass::run(MachineFunction &MF,
208 MachineFunctionAnalysisManager &MFAM) {
209 X86FastTileConfigImpl Impl;
210 Impl.runOnMachineFunction(MFunc&: MF);
211 return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>();
212}
213