1 | //===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
/// \file Pass to configure the shapes of the AMX physical tile registers.
/// AMX registers need to be configured before use. The ldtilecfg instruction
/// is inserted before the FastRegAllocation pass; however, at that time we
/// don't know the shape of each physical tile register, because register
/// allocation has not been done yet. This pass runs after the register
/// allocation pass. It collects the shape information of each physical tile
/// register and stores the shape in the stack slot that is allocated for
/// loading the configuration into the tile config register.
17 | // |
18 | //===----------------------------------------------------------------------===// |
19 | |
20 | #include "X86.h" |
21 | #include "X86InstrBuilder.h" |
22 | #include "X86MachineFunctionInfo.h" |
23 | #include "X86Subtarget.h" |
24 | #include "llvm/CodeGen/MachineFrameInfo.h" |
25 | #include "llvm/CodeGen/MachineFunctionPass.h" |
26 | #include "llvm/CodeGen/MachineInstr.h" |
27 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
28 | #include "llvm/CodeGen/Passes.h" |
29 | #include "llvm/CodeGen/TargetInstrInfo.h" |
30 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
31 | |
32 | using namespace llvm; |
33 | |
34 | #define DEBUG_TYPE "fasttileconfig" |
35 | |
36 | namespace { |
37 | |
38 | class X86FastTileConfig : public MachineFunctionPass { |
39 | // context |
40 | MachineFunction *MF = nullptr; |
41 | const TargetInstrInfo *TII = nullptr; |
42 | MachineRegisterInfo *MRI = nullptr; |
43 | const TargetRegisterInfo *TRI = nullptr; |
44 | X86MachineFunctionInfo *X86FI = nullptr; |
45 | |
46 | bool configBasicBlock(MachineBasicBlock &MBB); |
47 | |
48 | public: |
49 | X86FastTileConfig() : MachineFunctionPass(ID) {} |
50 | |
51 | /// Return the pass name. |
52 | StringRef getPassName() const override { |
53 | return "Fast Tile Register Configure" ; |
54 | } |
55 | |
56 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
57 | AU.setPreservesAll(); |
58 | MachineFunctionPass::getAnalysisUsage(AU); |
59 | } |
60 | |
61 | /// Perform register allocation. |
62 | bool runOnMachineFunction(MachineFunction &MFunc) override; |
63 | |
64 | MachineFunctionProperties getRequiredProperties() const override { |
65 | return MachineFunctionProperties().setNoPHIs(); |
66 | } |
67 | |
68 | static char ID; |
69 | }; |
70 | |
71 | } // end anonymous namespace |
72 | |
73 | char X86FastTileConfig::ID = 0; |
74 | |
75 | INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE, |
76 | "Fast Tile Register Configure" , false, false) |
77 | INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE, |
78 | "Fast Tile Register Configure" , false, false) |
79 | |
80 | static unsigned getNumDefTiles(MachineRegisterInfo *MRI, MachineInstr &MI) { |
81 | // There is no phi instruction after register allocation. |
82 | assert(MI.isPHI() == false); |
83 | // The instruction must have 3 operands: tile def, row, col. |
84 | // It should be AMX pseudo instruction that have shape operand. |
85 | if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 || |
86 | !MI.isPseudo()) |
87 | return 0; |
88 | MachineOperand &MO = MI.getOperand(i: 0); |
89 | |
90 | if (MO.isReg()) { |
91 | Register Reg = MO.getReg(); |
92 | // FIXME: It may be used after Greedy RA and the physical |
93 | // register is not rewritten yet. |
94 | if (Reg.isVirtual()) { |
95 | if (MRI->getRegClass(Reg)->getID() == X86::TILERegClassID) |
96 | return 1; |
97 | if (MRI->getRegClass(Reg)->getID() == X86::TILEPAIRRegClassID) |
98 | return 2; |
99 | } |
100 | if (Reg >= X86::TMM0 && Reg <= X86::TMM7) |
101 | return 1; |
102 | if (Reg >= X86::TMM0_TMM1 && Reg <= X86::TMM6_TMM7) |
103 | return 2; |
104 | } |
105 | |
106 | return 0; |
107 | } |
108 | |
109 | static unsigned getTMMIndex(Register Reg) { |
110 | if (Reg >= X86::TMM0 && Reg <= X86::TMM7) |
111 | return Reg - X86::TMM0; |
112 | if (Reg >= X86::TMM0_TMM1 && Reg <= X86::TMM6_TMM7) |
113 | return (Reg - X86::TMM0_TMM1) * 2; |
114 | llvm_unreachable("Invalid Tmm Reg!" ); |
115 | } |
116 | |
117 | // PreTileConfig should configure the tile registers based on basic |
118 | // block. |
119 | bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) { |
120 | bool Change = false; |
121 | SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos; |
122 | for (MachineInstr &MI : reverse(C&: MBB)) { |
123 | unsigned DefNum = getNumDefTiles(MRI, MI); |
124 | if (DefNum == 0 && MI.getOpcode() != X86::PLDTILECFGV) |
125 | continue; |
126 | // AMX instructions that define tile register. |
127 | if (MI.getOpcode() != X86::PLDTILECFGV) { |
128 | MachineOperand &Row = MI.getOperand(i: 1); |
129 | unsigned TMMIdx = getTMMIndex(Reg: MI.getOperand(i: 0).getReg()); |
130 | for (unsigned I = 0; I < DefNum; I++) { |
131 | MachineOperand &Col = MI.getOperand(i: 2 + I); |
132 | ShapeInfos.push_back(Elt: {TMMIdx + I, ShapeT(&Row, &Col)}); |
133 | } |
134 | } else { // PLDTILECFGV |
135 | // Rewrite the shape information to memory. Stack slot should have |
136 | // been initialized to zero in pre config. |
137 | int SS = MI.getOperand(i: 0).getIndex(); // tile config stack slot. |
138 | for (auto &ShapeInfo : ShapeInfos) { |
139 | DebugLoc DL; |
140 | unsigned TMMIdx = ShapeInfo.first; |
141 | Register RowReg = ShapeInfo.second.getRow()->getReg(); |
142 | Register ColReg = ShapeInfo.second.getCol()->getReg(); |
143 | // Here is the data format for the tile config. |
144 | // 0 palette |
145 | // 1 start_row |
146 | // 2-15 reserved, must be zero |
147 | // 16-17 tile0.colsb Tile 0 bytes per row. |
148 | // 18-19 tile1.colsb Tile 1 bytes per row. |
149 | // 20-21 tile2.colsb Tile 2 bytes per row. |
150 | // ... (sequence continues) |
151 | // 30-31 tile7.colsb Tile 7 bytes per row. |
152 | // 32-47 reserved, must be zero |
153 | // 48 tile0.rows Tile 0 rows. |
154 | // 49 tile1.rows Tile 1 rows. |
155 | // 50 tile2.rows Tile 2 rows. |
156 | // ... (sequence continues) |
157 | // 55 tile7.rows Tile 7 rows. |
158 | // 56-63 reserved, must be zero |
159 | int RowOffset = 48 + TMMIdx; |
160 | int ColOffset = 16 + TMMIdx * 2; |
161 | |
162 | Register SubRowReg = TRI->getSubReg(Reg: RowReg, Idx: X86::sub_8bit); |
163 | BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: X86::IMPLICIT_DEF), DestReg: SubRowReg); |
164 | MachineInstrBuilder StoreRow = |
165 | BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: X86::MOV8mr)); |
166 | addFrameReference(MIB: StoreRow, FI: SS, Offset: RowOffset).addReg(RegNo: SubRowReg); |
167 | |
168 | MachineInstrBuilder StoreCol = |
169 | BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: X86::MOV16mr)); |
170 | addFrameReference(MIB: StoreCol, FI: SS, Offset: ColOffset).addReg(RegNo: ColReg); |
171 | } |
172 | ShapeInfos.clear(); |
173 | Change = true; |
174 | } |
175 | } |
176 | |
177 | return Change; |
178 | } |
179 | |
180 | bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) { |
181 | X86FI = MFunc.getInfo<X86MachineFunctionInfo>(); |
182 | // Early exit in the common case of non-AMX code. |
183 | if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA) |
184 | return false; |
185 | |
186 | MF = &MFunc; |
187 | MRI = &MFunc.getRegInfo(); |
188 | const TargetSubtargetInfo *ST = &MFunc.getSubtarget<X86Subtarget>(); |
189 | TRI = ST->getRegisterInfo(); |
190 | TII = MFunc.getSubtarget().getInstrInfo(); |
191 | bool Change = false; |
192 | |
193 | // Loop over all of the basic blocks, eliminating virtual register references |
194 | for (MachineBasicBlock &MBB : MFunc) |
195 | Change |= configBasicBlock(MBB); |
196 | |
197 | return Change; |
198 | } |
199 | |
200 | FunctionPass *llvm::createX86FastTileConfigPass() { |
201 | return new X86FastTileConfig(); |
202 | } |
203 | |