1 | //===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
/// \file Pass to configure the shape of AMX physical registers.
/// AMX registers need to be configured before use. The ldtilecfg instruction
/// is inserted before the FastRegAllocation pass; however, at that time we
/// don't know the shape of each physical tile register, because register
/// allocation is not done yet. This pass runs after the register allocation
/// pass. It collects the shape information of each physical tile register
/// and stores the shape in the stack slot that is allocated for loading the
/// configuration into the tile config register.
17 | // |
18 | //===----------------------------------------------------------------------===// |
19 | |
20 | #include "X86.h" |
21 | #include "X86InstrBuilder.h" |
22 | #include "X86MachineFunctionInfo.h" |
23 | #include "X86RegisterInfo.h" |
24 | #include "X86Subtarget.h" |
25 | #include "llvm/CodeGen/MachineFrameInfo.h" |
26 | #include "llvm/CodeGen/MachineFunctionPass.h" |
27 | #include "llvm/CodeGen/MachineInstr.h" |
28 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
29 | #include "llvm/CodeGen/Passes.h" |
30 | #include "llvm/CodeGen/TargetInstrInfo.h" |
31 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
32 | #include "llvm/InitializePasses.h" |
33 | |
34 | using namespace llvm; |
35 | |
36 | #define DEBUG_TYPE "fasttileconfig" |
37 | |
38 | namespace { |
39 | |
40 | class X86FastTileConfig : public MachineFunctionPass { |
41 | // context |
42 | MachineFunction *MF = nullptr; |
43 | const TargetInstrInfo *TII = nullptr; |
44 | MachineRegisterInfo *MRI = nullptr; |
45 | const TargetRegisterInfo *TRI = nullptr; |
46 | X86MachineFunctionInfo *X86FI = nullptr; |
47 | |
48 | bool configBasicBlock(MachineBasicBlock &MBB); |
49 | |
50 | public: |
51 | X86FastTileConfig() : MachineFunctionPass(ID) {} |
52 | |
53 | /// Return the pass name. |
54 | StringRef getPassName() const override { |
55 | return "Fast Tile Register Configure" ; |
56 | } |
57 | |
58 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
59 | AU.setPreservesAll(); |
60 | MachineFunctionPass::getAnalysisUsage(AU); |
61 | } |
62 | |
63 | /// Perform register allocation. |
64 | bool runOnMachineFunction(MachineFunction &MFunc) override; |
65 | |
66 | MachineFunctionProperties getRequiredProperties() const override { |
67 | return MachineFunctionProperties().set( |
68 | MachineFunctionProperties::Property::NoPHIs); |
69 | } |
70 | |
71 | static char ID; |
72 | }; |
73 | |
74 | } // end anonymous namespace |
75 | |
76 | char X86FastTileConfig::ID = 0; |
77 | |
78 | INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE, |
79 | "Fast Tile Register Configure" , false, false) |
80 | INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE, |
81 | "Fast Tile Register Configure" , false, false) |
82 | |
83 | static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) { |
84 | // There is no phi instruction after register allocation. |
85 | assert(MI.isPHI() == false); |
86 | // The instruction must have 3 operands: tile def, row, col. |
87 | // It should be AMX pseudo instruction that have shape operand. |
88 | if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 || |
89 | !MI.isPseudo()) |
90 | return false; |
91 | MachineOperand &MO = MI.getOperand(i: 0); |
92 | |
93 | if (MO.isReg()) { |
94 | Register Reg = MO.getReg(); |
95 | // FIXME it may be used after Greedy RA and the physical |
96 | // register is not rewritten yet. |
97 | if (Reg.isVirtual() && |
98 | MRI->getRegClass(Reg)->getID() == X86::TILERegClassID) |
99 | return true; |
100 | if (Reg >= X86::TMM0 && Reg <= X86::TMM7) |
101 | return true; |
102 | } |
103 | |
104 | return false; |
105 | } |
106 | |
107 | // PreTileConfig should configure the tile registers based on basic |
108 | // block. |
109 | bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) { |
110 | bool Change = false; |
111 | SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos; |
112 | for (MachineInstr &MI : reverse(C&: MBB)) { |
113 | if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV) |
114 | continue; |
115 | // AMX instructions that define tile register. |
116 | if (MI.getOpcode() != X86::PLDTILECFGV) { |
117 | MachineOperand &Row = MI.getOperand(i: 1); |
118 | MachineOperand &Col = MI.getOperand(i: 2); |
119 | unsigned TMMIdx = MI.getOperand(i: 0).getReg() - X86::TMM0; |
120 | ShapeInfos.push_back(Elt: {TMMIdx, ShapeT(&Row, &Col)}); |
121 | } else { // PLDTILECFGV |
122 | // Rewrite the shape information to memory. Stack slot should have |
123 | // been initialized to zero in pre config. |
124 | int SS = MI.getOperand(i: 0).getIndex(); // tile config stack slot. |
125 | for (auto &ShapeInfo : ShapeInfos) { |
126 | DebugLoc DL; |
127 | unsigned TMMIdx = ShapeInfo.first; |
128 | Register RowReg = ShapeInfo.second.getRow()->getReg(); |
129 | Register ColReg = ShapeInfo.second.getCol()->getReg(); |
130 | // Here is the data format for the tile config. |
131 | // 0 palette |
132 | // 1 start_row |
133 | // 2-15 reserved, must be zero |
134 | // 16-17 tile0.colsb Tile 0 bytes per row. |
135 | // 18-19 tile1.colsb Tile 1 bytes per row. |
136 | // 20-21 tile2.colsb Tile 2 bytes per row. |
137 | // ... (sequence continues) |
138 | // 30-31 tile7.colsb Tile 7 bytes per row. |
139 | // 32-47 reserved, must be zero |
140 | // 48 tile0.rows Tile 0 rows. |
141 | // 49 tile1.rows Tile 1 rows. |
142 | // 50 tile2.rows Tile 2 rows. |
143 | // ... (sequence continues) |
144 | // 55 tile7.rows Tile 7 rows. |
145 | // 56-63 reserved, must be zero |
146 | int RowOffset = 48 + TMMIdx; |
147 | int ColOffset = 16 + TMMIdx * 2; |
148 | |
149 | Register SubRowReg = TRI->getSubReg(Reg: RowReg, Idx: X86::sub_8bit); |
150 | BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: X86::IMPLICIT_DEF), DestReg: SubRowReg); |
151 | MachineInstrBuilder StoreRow = |
152 | BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: X86::MOV8mr)); |
153 | addFrameReference(MIB: StoreRow, FI: SS, Offset: RowOffset).addReg(RegNo: SubRowReg); |
154 | |
155 | MachineInstrBuilder StoreCol = |
156 | BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: X86::MOV16mr)); |
157 | addFrameReference(MIB: StoreCol, FI: SS, Offset: ColOffset).addReg(RegNo: ColReg); |
158 | } |
159 | ShapeInfos.clear(); |
160 | Change = true; |
161 | } |
162 | } |
163 | |
164 | return Change; |
165 | } |
166 | |
167 | bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) { |
168 | X86FI = MFunc.getInfo<X86MachineFunctionInfo>(); |
169 | // Early exit in the common case of non-AMX code. |
170 | if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA) |
171 | return false; |
172 | |
173 | MF = &MFunc; |
174 | MRI = &MFunc.getRegInfo(); |
175 | const TargetSubtargetInfo *ST = &MFunc.getSubtarget<X86Subtarget>(); |
176 | TRI = ST->getRegisterInfo(); |
177 | TII = MFunc.getSubtarget().getInstrInfo(); |
178 | bool Change = false; |
179 | |
180 | // Loop over all of the basic blocks, eliminating virtual register references |
181 | for (MachineBasicBlock &MBB : MFunc) |
182 | Change |= configBasicBlock(MBB); |
183 | |
184 | return Change; |
185 | } |
186 | |
187 | FunctionPass *llvm::createX86FastTileConfigPass() { |
188 | return new X86FastTileConfig(); |
189 | } |
190 | |