| 1 | //===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | // | 
|---|
| 9 | /// \file Pass to configure the shape of AMX physical registers. | 
|---|
| 10 | /// AMX registers need to be configured before use. The ldtilecfg instruction | 
|---|
| 11 | /// is inserted before the FastRegAllocation pass; however, at that time we | 
|---|
| 12 | /// don't know the shape of each physical tile register, because register | 
|---|
| 13 | /// allocation is not done yet. This pass runs after the register allocation | 
|---|
| 14 | /// pass. It collects the shape information of each physical tile register | 
|---|
| 15 | /// and stores the shape in the stack slot that is allocated for loading the | 
|---|
| 16 | /// config into the tile config register. | 
|---|
| 17 | // | 
|---|
| 18 | //===----------------------------------------------------------------------===// | 
|---|
| 19 |  | 
|---|
| 20 | #include "X86.h" | 
|---|
| 21 | #include "X86InstrBuilder.h" | 
|---|
| 22 | #include "X86MachineFunctionInfo.h" | 
|---|
| 23 | #include "X86Subtarget.h" | 
|---|
| 24 | #include "llvm/CodeGen/MachineFrameInfo.h" | 
|---|
| 25 | #include "llvm/CodeGen/MachineFunctionPass.h" | 
|---|
| 26 | #include "llvm/CodeGen/MachineInstr.h" | 
|---|
| 27 | #include "llvm/CodeGen/MachineRegisterInfo.h" | 
|---|
| 28 | #include "llvm/CodeGen/Passes.h" | 
|---|
| 29 | #include "llvm/CodeGen/TargetInstrInfo.h" | 
|---|
| 30 | #include "llvm/CodeGen/TargetRegisterInfo.h" | 
|---|
| 31 |  | 
|---|
| 32 | using namespace llvm; | 
|---|
| 33 |  | 
|---|
| 34 | #define DEBUG_TYPE "fasttileconfig" | 
|---|
| 35 |  | 
|---|
| 36 | namespace { | 
|---|
| 37 |  | 
|---|
| 38 | class X86FastTileConfig : public MachineFunctionPass { | 
|---|
| 39 | // context | 
|---|
| 40 | MachineFunction *MF = nullptr; | 
|---|
| 41 | const TargetInstrInfo *TII = nullptr; | 
|---|
| 42 | MachineRegisterInfo *MRI = nullptr; | 
|---|
| 43 | const TargetRegisterInfo *TRI = nullptr; | 
|---|
| 44 | X86MachineFunctionInfo *X86FI = nullptr; | 
|---|
| 45 |  | 
|---|
| 46 | bool configBasicBlock(MachineBasicBlock &MBB); | 
|---|
| 47 |  | 
|---|
| 48 | public: | 
|---|
| 49 | X86FastTileConfig() : MachineFunctionPass(ID) {} | 
|---|
| 50 |  | 
|---|
| 51 | /// Return the pass name. | 
|---|
| 52 | StringRef getPassName() const override { | 
|---|
| 53 | return "Fast Tile Register Configure"; | 
|---|
| 54 | } | 
|---|
| 55 |  | 
|---|
| 56 | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|---|
| 57 | AU.setPreservesAll(); | 
|---|
| 58 | MachineFunctionPass::getAnalysisUsage(AU); | 
|---|
| 59 | } | 
|---|
| 60 |  | 
|---|
| 61 | /// Perform register allocation. | 
|---|
| 62 | bool runOnMachineFunction(MachineFunction &MFunc) override; | 
|---|
| 63 |  | 
|---|
| 64 | MachineFunctionProperties getRequiredProperties() const override { | 
|---|
| 65 | return MachineFunctionProperties().setNoPHIs(); | 
|---|
| 66 | } | 
|---|
| 67 |  | 
|---|
| 68 | static char ID; | 
|---|
| 69 | }; | 
|---|
| 70 |  | 
|---|
| 71 | } // end anonymous namespace | 
|---|
| 72 |  | 
|---|
| 73 | char X86FastTileConfig::ID = 0; | 
|---|
| 74 |  | 
|---|
| 75 | INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE, | 
|---|
| 76 | "Fast Tile Register Configure", false, false) | 
|---|
| 77 | INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE, | 
|---|
| 78 | "Fast Tile Register Configure", false, false) | 
|---|
| 79 |  | 
|---|
| 80 | static unsigned getNumDefTiles(MachineRegisterInfo *MRI, MachineInstr &MI) { | 
|---|
| 81 | // There is no phi instruction after register allocation. | 
|---|
| 82 | assert(MI.isPHI() == false); | 
|---|
| 83 | // The instruction must have 3 operands: tile def, row, col. | 
|---|
| 84 | // It should be AMX pseudo instruction that have shape operand. | 
|---|
| 85 | if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 || | 
|---|
| 86 | !MI.isPseudo()) | 
|---|
| 87 | return 0; | 
|---|
| 88 | MachineOperand &MO = MI.getOperand(i: 0); | 
|---|
| 89 |  | 
|---|
| 90 | if (MO.isReg()) { | 
|---|
| 91 | Register Reg = MO.getReg(); | 
|---|
| 92 | // FIXME: It may be used after Greedy RA and the physical | 
|---|
| 93 | // register is not rewritten yet. | 
|---|
| 94 | if (Reg.isVirtual()) { | 
|---|
| 95 | if (MRI->getRegClass(Reg)->getID() == X86::TILERegClassID) | 
|---|
| 96 | return 1; | 
|---|
| 97 | if (MRI->getRegClass(Reg)->getID() == X86::TILEPAIRRegClassID) | 
|---|
| 98 | return 2; | 
|---|
| 99 | } | 
|---|
| 100 | if (Reg >= X86::TMM0 && Reg <= X86::TMM7) | 
|---|
| 101 | return 1; | 
|---|
| 102 | if (Reg >= X86::TMM0_TMM1 && Reg <= X86::TMM6_TMM7) | 
|---|
| 103 | return 2; | 
|---|
| 104 | } | 
|---|
| 105 |  | 
|---|
| 106 | return 0; | 
|---|
| 107 | } | 
|---|
| 108 |  | 
|---|
| 109 | static unsigned getTMMIndex(Register Reg) { | 
|---|
| 110 | if (Reg >= X86::TMM0 && Reg <= X86::TMM7) | 
|---|
| 111 | return Reg - X86::TMM0; | 
|---|
| 112 | if (Reg >= X86::TMM0_TMM1 && Reg <= X86::TMM6_TMM7) | 
|---|
| 113 | return (Reg - X86::TMM0_TMM1) * 2; | 
|---|
| 114 | llvm_unreachable( "Invalid Tmm Reg!"); | 
|---|
| 115 | } | 
|---|
| 116 |  | 
|---|
| 117 | // PreTileConfig should configure the tile registers based on basic | 
|---|
| 118 | // block. | 
|---|
| 119 | bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) { | 
|---|
| 120 | bool Change = false; | 
|---|
| 121 | SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos; | 
|---|
| 122 | for (MachineInstr &MI : reverse(C&: MBB)) { | 
|---|
| 123 | unsigned DefNum = getNumDefTiles(MRI, MI); | 
|---|
| 124 | if (DefNum == 0 && MI.getOpcode() != X86::PLDTILECFGV) | 
|---|
| 125 | continue; | 
|---|
| 126 | // AMX instructions that define tile register. | 
|---|
| 127 | if (MI.getOpcode() != X86::PLDTILECFGV) { | 
|---|
| 128 | MachineOperand &Row = MI.getOperand(i: 1); | 
|---|
| 129 | unsigned TMMIdx = getTMMIndex(Reg: MI.getOperand(i: 0).getReg()); | 
|---|
| 130 | for (unsigned I = 0; I < DefNum; I++) { | 
|---|
| 131 | MachineOperand &Col = MI.getOperand(i: 2 + I); | 
|---|
| 132 | ShapeInfos.push_back(Elt: {TMMIdx + I, ShapeT(&Row, &Col)}); | 
|---|
| 133 | } | 
|---|
| 134 | } else { // PLDTILECFGV | 
|---|
| 135 | // Rewrite the shape information to memory. Stack slot should have | 
|---|
| 136 | // been initialized to zero in pre config. | 
|---|
| 137 | int SS = MI.getOperand(i: 0).getIndex(); // tile config stack slot. | 
|---|
| 138 | for (auto &ShapeInfo : ShapeInfos) { | 
|---|
| 139 | DebugLoc DL; | 
|---|
| 140 | unsigned TMMIdx = ShapeInfo.first; | 
|---|
| 141 | Register RowReg = ShapeInfo.second.getRow()->getReg(); | 
|---|
| 142 | Register ColReg = ShapeInfo.second.getCol()->getReg(); | 
|---|
| 143 | // Here is the data format for the tile config. | 
|---|
| 144 | // 0      palette | 
|---|
| 145 | // 1      start_row | 
|---|
| 146 | // 2-15   reserved, must be zero | 
|---|
| 147 | // 16-17  tile0.colsb Tile 0 bytes per row. | 
|---|
| 148 | // 18-19  tile1.colsb Tile 1 bytes per row. | 
|---|
| 149 | // 20-21  tile2.colsb Tile 2 bytes per row. | 
|---|
| 150 | // ... (sequence continues) | 
|---|
| 151 | // 30-31  tile7.colsb Tile 7 bytes per row. | 
|---|
| 152 | // 32-47  reserved, must be zero | 
|---|
| 153 | // 48     tile0.rows Tile 0 rows. | 
|---|
| 154 | // 49     tile1.rows Tile 1 rows. | 
|---|
| 155 | // 50     tile2.rows Tile 2 rows. | 
|---|
| 156 | // ... (sequence continues) | 
|---|
| 157 | // 55     tile7.rows Tile 7 rows. | 
|---|
| 158 | // 56-63  reserved, must be zero | 
|---|
| 159 | int RowOffset = 48 + TMMIdx; | 
|---|
| 160 | int ColOffset = 16 + TMMIdx * 2; | 
|---|
| 161 |  | 
|---|
| 162 | Register SubRowReg = TRI->getSubReg(Reg: RowReg, Idx: X86::sub_8bit); | 
|---|
| 163 | BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: X86::IMPLICIT_DEF), DestReg: SubRowReg); | 
|---|
| 164 | MachineInstrBuilder StoreRow = | 
|---|
| 165 | BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: X86::MOV8mr)); | 
|---|
| 166 | addFrameReference(MIB: StoreRow, FI: SS, Offset: RowOffset).addReg(RegNo: SubRowReg); | 
|---|
| 167 |  | 
|---|
| 168 | MachineInstrBuilder StoreCol = | 
|---|
| 169 | BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: X86::MOV16mr)); | 
|---|
| 170 | addFrameReference(MIB: StoreCol, FI: SS, Offset: ColOffset).addReg(RegNo: ColReg); | 
|---|
| 171 | } | 
|---|
| 172 | ShapeInfos.clear(); | 
|---|
| 173 | Change = true; | 
|---|
| 174 | } | 
|---|
| 175 | } | 
|---|
| 176 |  | 
|---|
| 177 | return Change; | 
|---|
| 178 | } | 
|---|
| 179 |  | 
|---|
| 180 | bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) { | 
|---|
| 181 | X86FI = MFunc.getInfo<X86MachineFunctionInfo>(); | 
|---|
| 182 | // Early exit in the common case of non-AMX code. | 
|---|
| 183 | if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA) | 
|---|
| 184 | return false; | 
|---|
| 185 |  | 
|---|
| 186 | MF = &MFunc; | 
|---|
| 187 | MRI = &MFunc.getRegInfo(); | 
|---|
| 188 | const TargetSubtargetInfo *ST = &MFunc.getSubtarget<X86Subtarget>(); | 
|---|
| 189 | TRI = ST->getRegisterInfo(); | 
|---|
| 190 | TII = MFunc.getSubtarget().getInstrInfo(); | 
|---|
| 191 | bool Change = false; | 
|---|
| 192 |  | 
|---|
| 193 | // Loop over all of the basic blocks, eliminating virtual register references | 
|---|
| 194 | for (MachineBasicBlock &MBB : MFunc) | 
|---|
| 195 | Change |= configBasicBlock(MBB); | 
|---|
| 196 |  | 
|---|
| 197 | return Change; | 
|---|
| 198 | } | 
|---|
| 199 |  | 
|---|
| 200 | FunctionPass *llvm::createX86FastTileConfigPass() { | 
|---|
| 201 | return new X86FastTileConfig(); | 
|---|
| 202 | } | 
|---|
| 203 |  | 
|---|