| 1 | //===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "ARM.h" |
| 10 | #include "ARMMachineFunctionInfo.h" |
| 11 | #include "ARMSubtarget.h" |
| 12 | #include "Thumb2InstrInfo.h" |
| 13 | #include "llvm/ADT/SmallVector.h" |
| 14 | #include "llvm/ADT/Statistic.h" |
| 15 | #include "llvm/ADT/StringRef.h" |
| 16 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 17 | #include "llvm/CodeGen/MachineFunction.h" |
| 18 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 19 | #include "llvm/CodeGen/MachineInstr.h" |
| 20 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 21 | #include "llvm/CodeGen/MachineInstrBundle.h" |
| 22 | #include "llvm/CodeGen/MachineOperand.h" |
| 23 | #include "llvm/IR/DebugLoc.h" |
| 24 | #include "llvm/Support/Debug.h" |
| 25 | #include <cassert> |
| 26 | #include <new> |
| 27 | |
| 28 | using namespace llvm; |
| 29 | |
| 30 | #define DEBUG_TYPE "arm-mve-vpt" |
| 31 | |
| 32 | namespace { |
| 33 | class MVEVPTBlock : public MachineFunctionPass { |
| 34 | public: |
| 35 | static char ID; |
| 36 | const Thumb2InstrInfo *TII; |
| 37 | const TargetRegisterInfo *TRI; |
| 38 | |
| 39 | MVEVPTBlock() : MachineFunctionPass(ID) {} |
| 40 | |
| 41 | bool runOnMachineFunction(MachineFunction &Fn) override; |
| 42 | |
| 43 | MachineFunctionProperties getRequiredProperties() const override { |
| 44 | return MachineFunctionProperties().setNoVRegs(); |
| 45 | } |
| 46 | |
| 47 | StringRef getPassName() const override { |
| 48 | return "MVE VPT block insertion pass" ; |
| 49 | } |
| 50 | |
| 51 | private: |
| 52 | bool InsertVPTBlocks(MachineBasicBlock &MBB); |
| 53 | }; |
| 54 | |
| 55 | char MVEVPTBlock::ID = 0; |
| 56 | |
| 57 | } // end anonymous namespace |
| 58 | |
| 59 | INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass" , false, false) |
| 60 | |
| 61 | static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, |
| 62 | const TargetRegisterInfo *TRI, |
| 63 | unsigned &NewOpcode) { |
| 64 | // Search backwards to the instruction that defines VPR. This may or not |
| 65 | // be a VCMP, we check that after this loop. If we find another instruction |
| 66 | // that reads cpsr, we return nullptr. |
| 67 | MachineBasicBlock::iterator CmpMI = MI; |
| 68 | while (CmpMI != MI->getParent()->begin()) { |
| 69 | --CmpMI; |
| 70 | if (CmpMI->modifiesRegister(Reg: ARM::VPR, TRI)) |
| 71 | break; |
| 72 | if (CmpMI->readsRegister(Reg: ARM::VPR, TRI)) |
| 73 | break; |
| 74 | } |
| 75 | |
| 76 | if (CmpMI == MI) |
| 77 | return nullptr; |
| 78 | NewOpcode = VCMPOpcodeToVPT(Opcode: CmpMI->getOpcode()); |
| 79 | if (NewOpcode == 0) |
| 80 | return nullptr; |
| 81 | |
| 82 | // Search forward from CmpMI to MI, checking if either register was def'd |
| 83 | if (registerDefinedBetween(Reg: CmpMI->getOperand(i: 1).getReg(), From: std::next(x: CmpMI), |
| 84 | To: MI, TRI)) |
| 85 | return nullptr; |
| 86 | if (registerDefinedBetween(Reg: CmpMI->getOperand(i: 2).getReg(), From: std::next(x: CmpMI), |
| 87 | To: MI, TRI)) |
| 88 | return nullptr; |
| 89 | return &*CmpMI; |
| 90 | } |
| 91 | |
| 92 | // Advances Iter past a block of predicated instructions. |
| 93 | // Returns true if it successfully skipped the whole block of predicated |
| 94 | // instructions. Returns false when it stopped early (due to MaxSteps), or if |
| 95 | // Iter didn't point to a predicated instruction. |
| 96 | static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter, |
| 97 | MachineBasicBlock::instr_iterator EndIter, |
| 98 | unsigned MaxSteps, |
| 99 | unsigned &) { |
| 100 | ARMVCC::VPTCodes NextPred = ARMVCC::None; |
| 101 | Register PredReg; |
| 102 | NumInstrsSteppedOver = 0; |
| 103 | |
| 104 | while (Iter != EndIter) { |
| 105 | if (Iter->isDebugInstr()) { |
| 106 | // Skip debug instructions |
| 107 | ++Iter; |
| 108 | continue; |
| 109 | } |
| 110 | |
| 111 | NextPred = getVPTInstrPredicate(MI: *Iter, PredReg); |
| 112 | assert(NextPred != ARMVCC::Else && |
| 113 | "VPT block pass does not expect Else preds" ); |
| 114 | if (NextPred == ARMVCC::None || MaxSteps == 0) |
| 115 | break; |
| 116 | --MaxSteps; |
| 117 | ++Iter; |
| 118 | ++NumInstrsSteppedOver; |
| 119 | }; |
| 120 | |
| 121 | return NumInstrsSteppedOver != 0 && |
| 122 | (NextPred == ARMVCC::None || Iter == EndIter); |
| 123 | } |
| 124 | |
| 125 | // Returns true if at least one instruction in the range [Iter, End) defines |
| 126 | // or kills VPR. |
| 127 | static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter, |
| 128 | MachineBasicBlock::iterator End) { |
| 129 | for (; Iter != End; ++Iter) |
| 130 | if (Iter->definesRegister(Reg: ARM::VPR, /*TRI=*/nullptr) || |
| 131 | Iter->killsRegister(Reg: ARM::VPR, /*TRI=*/nullptr)) |
| 132 | return true; |
| 133 | return false; |
| 134 | } |
| 135 | |
| 136 | // Creates a T, TT, TTT or TTTT BlockMask depending on BlockSize. |
| 137 | static ARM::PredBlockMask GetInitialBlockMask(unsigned BlockSize) { |
| 138 | switch (BlockSize) { |
| 139 | case 1: |
| 140 | return ARM::PredBlockMask::T; |
| 141 | case 2: |
| 142 | return ARM::PredBlockMask::TT; |
| 143 | case 3: |
| 144 | return ARM::PredBlockMask::TTT; |
| 145 | case 4: |
| 146 | return ARM::PredBlockMask::TTTT; |
| 147 | default: |
| 148 | llvm_unreachable("Invalid BlockSize!" ); |
| 149 | } |
| 150 | } |
| 151 | |
| 152 | // Given an iterator (Iter) that points at an instruction with a "Then" |
| 153 | // predicate, tries to create the largest block of continuous predicated |
| 154 | // instructions possible, and returns the VPT Block Mask of that block. |
| 155 | // |
| 156 | // This will try to perform some minor optimization in order to maximize the |
| 157 | // size of the block. |
| 158 | static ARM::PredBlockMask |
| 159 | CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, |
| 160 | MachineBasicBlock::instr_iterator EndIter, |
| 161 | SmallVectorImpl<MachineInstr *> &DeadInstructions) { |
| 162 | MachineBasicBlock::instr_iterator BlockBeg = Iter; |
| 163 | (void)BlockBeg; |
| 164 | assert(getVPTInstrPredicate(*Iter) == ARMVCC::Then && |
| 165 | "Expected a Predicated Instruction" ); |
| 166 | |
| 167 | LLVM_DEBUG(dbgs() << "VPT block created for: " ; Iter->dump()); |
| 168 | |
| 169 | unsigned BlockSize; |
| 170 | StepOverPredicatedInstrs(Iter, EndIter, MaxSteps: 4, NumInstrsSteppedOver&: BlockSize); |
| 171 | |
| 172 | LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter = |
| 173 | std::next(BlockBeg); |
| 174 | AddedInstIter != Iter; ++AddedInstIter) { |
| 175 | if (AddedInstIter->isDebugInstr()) |
| 176 | continue; |
| 177 | dbgs() << " adding: " ; |
| 178 | AddedInstIter->dump(); |
| 179 | }); |
| 180 | |
| 181 | // Generate the initial BlockMask |
| 182 | ARM::PredBlockMask BlockMask = GetInitialBlockMask(BlockSize); |
| 183 | |
| 184 | // Remove VPNOTs while there's still room in the block, so we can make the |
| 185 | // largest block possible. |
| 186 | ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else; |
| 187 | while (BlockSize < 4 && Iter != EndIter && |
| 188 | Iter->getOpcode() == ARM::MVE_VPNOT) { |
| 189 | |
| 190 | // Try to skip all of the predicated instructions after the VPNOT, stopping |
| 191 | // after (4 - BlockSize). If we can't skip them all, stop. |
| 192 | unsigned ElseInstCnt = 0; |
| 193 | MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(x: Iter); |
| 194 | if (!StepOverPredicatedInstrs(Iter&: VPNOTBlockEndIter, EndIter, MaxSteps: (4 - BlockSize), |
| 195 | NumInstrsSteppedOver&: ElseInstCnt)) |
| 196 | break; |
| 197 | |
| 198 | // Check if this VPNOT can be removed or not: It can only be removed if at |
| 199 | // least one of the predicated instruction that follows it kills or sets |
| 200 | // VPR. |
| 201 | if (!IsVPRDefinedOrKilledByBlock(Iter, End: VPNOTBlockEndIter)) |
| 202 | break; |
| 203 | |
| 204 | LLVM_DEBUG(dbgs() << " removing VPNOT: " ; Iter->dump()); |
| 205 | |
| 206 | // Record the new size of the block |
| 207 | BlockSize += ElseInstCnt; |
| 208 | assert(BlockSize <= 4 && "Block is too large!" ); |
| 209 | |
| 210 | // Record the VPNot to remove it later. |
| 211 | DeadInstructions.push_back(Elt: &*Iter); |
| 212 | ++Iter; |
| 213 | |
| 214 | // Replace the predicates of the instructions we're adding. |
| 215 | // Note that we are using "Iter" to iterate over the block so we can update |
| 216 | // it at the same time. |
| 217 | for (; Iter != VPNOTBlockEndIter; ++Iter) { |
| 218 | if (Iter->isDebugInstr()) |
| 219 | continue; |
| 220 | |
| 221 | // Find the register in which the predicate is |
| 222 | int OpIdx = findFirstVPTPredOperandIdx(MI: *Iter); |
| 223 | assert(OpIdx != -1); |
| 224 | |
| 225 | // Change the predicate and update the mask |
| 226 | Iter->getOperand(i: OpIdx).setImm(CurrentPredicate); |
| 227 | BlockMask = expandPredBlockMask(BlockMask, Kind: CurrentPredicate); |
| 228 | |
| 229 | LLVM_DEBUG(dbgs() << " adding : " ; Iter->dump()); |
| 230 | } |
| 231 | |
| 232 | CurrentPredicate = |
| 233 | (CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then); |
| 234 | } |
| 235 | return BlockMask; |
| 236 | } |
| 237 | |
| 238 | bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { |
| 239 | bool Modified = false; |
| 240 | MachineBasicBlock::instr_iterator MBIter = Block.instr_begin(); |
| 241 | MachineBasicBlock::instr_iterator EndIter = Block.instr_end(); |
| 242 | |
| 243 | SmallVector<MachineInstr *, 4> DeadInstructions; |
| 244 | |
| 245 | while (MBIter != EndIter) { |
| 246 | MachineInstr *MI = &*MBIter; |
| 247 | Register PredReg; |
| 248 | DebugLoc DL = MI->getDebugLoc(); |
| 249 | |
| 250 | ARMVCC::VPTCodes Pred = getVPTInstrPredicate(MI: *MI, PredReg); |
| 251 | |
| 252 | // The idea of the predicate is that None, Then and Else are for use when |
| 253 | // handling assembly language: they correspond to the three possible |
| 254 | // suffixes "", "t" and "e" on the mnemonic. So when instructions are read |
| 255 | // from assembly source or disassembled from object code, you expect to |
| 256 | // see a mixture whenever there's a long VPT block. But in code |
| 257 | // generation, we hope we'll never generate an Else as input to this pass. |
| 258 | assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds" ); |
| 259 | |
| 260 | if (Pred == ARMVCC::None) { |
| 261 | ++MBIter; |
| 262 | continue; |
| 263 | } |
| 264 | |
| 265 | ARM::PredBlockMask BlockMask = |
| 266 | CreateVPTBlock(Iter&: MBIter, EndIter, DeadInstructions); |
| 267 | |
| 268 | // Search back for a VCMP that can be folded to create a VPT, or else |
| 269 | // create a VPST directly |
| 270 | MachineInstrBuilder MIBuilder; |
| 271 | unsigned NewOpcode; |
| 272 | LLVM_DEBUG(dbgs() << " final block mask: " << (unsigned)BlockMask << "\n" ); |
| 273 | if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) { |
| 274 | LLVM_DEBUG(dbgs() << " folding VCMP into VPST: " ; VCMP->dump()); |
| 275 | MIBuilder = BuildMI(BB&: Block, I: MI, MIMD: DL, MCID: TII->get(Opcode: NewOpcode)); |
| 276 | MIBuilder.addImm(Val: (uint64_t)BlockMask); |
| 277 | MIBuilder.add(MO: VCMP->getOperand(i: 1)); |
| 278 | MIBuilder.add(MO: VCMP->getOperand(i: 2)); |
| 279 | MIBuilder.add(MO: VCMP->getOperand(i: 3)); |
| 280 | |
| 281 | // We need to remove any kill flags between the original VCMP and the new |
| 282 | // insertion point. |
| 283 | for (MachineInstr &MII : |
| 284 | make_range(x: VCMP->getIterator(), y: MI->getIterator())) { |
| 285 | MII.clearRegisterKills(Reg: VCMP->getOperand(i: 1).getReg(), RegInfo: TRI); |
| 286 | MII.clearRegisterKills(Reg: VCMP->getOperand(i: 2).getReg(), RegInfo: TRI); |
| 287 | } |
| 288 | |
| 289 | VCMP->eraseFromParent(); |
| 290 | } else { |
| 291 | MIBuilder = BuildMI(BB&: Block, I: MI, MIMD: DL, MCID: TII->get(Opcode: ARM::MVE_VPST)); |
| 292 | MIBuilder.addImm(Val: (uint64_t)BlockMask); |
| 293 | } |
| 294 | |
| 295 | // Erase all dead instructions (VPNOT's). Do that now so that they do not |
| 296 | // mess with the bundle creation. |
| 297 | for (MachineInstr *DeadMI : DeadInstructions) |
| 298 | DeadMI->eraseFromParent(); |
| 299 | DeadInstructions.clear(); |
| 300 | |
| 301 | finalizeBundle( |
| 302 | MBB&: Block, FirstMI: MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), LastMI: MBIter); |
| 303 | |
| 304 | Modified = true; |
| 305 | } |
| 306 | |
| 307 | return Modified; |
| 308 | } |
| 309 | |
| 310 | bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { |
| 311 | const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); |
| 312 | |
| 313 | if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) |
| 314 | return false; |
| 315 | |
| 316 | TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo()); |
| 317 | TRI = STI.getRegisterInfo(); |
| 318 | |
| 319 | LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n" |
| 320 | << "********** Function: " << Fn.getName() << '\n'); |
| 321 | |
| 322 | bool Modified = false; |
| 323 | for (MachineBasicBlock &MBB : Fn) |
| 324 | Modified |= InsertVPTBlocks(Block&: MBB); |
| 325 | |
| 326 | LLVM_DEBUG(dbgs() << "**************************************\n" ); |
| 327 | return Modified; |
| 328 | } |
| 329 | |
| 330 | /// createMVEVPTBlock - Returns an instance of the MVE VPT block |
| 331 | /// insertion pass. |
| 332 | FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); } |
| 333 | |