| 1 | //===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===// | 
|---|
| 2 | // | 
|---|
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|---|
| 4 | // See https://llvm.org/LICENSE.txt for license information. | 
|---|
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|---|
| 6 | // | 
|---|
| 7 | //===----------------------------------------------------------------------===// | 
|---|
| 8 | // | 
|---|
| 9 | /// \file This pass does a few optimisations related to Tail predicated loops | 
|---|
| 10 | /// and MVE VPT blocks before register allocation is performed. For VPT blocks | 
|---|
| 11 | /// the goal is to maximize the sizes of the blocks that will be created by the | 
|---|
| 12 | /// MVE VPT Block Insertion pass (which runs after register allocation). For | 
|---|
| 13 | /// tail predicated loops we transform the loop into something that will | 
|---|
| 14 | /// hopefully make the backend ARMLowOverheadLoops pass's job easier. | 
|---|
| 15 | /// | 
|---|
| 16 | //===----------------------------------------------------------------------===// | 
|---|
| 17 |  | 
|---|
| 18 | #include "ARM.h" | 
|---|
| 19 | #include "ARMSubtarget.h" | 
|---|
| 20 | #include "MVETailPredUtils.h" | 
|---|
| 21 | #include "Thumb2InstrInfo.h" | 
|---|
| 22 | #include "llvm/ADT/SmallVector.h" | 
|---|
| 23 | #include "llvm/CodeGen/MachineBasicBlock.h" | 
|---|
| 24 | #include "llvm/CodeGen/MachineDominators.h" | 
|---|
| 25 | #include "llvm/CodeGen/MachineFunction.h" | 
|---|
| 26 | #include "llvm/CodeGen/MachineFunctionPass.h" | 
|---|
| 27 | #include "llvm/CodeGen/MachineInstr.h" | 
|---|
| 28 | #include "llvm/CodeGen/MachineLoopInfo.h" | 
|---|
| 29 | #include "llvm/InitializePasses.h" | 
|---|
| 30 | #include "llvm/Support/Debug.h" | 
|---|
| 31 | #include <cassert> | 
|---|
| 32 |  | 
|---|
| 33 | using namespace llvm; | 
|---|
| 34 |  | 
|---|
| 35 | #define DEBUG_TYPE "arm-mve-vpt-opts" | 
|---|
| 36 |  | 
|---|
| 37 | static cl::opt<bool> | 
|---|
| 38 | MergeEndDec( "arm-enable-merge-loopenddec", cl::Hidden, | 
|---|
| 39 | cl::desc( "Enable merging Loop End and Dec instructions."), | 
|---|
| 40 | cl::init(Val: true)); | 
|---|
| 41 |  | 
|---|
| 42 | static cl::opt<bool> | 
|---|
| 43 | SetLRPredicate( "arm-set-lr-predicate", cl::Hidden, | 
|---|
| 44 | cl::desc( "Enable setting lr as a predicate in tail predication regions."), | 
|---|
| 45 | cl::init(Val: true)); | 
|---|
| 46 |  | 
|---|
| 47 | namespace { | 
|---|
| 48 | class MVETPAndVPTOptimisations : public MachineFunctionPass { | 
|---|
| 49 | public: | 
|---|
| 50 | static char ID; | 
|---|
| 51 | const Thumb2InstrInfo *TII; | 
|---|
| 52 | MachineRegisterInfo *MRI; | 
|---|
| 53 |  | 
|---|
| 54 | MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {} | 
|---|
| 55 |  | 
|---|
| 56 | bool runOnMachineFunction(MachineFunction &Fn) override; | 
|---|
| 57 |  | 
|---|
| 58 | void getAnalysisUsage(AnalysisUsage &AU) const override { | 
|---|
| 59 | AU.addRequired<MachineLoopInfoWrapperPass>(); | 
|---|
| 60 | AU.addPreserved<MachineLoopInfoWrapperPass>(); | 
|---|
| 61 | AU.addRequired<MachineDominatorTreeWrapperPass>(); | 
|---|
| 62 | AU.addPreserved<MachineDominatorTreeWrapperPass>(); | 
|---|
| 63 | MachineFunctionPass::getAnalysisUsage(AU); | 
|---|
| 64 | } | 
|---|
| 65 |  | 
|---|
| 66 | StringRef getPassName() const override { | 
|---|
| 67 | return "ARM MVE TailPred and VPT Optimisation Pass"; | 
|---|
| 68 | } | 
|---|
| 69 |  | 
|---|
| 70 | private: | 
|---|
| 71 | bool LowerWhileLoopStart(MachineLoop *ML); | 
|---|
| 72 | bool MergeLoopEnd(MachineLoop *ML); | 
|---|
| 73 | bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT); | 
|---|
| 74 | MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB, | 
|---|
| 75 | MachineInstr &Instr, | 
|---|
| 76 | MachineOperand &User, | 
|---|
| 77 | Register Target); | 
|---|
| 78 | bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB); | 
|---|
| 79 | bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB); | 
|---|
| 80 | bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT); | 
|---|
| 81 | bool ConvertVPSEL(MachineBasicBlock &MBB); | 
|---|
| 82 | bool HintDoLoopStartReg(MachineBasicBlock &MBB); | 
|---|
| 83 | MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *, | 
|---|
| 84 | MachineInstr *LoopStart); | 
|---|
| 85 | }; | 
|---|
| 86 |  | 
|---|
| 87 | char MVETPAndVPTOptimisations::ID = 0; | 
|---|
| 88 |  | 
|---|
| 89 | } // end anonymous namespace | 
|---|
| 90 |  | 
|---|
| 91 | INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE, | 
|---|
| 92 | "ARM MVE TailPred and VPT Optimisations pass", false, | 
|---|
| 93 | false) | 
|---|
| 94 | INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) | 
|---|
| 95 | INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) | 
|---|
| 96 | INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE, | 
|---|
| 97 | "ARM MVE TailPred and VPT Optimisations pass", false, false) | 
|---|
| 98 |  | 
|---|
| 99 | static MachineInstr *LookThroughCOPY(MachineInstr *MI, | 
|---|
| 100 | MachineRegisterInfo *MRI) { | 
|---|
| 101 | while (MI && MI->getOpcode() == TargetOpcode::COPY && | 
|---|
| 102 | MI->getOperand(i: 1).getReg().isVirtual()) | 
|---|
| 103 | MI = MRI->getVRegDef(Reg: MI->getOperand(i: 1).getReg()); | 
|---|
| 104 | return MI; | 
|---|
| 105 | } | 
|---|
| 106 |  | 
|---|
| 107 | // Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and | 
|---|
| 108 | // corresponding PHI that make up a low overhead loop. Only handles 'do' loops | 
|---|
| 109 | // at the moment, returning a t2DoLoopStart in LoopStart. | 
|---|
| 110 | static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI, | 
|---|
| 111 | MachineInstr *&LoopStart, MachineInstr *&LoopPhi, | 
|---|
| 112 | MachineInstr *&LoopDec, MachineInstr *&LoopEnd) { | 
|---|
| 113 | MachineBasicBlock * = ML->getHeader(); | 
|---|
| 114 | MachineBasicBlock *Latch = ML->getLoopLatch(); | 
|---|
| 115 | if (!Header || !Latch) { | 
|---|
| 116 | LLVM_DEBUG(dbgs() << "  no Loop Latch or Header\n"); | 
|---|
| 117 | return false; | 
|---|
| 118 | } | 
|---|
| 119 |  | 
|---|
| 120 | // Find the loop end from the terminators. | 
|---|
| 121 | LoopEnd = nullptr; | 
|---|
| 122 | for (auto &T : Latch->terminators()) { | 
|---|
| 123 | if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(i: 1).getMBB() == Header) { | 
|---|
| 124 | LoopEnd = &T; | 
|---|
| 125 | break; | 
|---|
| 126 | } | 
|---|
| 127 | if (T.getOpcode() == ARM::t2LoopEndDec && | 
|---|
| 128 | T.getOperand(i: 2).getMBB() == Header) { | 
|---|
| 129 | LoopEnd = &T; | 
|---|
| 130 | break; | 
|---|
| 131 | } | 
|---|
| 132 | } | 
|---|
| 133 | if (!LoopEnd) { | 
|---|
| 134 | LLVM_DEBUG(dbgs() << "  no LoopEnd\n"); | 
|---|
| 135 | return false; | 
|---|
| 136 | } | 
|---|
| 137 | LLVM_DEBUG(dbgs() << "  found loop end: "<< *LoopEnd); | 
|---|
| 138 |  | 
|---|
| 139 | // Find the dec from the use of the end. There may be copies between | 
|---|
| 140 | // instructions. We expect the loop to loop like: | 
|---|
| 141 | //   $vs = t2DoLoopStart ... | 
|---|
| 142 | // loop: | 
|---|
| 143 | //   $vp = phi [ $vs ], [ $vd ] | 
|---|
| 144 | //   ... | 
|---|
| 145 | //   $vd = t2LoopDec $vp | 
|---|
| 146 | //   ... | 
|---|
| 147 | //   t2LoopEnd $vd, loop | 
|---|
| 148 | if (LoopEnd->getOpcode() == ARM::t2LoopEndDec) | 
|---|
| 149 | LoopDec = LoopEnd; | 
|---|
| 150 | else { | 
|---|
| 151 | LoopDec = | 
|---|
| 152 | LookThroughCOPY(MI: MRI->getVRegDef(Reg: LoopEnd->getOperand(i: 0).getReg()), MRI); | 
|---|
| 153 | if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) { | 
|---|
| 154 | LLVM_DEBUG(dbgs() << "  didn't find LoopDec where we expected!\n"); | 
|---|
| 155 | return false; | 
|---|
| 156 | } | 
|---|
| 157 | } | 
|---|
| 158 | LLVM_DEBUG(dbgs() << "  found loop dec: "<< *LoopDec); | 
|---|
| 159 |  | 
|---|
| 160 | LoopPhi = | 
|---|
| 161 | LookThroughCOPY(MI: MRI->getVRegDef(Reg: LoopDec->getOperand(i: 1).getReg()), MRI); | 
|---|
| 162 | if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI || | 
|---|
| 163 | LoopPhi->getNumOperands() != 5 || | 
|---|
| 164 | (LoopPhi->getOperand(i: 2).getMBB() != Latch && | 
|---|
| 165 | LoopPhi->getOperand(i: 4).getMBB() != Latch)) { | 
|---|
| 166 | LLVM_DEBUG(dbgs() << "  didn't find PHI where we expected!\n"); | 
|---|
| 167 | return false; | 
|---|
| 168 | } | 
|---|
| 169 | LLVM_DEBUG(dbgs() << "  found loop phi: "<< *LoopPhi); | 
|---|
| 170 |  | 
|---|
| 171 | Register StartReg = LoopPhi->getOperand(i: 2).getMBB() == Latch | 
|---|
| 172 | ? LoopPhi->getOperand(i: 3).getReg() | 
|---|
| 173 | : LoopPhi->getOperand(i: 1).getReg(); | 
|---|
| 174 | LoopStart = LookThroughCOPY(MI: MRI->getVRegDef(Reg: StartReg), MRI); | 
|---|
| 175 | if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart && | 
|---|
| 176 | LoopStart->getOpcode() != ARM::t2WhileLoopSetup && | 
|---|
| 177 | LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) { | 
|---|
| 178 | LLVM_DEBUG(dbgs() << "  didn't find Start where we expected!\n"); | 
|---|
| 179 | return false; | 
|---|
| 180 | } | 
|---|
| 181 | LLVM_DEBUG(dbgs() << "  found loop start: "<< *LoopStart); | 
|---|
| 182 |  | 
|---|
| 183 | return true; | 
|---|
| 184 | } | 
|---|
| 185 |  | 
|---|
| 186 | static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) { | 
|---|
| 187 | MachineBasicBlock *MBB = MI->getParent(); | 
|---|
| 188 | assert(MI->getOpcode() == ARM::t2WhileLoopSetup && | 
|---|
| 189 | "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!"); | 
|---|
| 190 |  | 
|---|
| 191 | // Subs | 
|---|
| 192 | MachineInstrBuilder MIB = | 
|---|
| 193 | BuildMI(BB&: *MBB, I: MI, MIMD: MI->getDebugLoc(), MCID: TII->get(Opcode: ARM::t2SUBri)); | 
|---|
| 194 | MIB.add(MO: MI->getOperand(i: 0)); | 
|---|
| 195 | MIB.add(MO: MI->getOperand(i: 1)); | 
|---|
| 196 | MIB.addImm(Val: 0); | 
|---|
| 197 | MIB.addImm(Val: ARMCC::AL); | 
|---|
| 198 | MIB.addReg(RegNo: ARM::NoRegister); | 
|---|
| 199 | MIB.addReg(RegNo: ARM::CPSR, flags: RegState::Define); | 
|---|
| 200 |  | 
|---|
| 201 | // Attempt to find a t2WhileLoopStart and revert to a t2Bcc. | 
|---|
| 202 | for (MachineInstr &I : MBB->terminators()) { | 
|---|
| 203 | if (I.getOpcode() == ARM::t2WhileLoopStart) { | 
|---|
| 204 | MachineInstrBuilder MIB = | 
|---|
| 205 | BuildMI(BB&: *MBB, I: &I, MIMD: I.getDebugLoc(), MCID: TII->get(Opcode: ARM::t2Bcc)); | 
|---|
| 206 | MIB.add(MO: MI->getOperand(i: 1)); // branch target | 
|---|
| 207 | MIB.addImm(Val: ARMCC::EQ); | 
|---|
| 208 | MIB.addReg(RegNo: ARM::CPSR); | 
|---|
| 209 | I.eraseFromParent(); | 
|---|
| 210 | break; | 
|---|
| 211 | } | 
|---|
| 212 | } | 
|---|
| 213 |  | 
|---|
| 214 | MI->eraseFromParent(); | 
|---|
| 215 | } | 
|---|
| 216 |  | 
|---|
| 217 | // The Hardware Loop insertion and ISel Lowering produce the pseudos for the | 
|---|
| 218 | // start of a while loop: | 
|---|
| 219 | //   %a:gprlr = t2WhileLoopSetup %Cnt | 
|---|
| 220 | //   t2WhileLoopStart %a, %BB | 
|---|
| 221 | // We want to convert those to a single instruction which, like t2LoopEndDec and | 
|---|
| 222 | // t2DoLoopStartTP is both a terminator and produces a value: | 
|---|
| 223 | //   %a:grplr: t2WhileLoopStartLR %Cnt, %BB | 
|---|
| 224 | // | 
|---|
| 225 | // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and | 
|---|
| 226 | // t2WhileLoopStart are not valid past regalloc. | 
|---|
| 227 | bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) { | 
|---|
| 228 | LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop " | 
|---|
| 229 | << ML->getHeader()->getName() << "\n"); | 
|---|
| 230 |  | 
|---|
| 231 | MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; | 
|---|
| 232 | if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) | 
|---|
| 233 | return false; | 
|---|
| 234 |  | 
|---|
| 235 | if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup) | 
|---|
| 236 | return false; | 
|---|
| 237 |  | 
|---|
| 238 | Register LR = LoopStart->getOperand(i: 0).getReg(); | 
|---|
| 239 | auto WLSIt = find_if(Range: MRI->use_nodbg_instructions(Reg: LR), P: [](auto &MI) { | 
|---|
| 240 | return MI.getOpcode() == ARM::t2WhileLoopStart; | 
|---|
| 241 | }); | 
|---|
| 242 | if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) { | 
|---|
| 243 | RevertWhileLoopSetup(MI: LoopStart, TII); | 
|---|
| 244 | RevertLoopDec(MI: LoopStart, TII); | 
|---|
| 245 | RevertLoopEnd(MI: LoopStart, TII); | 
|---|
| 246 | return true; | 
|---|
| 247 | } | 
|---|
| 248 |  | 
|---|
| 249 | MachineInstrBuilder MI = | 
|---|
| 250 | BuildMI(BB&: *WLSIt->getParent(), I&: *WLSIt, MIMD: WLSIt->getDebugLoc(), | 
|---|
| 251 | MCID: TII->get(Opcode: ARM::t2WhileLoopStartLR), DestReg: LR) | 
|---|
| 252 | .add(MO: LoopStart->getOperand(i: 1)) | 
|---|
| 253 | .add(MO: WLSIt->getOperand(i: 1)); | 
|---|
| 254 | (void)MI; | 
|---|
| 255 | LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: "<< *MI.getInstr()); | 
|---|
| 256 |  | 
|---|
| 257 | WLSIt->eraseFromParent(); | 
|---|
| 258 | LoopStart->eraseFromParent(); | 
|---|
| 259 | return true; | 
|---|
| 260 | } | 
|---|
| 261 |  | 
|---|
| 262 | // Return true if this instruction is invalid in a low overhead loop, usually | 
|---|
| 263 | // because it clobbers LR. | 
|---|
| 264 | static bool IsInvalidTPInstruction(MachineInstr &MI) { | 
|---|
| 265 | return MI.isCall() || isLoopStart(MI); | 
|---|
| 266 | } | 
|---|
| 267 |  | 
|---|
| 268 | // Starting from PreHeader, search for invalid instructions back until the | 
|---|
| 269 | // LoopStart block is reached. If invalid instructions are found, the loop start | 
|---|
| 270 | // is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will | 
|---|
| 271 | // return the new DLS LoopStart if updated. | 
|---|
| 272 | MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors( | 
|---|
| 273 | MachineBasicBlock *, MachineInstr *LoopStart) { | 
|---|
| 274 | SmallVector<MachineBasicBlock *> Worklist; | 
|---|
| 275 | SmallPtrSet<MachineBasicBlock *, 4> Visited; | 
|---|
| 276 | Worklist.push_back(Elt: PreHeader); | 
|---|
| 277 | Visited.insert(Ptr: LoopStart->getParent()); | 
|---|
| 278 |  | 
|---|
| 279 | while (!Worklist.empty()) { | 
|---|
| 280 | MachineBasicBlock *MBB = Worklist.pop_back_val(); | 
|---|
| 281 | if (Visited.count(Ptr: MBB)) | 
|---|
| 282 | continue; | 
|---|
| 283 |  | 
|---|
| 284 | for (MachineInstr &MI : *MBB) { | 
|---|
| 285 | if (!IsInvalidTPInstruction(MI)) | 
|---|
| 286 | continue; | 
|---|
| 287 |  | 
|---|
| 288 | LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: "<< MI); | 
|---|
| 289 |  | 
|---|
| 290 | // Create a t2DoLoopStart at the end of the preheader. | 
|---|
| 291 | MachineInstrBuilder MIB = | 
|---|
| 292 | BuildMI(BB&: *PreHeader, I: PreHeader->getFirstTerminator(), | 
|---|
| 293 | MIMD: LoopStart->getDebugLoc(), MCID: TII->get(Opcode: ARM::t2DoLoopStart)); | 
|---|
| 294 | MIB.add(MO: LoopStart->getOperand(i: 0)); | 
|---|
| 295 | MIB.add(MO: LoopStart->getOperand(i: 1)); | 
|---|
| 296 |  | 
|---|
| 297 | // Make sure to remove the kill flags, to prevent them from being invalid. | 
|---|
| 298 | LoopStart->getOperand(i: 1).setIsKill(false); | 
|---|
| 299 |  | 
|---|
| 300 | // Revert the t2WhileLoopStartLR to a CMP and Br. | 
|---|
| 301 | RevertWhileLoopStartLR(MI: LoopStart, TII, BrOpc: ARM::t2Bcc, UseCmp: true); | 
|---|
| 302 | return MIB; | 
|---|
| 303 | } | 
|---|
| 304 |  | 
|---|
| 305 | Visited.insert(Ptr: MBB); | 
|---|
| 306 | llvm::append_range(C&: Worklist, R: MBB->predecessors()); | 
|---|
| 307 | } | 
|---|
| 308 | return LoopStart; | 
|---|
| 309 | } | 
|---|
| 310 |  | 
|---|
| 311 | // This function converts loops with t2LoopEnd and t2LoopEnd instructions into | 
|---|
| 312 | // a single t2LoopEndDec instruction. To do that it needs to make sure that LR | 
|---|
| 313 | // will be valid to be used for the low overhead loop, which means nothing else | 
|---|
| 314 | // is using LR (especially calls) and there are no superfluous copies in the | 
|---|
| 315 | // loop. The t2LoopEndDec is a branching terminator that produces a value (the | 
|---|
| 316 | // decrement) around the loop edge, which means we need to be careful that they | 
|---|
| 317 | // will be valid to allocate without any spilling. | 
|---|
| 318 | bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) { | 
|---|
| 319 | if (!MergeEndDec) | 
|---|
| 320 | return false; | 
|---|
| 321 |  | 
|---|
| 322 | LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop "<< ML->getHeader()->getName() | 
|---|
| 323 | << "\n"); | 
|---|
| 324 |  | 
|---|
| 325 | MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; | 
|---|
| 326 | if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) | 
|---|
| 327 | return false; | 
|---|
| 328 |  | 
|---|
| 329 | // Check if there is an illegal instruction (a call) in the low overhead loop | 
|---|
| 330 | // and if so revert it now before we get any further. While loops also need to | 
|---|
| 331 | // check the preheaders, but can be reverted to a DLS loop if needed. | 
|---|
| 332 | auto * = ML->getLoopPreheader(); | 
|---|
| 333 | if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader) | 
|---|
| 334 | LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart); | 
|---|
| 335 |  | 
|---|
| 336 | for (MachineBasicBlock *MBB : ML->blocks()) { | 
|---|
| 337 | for (MachineInstr &MI : *MBB) { | 
|---|
| 338 | if (IsInvalidTPInstruction(MI)) { | 
|---|
| 339 | LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: "<< MI); | 
|---|
| 340 | if (LoopStart->getOpcode() == ARM::t2DoLoopStart) | 
|---|
| 341 | RevertDoLoopStart(MI: LoopStart, TII); | 
|---|
| 342 | else | 
|---|
| 343 | RevertWhileLoopStartLR(MI: LoopStart, TII); | 
|---|
| 344 | RevertLoopDec(MI: LoopDec, TII); | 
|---|
| 345 | RevertLoopEnd(MI: LoopEnd, TII); | 
|---|
| 346 | return true; | 
|---|
| 347 | } | 
|---|
| 348 | } | 
|---|
| 349 | } | 
|---|
| 350 |  | 
|---|
| 351 | // Remove any copies from the loop, to ensure the phi that remains is both | 
|---|
| 352 | // simpler and contains no extra uses. Because t2LoopEndDec is a terminator | 
|---|
| 353 | // that cannot spill, we need to be careful what remains in the loop. | 
|---|
| 354 | Register PhiReg = LoopPhi->getOperand(i: 0).getReg(); | 
|---|
| 355 | Register DecReg = LoopDec->getOperand(i: 0).getReg(); | 
|---|
| 356 | Register StartReg = LoopStart->getOperand(i: 0).getReg(); | 
|---|
| 357 | // Ensure the uses are expected, and collect any copies we want to remove. | 
|---|
| 358 | SmallVector<MachineInstr *, 4> Copies; | 
|---|
| 359 | auto CheckUsers = [&Copies](Register BaseReg, | 
|---|
| 360 | ArrayRef<MachineInstr *> ExpectedUsers, | 
|---|
| 361 | MachineRegisterInfo *MRI) { | 
|---|
| 362 | SmallVector<Register, 4> Worklist; | 
|---|
| 363 | Worklist.push_back(Elt: BaseReg); | 
|---|
| 364 | while (!Worklist.empty()) { | 
|---|
| 365 | Register Reg = Worklist.pop_back_val(); | 
|---|
| 366 | for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { | 
|---|
| 367 | if (llvm::is_contained(Range&: ExpectedUsers, Element: &MI)) | 
|---|
| 368 | continue; | 
|---|
| 369 | if (MI.getOpcode() != TargetOpcode::COPY || | 
|---|
| 370 | !MI.getOperand(i: 0).getReg().isVirtual()) { | 
|---|
| 371 | LLVM_DEBUG(dbgs() << "Extra users of register found: "<< MI); | 
|---|
| 372 | return false; | 
|---|
| 373 | } | 
|---|
| 374 | Worklist.push_back(Elt: MI.getOperand(i: 0).getReg()); | 
|---|
| 375 | Copies.push_back(Elt: &MI); | 
|---|
| 376 | } | 
|---|
| 377 | } | 
|---|
| 378 | return true; | 
|---|
| 379 | }; | 
|---|
| 380 | if (!CheckUsers(PhiReg, {LoopDec}, MRI) || | 
|---|
| 381 | !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) || | 
|---|
| 382 | !CheckUsers(StartReg, {LoopPhi}, MRI)) { | 
|---|
| 383 | // Don't leave a t2WhileLoopStartLR without the LoopDecEnd. | 
|---|
| 384 | if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) { | 
|---|
| 385 | RevertWhileLoopStartLR(MI: LoopStart, TII); | 
|---|
| 386 | RevertLoopDec(MI: LoopDec, TII); | 
|---|
| 387 | RevertLoopEnd(MI: LoopEnd, TII); | 
|---|
| 388 | return true; | 
|---|
| 389 | } | 
|---|
| 390 | return false; | 
|---|
| 391 | } | 
|---|
| 392 |  | 
|---|
| 393 | MRI->constrainRegClass(Reg: StartReg, RC: &ARM::GPRlrRegClass); | 
|---|
| 394 | MRI->constrainRegClass(Reg: PhiReg, RC: &ARM::GPRlrRegClass); | 
|---|
| 395 | MRI->constrainRegClass(Reg: DecReg, RC: &ARM::GPRlrRegClass); | 
|---|
| 396 |  | 
|---|
| 397 | if (LoopPhi->getOperand(i: 2).getMBB() == ML->getLoopLatch()) { | 
|---|
| 398 | LoopPhi->getOperand(i: 3).setReg(StartReg); | 
|---|
| 399 | LoopPhi->getOperand(i: 1).setReg(DecReg); | 
|---|
| 400 | } else { | 
|---|
| 401 | LoopPhi->getOperand(i: 1).setReg(StartReg); | 
|---|
| 402 | LoopPhi->getOperand(i: 3).setReg(DecReg); | 
|---|
| 403 | } | 
|---|
| 404 |  | 
|---|
| 405 | SmallVector<MachineOperand, 4> Cond;              // For analyzeBranch. | 
|---|
| 406 | MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch. | 
|---|
| 407 | if (!TII->analyzeBranch(MBB&: *LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) { | 
|---|
| 408 | // If the LoopEnd falls through, need to insert a t2B to the fall-through | 
|---|
| 409 | // block so that the non-analyzable t2LoopEndDec doesn't fall through. | 
|---|
| 410 | MachineFunction::iterator MBBI = ++LoopEnd->getParent()->getIterator(); | 
|---|
| 411 | BuildMI(BB: LoopEnd->getParent(), MIMD: DebugLoc(), MCID: TII->get(Opcode: ARM::t2B)) | 
|---|
| 412 | .addMBB(MBB: &*MBBI) | 
|---|
| 413 | .add(MOs: predOps(Pred: ARMCC::AL)); | 
|---|
| 414 | } | 
|---|
| 415 |  | 
|---|
| 416 | // Replace the loop dec and loop end as a single instruction. | 
|---|
| 417 | MachineInstrBuilder MI = | 
|---|
| 418 | BuildMI(BB&: *LoopEnd->getParent(), I&: *LoopEnd, MIMD: LoopEnd->getDebugLoc(), | 
|---|
| 419 | MCID: TII->get(Opcode: ARM::t2LoopEndDec), DestReg: DecReg) | 
|---|
| 420 | .addReg(RegNo: PhiReg) | 
|---|
| 421 | .add(MO: LoopEnd->getOperand(i: 1)); | 
|---|
| 422 | (void)MI; | 
|---|
| 423 | LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: "<< *MI.getInstr()); | 
|---|
| 424 |  | 
|---|
| 425 | LoopDec->eraseFromParent(); | 
|---|
| 426 | LoopEnd->eraseFromParent(); | 
|---|
| 427 | for (auto *MI : Copies) | 
|---|
| 428 | MI->eraseFromParent(); | 
|---|
| 429 | return true; | 
|---|
| 430 | } | 
|---|
| 431 |  | 
|---|
| 432 | // Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP | 
|---|
| 433 | // instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP | 
|---|
| 434 | // instruction, making the backend ARMLowOverheadLoops passes job of finding the | 
|---|
| 435 | // VCTP operand much simpler. | 
|---|
| 436 | bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML, | 
|---|
| 437 | MachineDominatorTree *DT) { | 
|---|
| 438 | LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop " | 
|---|
| 439 | << ML->getHeader()->getName() << "\n"); | 
|---|
| 440 |  | 
|---|
| 441 | // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's | 
|---|
| 442 | // in the loop. | 
|---|
| 443 | MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; | 
|---|
| 444 | if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) | 
|---|
| 445 | return false; | 
|---|
| 446 | if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart && | 
|---|
| 447 | LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) | 
|---|
| 448 | return false; | 
|---|
| 449 |  | 
|---|
| 450 | SmallVector<MachineInstr *, 4> VCTPs; | 
|---|
| 451 | SmallVector<MachineInstr *, 4> MVEInstrs; | 
|---|
| 452 | for (MachineBasicBlock *BB : ML->blocks()) { | 
|---|
| 453 | for (MachineInstr &MI : *BB) | 
|---|
| 454 | if (isVCTP(MI: &MI)) | 
|---|
| 455 | VCTPs.push_back(Elt: &MI); | 
|---|
| 456 | else if (findFirstVPTPredOperandIdx(MI) != -1) | 
|---|
| 457 | MVEInstrs.push_back(Elt: &MI); | 
|---|
| 458 | } | 
|---|
| 459 |  | 
|---|
| 460 | if (VCTPs.empty()) { | 
|---|
| 461 | LLVM_DEBUG(dbgs() << "  no VCTPs\n"); | 
|---|
| 462 | return false; | 
|---|
| 463 | } | 
|---|
| 464 |  | 
|---|
| 465 | // Check all VCTPs are the same. | 
|---|
| 466 | MachineInstr *FirstVCTP = *VCTPs.begin(); | 
|---|
| 467 | for (MachineInstr *VCTP : VCTPs) { | 
|---|
| 468 | LLVM_DEBUG(dbgs() << "  with VCTP "<< *VCTP); | 
|---|
| 469 | if (VCTP->getOpcode() != FirstVCTP->getOpcode() || | 
|---|
| 470 | VCTP->getOperand(i: 0).getReg() != FirstVCTP->getOperand(i: 0).getReg()) { | 
|---|
| 471 | LLVM_DEBUG(dbgs() << "  VCTP's are not identical\n"); | 
|---|
| 472 | return false; | 
|---|
| 473 | } | 
|---|
| 474 | } | 
|---|
| 475 |  | 
|---|
| 476 | // Check for the register being used can be setup before the loop. We expect | 
|---|
| 477 | // this to be: | 
|---|
| 478 | //   $vx = ... | 
|---|
| 479 | // loop: | 
|---|
| 480 | //   $vp = PHI [ $vx ], [ $vd ] | 
|---|
| 481 | //   .. | 
|---|
| 482 | //   $vpr = VCTP $vp | 
|---|
| 483 | //   .. | 
|---|
| 484 | //   $vd = t2SUBri $vp, #n | 
|---|
| 485 | //   .. | 
|---|
| 486 | Register CountReg = FirstVCTP->getOperand(i: 1).getReg(); | 
|---|
| 487 | if (!CountReg.isVirtual()) { | 
|---|
| 488 | LLVM_DEBUG(dbgs() << "  cannot determine VCTP PHI\n"); | 
|---|
| 489 | return false; | 
|---|
| 490 | } | 
|---|
| 491 | MachineInstr *Phi = LookThroughCOPY(MI: MRI->getVRegDef(Reg: CountReg), MRI); | 
|---|
| 492 | if (!Phi || Phi->getOpcode() != TargetOpcode::PHI || | 
|---|
| 493 | Phi->getNumOperands() != 5 || | 
|---|
| 494 | (Phi->getOperand(i: 2).getMBB() != ML->getLoopLatch() && | 
|---|
| 495 | Phi->getOperand(i: 4).getMBB() != ML->getLoopLatch())) { | 
|---|
| 496 | LLVM_DEBUG(dbgs() << "  cannot determine VCTP Count\n"); | 
|---|
| 497 | return false; | 
|---|
| 498 | } | 
|---|
| 499 | CountReg = Phi->getOperand(i: 2).getMBB() == ML->getLoopLatch() | 
|---|
| 500 | ? Phi->getOperand(i: 3).getReg() | 
|---|
| 501 | : Phi->getOperand(i: 1).getReg(); | 
|---|
| 502 |  | 
|---|
| 503 | // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of | 
|---|
| 504 | // the preheader and add the new CountReg to it. We attempt to place it late | 
|---|
| 505 | // in the preheader, but may need to move that earlier based on uses. | 
|---|
| 506 | MachineBasicBlock *MBB = LoopStart->getParent(); | 
|---|
| 507 | MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator(); | 
|---|
| 508 | for (MachineInstr &Use : | 
|---|
| 509 | MRI->use_instructions(Reg: LoopStart->getOperand(i: 0).getReg())) | 
|---|
| 510 | if ((InsertPt != MBB->end() && !DT->dominates(A: &*InsertPt, B: &Use)) || | 
|---|
| 511 | !DT->dominates(A: ML->getHeader(), B: Use.getParent())) { | 
|---|
| 512 | LLVM_DEBUG(dbgs() << "  InsertPt could not be a terminator!\n"); | 
|---|
| 513 | return false; | 
|---|
| 514 | } | 
|---|
| 515 |  | 
|---|
| 516 | unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart | 
|---|
| 517 | ? ARM::t2DoLoopStartTP | 
|---|
| 518 | : ARM::t2WhileLoopStartTP; | 
|---|
| 519 | MachineInstrBuilder MI = | 
|---|
| 520 | BuildMI(BB&: *MBB, I: InsertPt, MIMD: LoopStart->getDebugLoc(), MCID: TII->get(Opcode: NewOpc)) | 
|---|
| 521 | .add(MO: LoopStart->getOperand(i: 0)) | 
|---|
| 522 | .add(MO: LoopStart->getOperand(i: 1)) | 
|---|
| 523 | .addReg(RegNo: CountReg); | 
|---|
| 524 | if (NewOpc == ARM::t2WhileLoopStartTP) | 
|---|
| 525 | MI.add(MO: LoopStart->getOperand(i: 2)); | 
|---|
| 526 | LLVM_DEBUG(dbgs() << "Replacing "<< *LoopStart << "  with " | 
|---|
| 527 | << *MI.getInstr()); | 
|---|
| 528 | MRI->constrainRegClass(Reg: CountReg, RC: &ARM::rGPRRegClass); | 
|---|
| 529 | LoopStart->eraseFromParent(); | 
|---|
| 530 |  | 
|---|
| 531 | if (SetLRPredicate) { | 
|---|
| 532 | // Each instruction in the loop needs to be using LR as the predicate from | 
|---|
| 533 | // the Phi as the predicate. | 
|---|
| 534 | Register LR = LoopPhi->getOperand(i: 0).getReg(); | 
|---|
| 535 | for (MachineInstr *MI : MVEInstrs) { | 
|---|
| 536 | int Idx = findFirstVPTPredOperandIdx(MI: *MI); | 
|---|
| 537 | MI->getOperand(i: Idx + 2).setReg(LR); | 
|---|
| 538 | } | 
|---|
| 539 | } | 
|---|
| 540 |  | 
|---|
| 541 | return true; | 
|---|
| 542 | } | 
|---|
| 543 |  | 
|---|
| 544 | // Returns true if Opcode is any VCMP Opcode. | 
|---|
| 545 | static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; } | 
|---|
| 546 |  | 
|---|
| 547 | // Returns true if a VCMP with this Opcode can have its operands swapped. | 
|---|
| 548 | // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs, | 
|---|
| 549 | // and VCMPr instructions (since the r is always on the right). | 
|---|
| 550 | static bool CanHaveSwappedOperands(unsigned Opcode) { | 
|---|
| 551 | switch (Opcode) { | 
|---|
| 552 | default: | 
|---|
| 553 | return true; | 
|---|
| 554 | case ARM::MVE_VCMPf32: | 
|---|
| 555 | case ARM::MVE_VCMPf16: | 
|---|
| 556 | case ARM::MVE_VCMPf32r: | 
|---|
| 557 | case ARM::MVE_VCMPf16r: | 
|---|
| 558 | case ARM::MVE_VCMPi8r: | 
|---|
| 559 | case ARM::MVE_VCMPi16r: | 
|---|
| 560 | case ARM::MVE_VCMPi32r: | 
|---|
| 561 | case ARM::MVE_VCMPu8r: | 
|---|
| 562 | case ARM::MVE_VCMPu16r: | 
|---|
| 563 | case ARM::MVE_VCMPu32r: | 
|---|
| 564 | case ARM::MVE_VCMPs8r: | 
|---|
| 565 | case ARM::MVE_VCMPs16r: | 
|---|
| 566 | case ARM::MVE_VCMPs32r: | 
|---|
| 567 | return false; | 
|---|
| 568 | } | 
|---|
| 569 | } | 
|---|
| 570 |  | 
|---|
| 571 | // Returns the CondCode of a VCMP Instruction. | 
|---|
| 572 | static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) { | 
|---|
| 573 | assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP"); | 
|---|
| 574 | return ARMCC::CondCodes(Instr.getOperand(i: 3).getImm()); | 
|---|
| 575 | } | 
|---|
| 576 |  | 
|---|
| 577 | // Returns true if Cond is equivalent to a VPNOT instruction on the result of | 
|---|
| 578 | // Prev. Cond and Prev must be VCMPs. | 
|---|
| 579 | static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) { | 
|---|
| 580 | assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode())); | 
|---|
| 581 |  | 
|---|
| 582 | // Opcodes must match. | 
|---|
| 583 | if (Cond.getOpcode() != Prev.getOpcode()) | 
|---|
| 584 | return false; | 
|---|
| 585 |  | 
|---|
| 586 | MachineOperand &CondOP1 = Cond.getOperand(i: 1), &CondOP2 = Cond.getOperand(i: 2); | 
|---|
| 587 | MachineOperand &PrevOP1 = Prev.getOperand(i: 1), &PrevOP2 = Prev.getOperand(i: 2); | 
|---|
| 588 |  | 
|---|
| 589 | // If the VCMP has the opposite condition with the same operands, we can | 
|---|
| 590 | // replace it with a VPNOT | 
|---|
| 591 | ARMCC::CondCodes ExpectedCode = GetCondCode(Instr&: Cond); | 
|---|
| 592 | ExpectedCode = ARMCC::getOppositeCondition(CC: ExpectedCode); | 
|---|
| 593 | if (ExpectedCode == GetCondCode(Instr&: Prev)) | 
|---|
| 594 | if (CondOP1.isIdenticalTo(Other: PrevOP1) && CondOP2.isIdenticalTo(Other: PrevOP2)) | 
|---|
| 595 | return true; | 
|---|
| 596 | // Check again with operands swapped if possible | 
|---|
| 597 | if (!CanHaveSwappedOperands(Opcode: Cond.getOpcode())) | 
|---|
| 598 | return false; | 
|---|
| 599 | ExpectedCode = ARMCC::getSwappedCondition(CC: ExpectedCode); | 
|---|
| 600 | return ExpectedCode == GetCondCode(Instr&: Prev) && CondOP1.isIdenticalTo(Other: PrevOP2) && | 
|---|
| 601 | CondOP2.isIdenticalTo(Other: PrevOP1); | 
|---|
| 602 | } | 
|---|
| 603 |  | 
|---|
| 604 | // Returns true if Instr writes to VCCR. | 
|---|
| 605 | static bool IsWritingToVCCR(MachineInstr &Instr) { | 
|---|
| 606 | if (Instr.getNumOperands() == 0) | 
|---|
| 607 | return false; | 
|---|
| 608 | MachineOperand &Dst = Instr.getOperand(i: 0); | 
|---|
| 609 | if (!Dst.isReg()) | 
|---|
| 610 | return false; | 
|---|
| 611 | Register DstReg = Dst.getReg(); | 
|---|
| 612 | if (!DstReg.isVirtual()) | 
|---|
| 613 | return false; | 
|---|
| 614 | MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo(); | 
|---|
| 615 | const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(Reg: DstReg); | 
|---|
| 616 | return RegClass && (RegClass->getID() == ARM::VCCRRegClassID); | 
|---|
| 617 | } | 
|---|
| 618 |  | 
|---|
| 619 | // Transforms | 
|---|
| 620 | //    <Instr that uses %A ('User' Operand)> | 
|---|
| 621 | // Into | 
|---|
| 622 | //    %K = VPNOT %Target | 
|---|
| 623 | //    <Instr that uses %K ('User' Operand)> | 
|---|
| 624 | // And returns the newly inserted VPNOT. | 
|---|
| 625 | // This optimization is done in the hopes of preventing spills/reloads of VPR by | 
|---|
| 626 | // reducing the number of VCCR values with overlapping lifetimes. | 
|---|
| 627 | MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT( | 
|---|
| 628 | MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User, | 
|---|
| 629 | Register Target) { | 
|---|
| 630 | Register NewResult = MRI->createVirtualRegister(RegClass: MRI->getRegClass(Reg: Target)); | 
|---|
| 631 |  | 
|---|
| 632 | MachineInstrBuilder MIBuilder = | 
|---|
| 633 | BuildMI(BB&: MBB, I: &Instr, MIMD: Instr.getDebugLoc(), MCID: TII->get(Opcode: ARM::MVE_VPNOT)) | 
|---|
| 634 | .addDef(RegNo: NewResult) | 
|---|
| 635 | .addReg(RegNo: Target); | 
|---|
| 636 | addUnpredicatedMveVpredNOp(MIB&: MIBuilder); | 
|---|
| 637 |  | 
|---|
| 638 | // Make the user use NewResult instead, and clear its kill flag. | 
|---|
| 639 | User.setReg(NewResult); | 
|---|
| 640 | User.setIsKill(false); | 
|---|
| 641 |  | 
|---|
| 642 | LLVM_DEBUG(dbgs() << "  Inserting VPNOT (for spill prevention): "; | 
|---|
| 643 | MIBuilder.getInstr()->dump()); | 
|---|
| 644 |  | 
|---|
| 645 | return *MIBuilder.getInstr(); | 
|---|
| 646 | } | 
|---|
| 647 |  | 
|---|
| 648 | // Moves a VPNOT before its first user if an instruction that uses Reg is found | 
|---|
| 649 | // in-between the VPNOT and its user. | 
|---|
| 650 | // Returns true if there is at least one user of the VPNOT in the block. | 
|---|
| 651 | static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB, | 
|---|
| 652 | MachineBasicBlock::iterator Iter, | 
|---|
| 653 | Register Reg) { | 
|---|
| 654 | assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!"); | 
|---|
| 655 | assert(getVPTInstrPredicate(*Iter) == ARMVCC::None && | 
|---|
| 656 | "The VPNOT cannot be predicated"); | 
|---|
| 657 |  | 
|---|
| 658 | MachineInstr &VPNOT = *Iter; | 
|---|
| 659 | Register VPNOTResult = VPNOT.getOperand(i: 0).getReg(); | 
|---|
| 660 | Register VPNOTOperand = VPNOT.getOperand(i: 1).getReg(); | 
|---|
| 661 |  | 
|---|
| 662 | // Whether the VPNOT will need to be moved, and whether we found a user of the | 
|---|
| 663 | // VPNOT. | 
|---|
| 664 | bool MustMove = false, HasUser = false; | 
|---|
| 665 | MachineOperand *VPNOTOperandKiller = nullptr; | 
|---|
| 666 | for (; Iter != MBB.end(); ++Iter) { | 
|---|
| 667 | if (MachineOperand *MO = | 
|---|
| 668 | Iter->findRegisterUseOperand(Reg: VPNOTOperand, /*TRI=*/nullptr, | 
|---|
| 669 | /*isKill*/ true)) { | 
|---|
| 670 | // If we find the operand that kills the VPNOTOperand's result, save it. | 
|---|
| 671 | VPNOTOperandKiller = MO; | 
|---|
| 672 | } | 
|---|
| 673 |  | 
|---|
| 674 | if (Iter->findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr) != -1) { | 
|---|
| 675 | MustMove = true; | 
|---|
| 676 | continue; | 
|---|
| 677 | } | 
|---|
| 678 |  | 
|---|
| 679 | if (Iter->findRegisterUseOperandIdx(Reg: VPNOTResult, /*TRI=*/nullptr) == -1) | 
|---|
| 680 | continue; | 
|---|
| 681 |  | 
|---|
| 682 | HasUser = true; | 
|---|
| 683 | if (!MustMove) | 
|---|
| 684 | break; | 
|---|
| 685 |  | 
|---|
| 686 | // Move the VPNOT right before Iter | 
|---|
| 687 | LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << "  Before: "; | 
|---|
| 688 | Iter->dump()); | 
|---|
| 689 | MBB.splice(Where: Iter, Other: &MBB, From: VPNOT.getIterator()); | 
|---|
| 690 | // If we move the instr, and its operand was killed earlier, remove the kill | 
|---|
| 691 | // flag. | 
|---|
| 692 | if (VPNOTOperandKiller) | 
|---|
| 693 | VPNOTOperandKiller->setIsKill(false); | 
|---|
| 694 |  | 
|---|
| 695 | break; | 
|---|
| 696 | } | 
|---|
| 697 | return HasUser; | 
|---|
| 698 | } | 
|---|
| 699 |  | 
|---|
| 700 | // This optimisation attempts to reduce the number of overlapping lifetimes of | 
|---|
| 701 | // VCCR values by replacing uses of old VCCR values with VPNOTs. For example, | 
|---|
| 702 | // this replaces | 
|---|
| 703 | //    %A:vccr = (something) | 
|---|
| 704 | //    %B:vccr = VPNOT %A | 
|---|
| 705 | //    %Foo = (some op that uses %B) | 
|---|
| 706 | //    %Bar = (some op that uses %A) | 
|---|
| 707 | // With | 
|---|
| 708 | //    %A:vccr = (something) | 
|---|
| 709 | //    %B:vccr = VPNOT %A | 
|---|
| 710 | //    %Foo = (some op that uses %B) | 
|---|
| 711 | //    %TMP2:vccr = VPNOT %B | 
|---|
| 712 | //    %Bar = (some op that uses %A) | 
|---|
| 713 | bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) { | 
|---|
| 714 | MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end(); | 
|---|
| 715 | SmallVector<MachineInstr *, 4> DeadInstructions; | 
|---|
| 716 | bool Modified = false; | 
|---|
| 717 |  | 
|---|
| 718 | while (Iter != End) { | 
|---|
| 719 | Register VCCRValue, OppositeVCCRValue; | 
|---|
| 720 | // The first loop looks for 2 unpredicated instructions: | 
|---|
| 721 | //    %A:vccr = (instr)     ; A is stored in VCCRValue | 
|---|
| 722 | //    %B:vccr = VPNOT %A    ; B is stored in OppositeVCCRValue | 
|---|
| 723 | for (; Iter != End; ++Iter) { | 
|---|
| 724 | // We're only interested in unpredicated instructions that write to VCCR. | 
|---|
| 725 | if (!IsWritingToVCCR(Instr&: *Iter) || | 
|---|
| 726 | getVPTInstrPredicate(MI: *Iter) != ARMVCC::None) | 
|---|
| 727 | continue; | 
|---|
| 728 | Register Dst = Iter->getOperand(i: 0).getReg(); | 
|---|
| 729 |  | 
|---|
| 730 | // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've | 
|---|
| 731 | // found what we were looking for. | 
|---|
| 732 | if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT && | 
|---|
| 733 | Iter->findRegisterUseOperandIdx(Reg: VCCRValue, /*TRI=*/nullptr) != -1) { | 
|---|
| 734 | // Move the VPNOT closer to its first user if needed, and ignore if it | 
|---|
| 735 | // has no users. | 
|---|
| 736 | if (!MoveVPNOTBeforeFirstUser(MBB, Iter, Reg: VCCRValue)) | 
|---|
| 737 | continue; | 
|---|
| 738 |  | 
|---|
| 739 | OppositeVCCRValue = Dst; | 
|---|
| 740 | ++Iter; | 
|---|
| 741 | break; | 
|---|
| 742 | } | 
|---|
| 743 |  | 
|---|
| 744 | // Else, just set VCCRValue. | 
|---|
| 745 | VCCRValue = Dst; | 
|---|
| 746 | } | 
|---|
| 747 |  | 
|---|
| 748 | // If the first inner loop didn't find anything, stop here. | 
|---|
| 749 | if (Iter == End) | 
|---|
| 750 | break; | 
|---|
| 751 |  | 
|---|
| 752 | assert(VCCRValue && OppositeVCCRValue && | 
|---|
| 753 | "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop " | 
|---|
| 754 | "stopped before the end of the block!"); | 
|---|
| 755 | assert(VCCRValue != OppositeVCCRValue && | 
|---|
| 756 | "VCCRValue should not be equal to OppositeVCCRValue!"); | 
|---|
| 757 |  | 
|---|
| 758 | // LastVPNOTResult always contains the same value as OppositeVCCRValue. | 
|---|
| 759 | Register LastVPNOTResult = OppositeVCCRValue; | 
|---|
| 760 |  | 
|---|
| 761 | // This second loop tries to optimize the remaining instructions. | 
|---|
| 762 | for (; Iter != End; ++Iter) { | 
|---|
| 763 | bool IsInteresting = false; | 
|---|
| 764 |  | 
|---|
| 765 | if (MachineOperand *MO = | 
|---|
| 766 | Iter->findRegisterUseOperand(Reg: VCCRValue, /*TRI=*/nullptr)) { | 
|---|
| 767 | IsInteresting = true; | 
|---|
| 768 |  | 
|---|
| 769 | // - If the instruction is a VPNOT, it can be removed, and we can just | 
|---|
| 770 | //   replace its uses with LastVPNOTResult. | 
|---|
| 771 | // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue. | 
|---|
| 772 | if (Iter->getOpcode() == ARM::MVE_VPNOT) { | 
|---|
| 773 | Register Result = Iter->getOperand(i: 0).getReg(); | 
|---|
| 774 |  | 
|---|
| 775 | MRI->replaceRegWith(FromReg: Result, ToReg: LastVPNOTResult); | 
|---|
| 776 | DeadInstructions.push_back(Elt: &*Iter); | 
|---|
| 777 | Modified = true; | 
|---|
| 778 |  | 
|---|
| 779 | LLVM_DEBUG(dbgs() | 
|---|
| 780 | << "Replacing all uses of '"<< printReg(Result) | 
|---|
| 781 | << "' with '"<< printReg(LastVPNOTResult) << "'\n"); | 
|---|
| 782 | } else { | 
|---|
| 783 | MachineInstr &VPNOT = | 
|---|
| 784 | ReplaceRegisterUseWithVPNOT(MBB, Instr&: *Iter, User&: *MO, Target: LastVPNOTResult); | 
|---|
| 785 | Modified = true; | 
|---|
| 786 |  | 
|---|
| 787 | LastVPNOTResult = VPNOT.getOperand(i: 0).getReg(); | 
|---|
| 788 | std::swap(a&: VCCRValue, b&: OppositeVCCRValue); | 
|---|
| 789 |  | 
|---|
| 790 | LLVM_DEBUG(dbgs() << "Replacing use of '"<< printReg(VCCRValue) | 
|---|
| 791 | << "' with '"<< printReg(LastVPNOTResult) | 
|---|
| 792 | << "' in instr: "<< *Iter); | 
|---|
| 793 | } | 
|---|
| 794 | } else { | 
|---|
| 795 | // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult | 
|---|
| 796 | // instead as they contain the same value. | 
|---|
| 797 | if (MachineOperand *MO = Iter->findRegisterUseOperand( | 
|---|
| 798 | Reg: OppositeVCCRValue, /*TRI=*/nullptr)) { | 
|---|
| 799 | IsInteresting = true; | 
|---|
| 800 |  | 
|---|
| 801 | // This is pointless if LastVPNOTResult == OppositeVCCRValue. | 
|---|
| 802 | if (LastVPNOTResult != OppositeVCCRValue) { | 
|---|
| 803 | LLVM_DEBUG(dbgs() << "Replacing usage of '" | 
|---|
| 804 | << printReg(OppositeVCCRValue) << "' with '" | 
|---|
| 805 | << printReg(LastVPNOTResult) << " for instr: "; | 
|---|
| 806 | Iter->dump()); | 
|---|
| 807 | MO->setReg(LastVPNOTResult); | 
|---|
| 808 | Modified = true; | 
|---|
| 809 | } | 
|---|
| 810 |  | 
|---|
| 811 | MO->setIsKill(false); | 
|---|
| 812 | } | 
|---|
| 813 |  | 
|---|
| 814 | // If this is an unpredicated VPNOT on | 
|---|
| 815 | // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it. | 
|---|
| 816 | if (Iter->getOpcode() == ARM::MVE_VPNOT && | 
|---|
| 817 | getVPTInstrPredicate(MI: *Iter) == ARMVCC::None) { | 
|---|
| 818 | Register VPNOTOperand = Iter->getOperand(i: 1).getReg(); | 
|---|
| 819 | if (VPNOTOperand == LastVPNOTResult || | 
|---|
| 820 | VPNOTOperand == OppositeVCCRValue) { | 
|---|
| 821 | IsInteresting = true; | 
|---|
| 822 |  | 
|---|
| 823 | std::swap(a&: VCCRValue, b&: OppositeVCCRValue); | 
|---|
| 824 | LastVPNOTResult = Iter->getOperand(i: 0).getReg(); | 
|---|
| 825 | } | 
|---|
| 826 | } | 
|---|
| 827 | } | 
|---|
| 828 |  | 
|---|
| 829 | // If this instruction was not interesting, and it writes to VCCR, stop. | 
|---|
| 830 | if (!IsInteresting && IsWritingToVCCR(Instr&: *Iter)) | 
|---|
| 831 | break; | 
|---|
| 832 | } | 
|---|
| 833 | } | 
|---|
| 834 |  | 
|---|
| 835 | for (MachineInstr *DeadInstruction : DeadInstructions) | 
|---|
| 836 | DeadInstruction->eraseFromParent(); | 
|---|
| 837 |  | 
|---|
| 838 | return Modified; | 
|---|
| 839 | } | 
|---|
| 840 |  | 
|---|
| 841 | // This optimisation replaces VCMPs with VPNOTs when they are equivalent. | 
|---|
| 842 | bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) { | 
|---|
| 843 | SmallVector<MachineInstr *, 4> DeadInstructions; | 
|---|
| 844 |  | 
|---|
| 845 | // The last VCMP that we have seen and that couldn't be replaced. | 
|---|
| 846 | // This is reset when an instruction that writes to VCCR/VPR is found, or when | 
|---|
| 847 | // a VCMP is replaced with a VPNOT. | 
|---|
| 848 | // We'll only replace VCMPs with VPNOTs when this is not null, and when the | 
|---|
| 849 | // current VCMP is the opposite of PrevVCMP. | 
|---|
| 850 | MachineInstr *PrevVCMP = nullptr; | 
|---|
| 851 | // If we find an instruction that kills the result of PrevVCMP, we save the | 
|---|
| 852 | // operand here to remove the kill flag in case we need to use PrevVCMP's | 
|---|
| 853 | // result. | 
|---|
| 854 | MachineOperand *PrevVCMPResultKiller = nullptr; | 
|---|
| 855 |  | 
|---|
| 856 | for (MachineInstr &Instr : MBB.instrs()) { | 
|---|
| 857 | if (PrevVCMP) { | 
|---|
| 858 | if (MachineOperand *MO = | 
|---|
| 859 | Instr.findRegisterUseOperand(Reg: PrevVCMP->getOperand(i: 0).getReg(), | 
|---|
| 860 | /*TRI=*/nullptr, /*isKill*/ true)) { | 
|---|
| 861 | // If we come accross the instr that kills PrevVCMP's result, record it | 
|---|
| 862 | // so we can remove the kill flag later if we need to. | 
|---|
| 863 | PrevVCMPResultKiller = MO; | 
|---|
| 864 | } | 
|---|
| 865 | } | 
|---|
| 866 |  | 
|---|
| 867 | // Ignore predicated instructions. | 
|---|
| 868 | if (getVPTInstrPredicate(MI: Instr) != ARMVCC::None) | 
|---|
| 869 | continue; | 
|---|
| 870 |  | 
|---|
| 871 | // Only look at VCMPs | 
|---|
| 872 | if (!IsVCMP(Opcode: Instr.getOpcode())) { | 
|---|
| 873 | // If the instruction writes to VCCR, forget the previous VCMP. | 
|---|
| 874 | if (IsWritingToVCCR(Instr)) | 
|---|
| 875 | PrevVCMP = nullptr; | 
|---|
| 876 | continue; | 
|---|
| 877 | } | 
|---|
| 878 |  | 
|---|
| 879 | if (!PrevVCMP || !IsVPNOTEquivalent(Cond&: Instr, Prev&: *PrevVCMP)) { | 
|---|
| 880 | PrevVCMP = &Instr; | 
|---|
| 881 | continue; | 
|---|
| 882 | } | 
|---|
| 883 |  | 
|---|
| 884 | // The register containing the result of the VCMP that we're going to | 
|---|
| 885 | // replace. | 
|---|
| 886 | Register PrevVCMPResultReg = PrevVCMP->getOperand(i: 0).getReg(); | 
|---|
| 887 |  | 
|---|
| 888 | // Build a VPNOT to replace the VCMP, reusing its operands. | 
|---|
| 889 | MachineInstrBuilder MIBuilder = | 
|---|
| 890 | BuildMI(BB&: MBB, I: &Instr, MIMD: Instr.getDebugLoc(), MCID: TII->get(Opcode: ARM::MVE_VPNOT)) | 
|---|
| 891 | .add(MO: Instr.getOperand(i: 0)) | 
|---|
| 892 | .addReg(RegNo: PrevVCMPResultReg); | 
|---|
| 893 | addUnpredicatedMveVpredNOp(MIB&: MIBuilder); | 
|---|
| 894 | LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): "; | 
|---|
| 895 | MIBuilder.getInstr()->dump(); dbgs() << "  Removed VCMP: "; | 
|---|
| 896 | Instr.dump()); | 
|---|
| 897 |  | 
|---|
| 898 | // If we found an instruction that uses, and kills PrevVCMP's result, | 
|---|
| 899 | // remove the kill flag. | 
|---|
| 900 | if (PrevVCMPResultKiller) | 
|---|
| 901 | PrevVCMPResultKiller->setIsKill(false); | 
|---|
| 902 |  | 
|---|
| 903 | // Finally, mark the old VCMP for removal and reset | 
|---|
| 904 | // PrevVCMP/PrevVCMPResultKiller. | 
|---|
| 905 | DeadInstructions.push_back(Elt: &Instr); | 
|---|
| 906 | PrevVCMP = nullptr; | 
|---|
| 907 | PrevVCMPResultKiller = nullptr; | 
|---|
| 908 | } | 
|---|
| 909 |  | 
|---|
| 910 | for (MachineInstr *DeadInstruction : DeadInstructions) | 
|---|
| 911 | DeadInstruction->eraseFromParent(); | 
|---|
| 912 |  | 
|---|
| 913 | return !DeadInstructions.empty(); | 
|---|
| 914 | } | 
|---|
| 915 |  | 
|---|
| 916 | bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB, | 
|---|
| 917 | MachineDominatorTree *DT) { | 
|---|
| 918 | // Scan through the block, looking for instructions that use constants moves | 
|---|
| 919 | // into VPR that are the negative of one another. These are expected to be | 
|---|
| 920 | // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant | 
|---|
| 921 | // mask is kept it or and VPNOT's of it are added or reused as we scan through | 
|---|
| 922 | // the function. | 
|---|
| 923 | unsigned LastVPTImm = 0; | 
|---|
| 924 | Register LastVPTReg = 0; | 
|---|
| 925 | SmallSet<MachineInstr *, 4> DeadInstructions; | 
|---|
| 926 |  | 
|---|
| 927 | for (MachineInstr &Instr : MBB.instrs()) { | 
|---|
| 928 | // Look for predicated MVE instructions. | 
|---|
| 929 | int PIdx = llvm::findFirstVPTPredOperandIdx(MI: Instr); | 
|---|
| 930 | if (PIdx == -1) | 
|---|
| 931 | continue; | 
|---|
| 932 | Register VPR = Instr.getOperand(i: PIdx + 1).getReg(); | 
|---|
| 933 | if (!VPR.isVirtual()) | 
|---|
| 934 | continue; | 
|---|
| 935 |  | 
|---|
| 936 | // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr. | 
|---|
| 937 | MachineInstr *Copy = MRI->getVRegDef(Reg: VPR); | 
|---|
| 938 | if (!Copy || Copy->getOpcode() != TargetOpcode::COPY || | 
|---|
| 939 | !Copy->getOperand(i: 1).getReg().isVirtual() || | 
|---|
| 940 | MRI->getRegClass(Reg: Copy->getOperand(i: 1).getReg()) == &ARM::VCCRRegClass) { | 
|---|
| 941 | LastVPTReg = 0; | 
|---|
| 942 | continue; | 
|---|
| 943 | } | 
|---|
| 944 | Register GPR = Copy->getOperand(i: 1).getReg(); | 
|---|
| 945 |  | 
|---|
| 946 | // Find the Immediate used by the copy. | 
|---|
| 947 | auto getImm = [&](Register GPR) -> unsigned { | 
|---|
| 948 | MachineInstr *Def = MRI->getVRegDef(Reg: GPR); | 
|---|
| 949 | if (Def && (Def->getOpcode() == ARM::t2MOVi || | 
|---|
| 950 | Def->getOpcode() == ARM::t2MOVi16)) | 
|---|
| 951 | return Def->getOperand(i: 1).getImm(); | 
|---|
| 952 | return -1U; | 
|---|
| 953 | }; | 
|---|
| 954 | unsigned Imm = getImm(GPR); | 
|---|
| 955 | if (Imm == -1U) { | 
|---|
| 956 | LastVPTReg = 0; | 
|---|
| 957 | continue; | 
|---|
| 958 | } | 
|---|
| 959 |  | 
|---|
| 960 | unsigned NotImm = ~Imm & 0xffff; | 
|---|
| 961 | if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) { | 
|---|
| 962 | MRI->clearKillFlags(Reg: LastVPTReg); | 
|---|
| 963 | Instr.getOperand(i: PIdx + 1).setReg(LastVPTReg); | 
|---|
| 964 | if (MRI->use_empty(RegNo: VPR)) { | 
|---|
| 965 | DeadInstructions.insert(Ptr: Copy); | 
|---|
| 966 | if (MRI->hasOneUse(RegNo: GPR)) | 
|---|
| 967 | DeadInstructions.insert(Ptr: MRI->getVRegDef(Reg: GPR)); | 
|---|
| 968 | } | 
|---|
| 969 | LLVM_DEBUG(dbgs() << "Reusing predicate: in  "<< Instr); | 
|---|
| 970 | VPR = LastVPTReg; | 
|---|
| 971 | } else if (LastVPTReg != 0 && LastVPTImm == NotImm) { | 
|---|
| 972 | // We have found the not of a previous constant. Create a VPNot of the | 
|---|
| 973 | // earlier predicate reg and use it instead of the copy. | 
|---|
| 974 | Register NewVPR = MRI->createVirtualRegister(RegClass: &ARM::VCCRRegClass); | 
|---|
| 975 | auto VPNot = BuildMI(BB&: MBB, I: &Instr, MIMD: Instr.getDebugLoc(), | 
|---|
| 976 | MCID: TII->get(Opcode: ARM::MVE_VPNOT), DestReg: NewVPR) | 
|---|
| 977 | .addReg(RegNo: LastVPTReg); | 
|---|
| 978 | addUnpredicatedMveVpredNOp(MIB&: VPNot); | 
|---|
| 979 |  | 
|---|
| 980 | // Use the new register and check if the def is now dead. | 
|---|
| 981 | Instr.getOperand(i: PIdx + 1).setReg(NewVPR); | 
|---|
| 982 | if (MRI->use_empty(RegNo: VPR)) { | 
|---|
| 983 | DeadInstructions.insert(Ptr: Copy); | 
|---|
| 984 | if (MRI->hasOneUse(RegNo: GPR)) | 
|---|
| 985 | DeadInstructions.insert(Ptr: MRI->getVRegDef(Reg: GPR)); | 
|---|
| 986 | } | 
|---|
| 987 | MRI->clearKillFlags(Reg: LastVPTReg); | 
|---|
| 988 | LLVM_DEBUG(dbgs() << "Adding VPNot: "<< *VPNot << "  to replace use at " | 
|---|
| 989 | << Instr); | 
|---|
| 990 | VPR = NewVPR; | 
|---|
| 991 | } | 
|---|
| 992 |  | 
|---|
| 993 | LastVPTImm = Imm; | 
|---|
| 994 | LastVPTReg = VPR; | 
|---|
| 995 | } | 
|---|
| 996 |  | 
|---|
| 997 | for (MachineInstr *DI : DeadInstructions) | 
|---|
| 998 | DI->eraseFromParent(); | 
|---|
| 999 |  | 
|---|
| 1000 | return !DeadInstructions.empty(); | 
|---|
| 1001 | } | 
|---|
| 1002 |  | 
|---|
| 1003 | // Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a | 
|---|
| 1004 | // somewhat blunt approximation to allow tail predicated with vpsel | 
|---|
| 1005 | // instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly | 
|---|
| 1006 | // different semantics under tail predication. Until that is modelled we just | 
|---|
| 1007 | // convert to a VMOVT (via a predicated VORR) instead. | 
|---|
| 1008 | bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) { | 
|---|
| 1009 | bool HasVCTP = false; | 
|---|
| 1010 | SmallVector<MachineInstr *, 4> DeadInstructions; | 
|---|
| 1011 |  | 
|---|
| 1012 | for (MachineInstr &MI : MBB.instrs()) { | 
|---|
| 1013 | if (isVCTP(MI: &MI)) { | 
|---|
| 1014 | HasVCTP = true; | 
|---|
| 1015 | continue; | 
|---|
| 1016 | } | 
|---|
| 1017 |  | 
|---|
| 1018 | if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL) | 
|---|
| 1019 | continue; | 
|---|
| 1020 |  | 
|---|
| 1021 | MachineInstrBuilder MIBuilder = | 
|---|
| 1022 | BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: ARM::MVE_VORR)) | 
|---|
| 1023 | .add(MO: MI.getOperand(i: 0)) | 
|---|
| 1024 | .add(MO: MI.getOperand(i: 1)) | 
|---|
| 1025 | .add(MO: MI.getOperand(i: 1)) | 
|---|
| 1026 | .addImm(Val: ARMVCC::Then) | 
|---|
| 1027 | .add(MO: MI.getOperand(i: 4)) | 
|---|
| 1028 | .add(MO: MI.getOperand(i: 5)) | 
|---|
| 1029 | .add(MO: MI.getOperand(i: 2)); | 
|---|
| 1030 | // Silence unused variable warning in release builds. | 
|---|
| 1031 | (void)MIBuilder; | 
|---|
| 1032 | LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump(); | 
|---|
| 1033 | dbgs() << "     with VMOVT: "; MIBuilder.getInstr()->dump()); | 
|---|
| 1034 | DeadInstructions.push_back(Elt: &MI); | 
|---|
| 1035 | } | 
|---|
| 1036 |  | 
|---|
| 1037 | for (MachineInstr *DeadInstruction : DeadInstructions) | 
|---|
| 1038 | DeadInstruction->eraseFromParent(); | 
|---|
| 1039 |  | 
|---|
| 1040 | return !DeadInstructions.empty(); | 
|---|
| 1041 | } | 
|---|
| 1042 |  | 
|---|
| 1043 | // Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as | 
|---|
| 1044 | // the instruction may be removable as a noop. | 
|---|
| 1045 | bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) { | 
|---|
| 1046 | bool Changed = false; | 
|---|
| 1047 | for (MachineInstr &MI : MBB.instrs()) { | 
|---|
| 1048 | if (MI.getOpcode() != ARM::t2DoLoopStart) | 
|---|
| 1049 | continue; | 
|---|
| 1050 | Register R = MI.getOperand(i: 1).getReg(); | 
|---|
| 1051 | MachineFunction *MF = MI.getParent()->getParent(); | 
|---|
| 1052 | MF->getRegInfo().setRegAllocationHint(VReg: R, Type: ARMRI::RegLR, PrefReg: 0); | 
|---|
| 1053 | Changed = true; | 
|---|
| 1054 | } | 
|---|
| 1055 | return Changed; | 
|---|
| 1056 | } | 
|---|
| 1057 |  | 
|---|
| 1058 | bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) { | 
|---|
| 1059 | const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); | 
|---|
| 1060 |  | 
|---|
| 1061 | if (!STI.isThumb2() || !STI.hasLOB()) | 
|---|
| 1062 | return false; | 
|---|
| 1063 |  | 
|---|
| 1064 | TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo()); | 
|---|
| 1065 | MRI = &Fn.getRegInfo(); | 
|---|
| 1066 | MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); | 
|---|
| 1067 | MachineDominatorTree *DT = | 
|---|
| 1068 | &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); | 
|---|
| 1069 |  | 
|---|
| 1070 | LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n" | 
|---|
| 1071 | << "********** Function: "<< Fn.getName() << '\n'); | 
|---|
| 1072 |  | 
|---|
| 1073 | bool Modified = false; | 
|---|
| 1074 | for (MachineLoop *ML : MLI->getLoopsInPreorder()) { | 
|---|
| 1075 | Modified |= LowerWhileLoopStart(ML); | 
|---|
| 1076 | Modified |= MergeLoopEnd(ML); | 
|---|
| 1077 | Modified |= ConvertTailPredLoop(ML, DT); | 
|---|
| 1078 | } | 
|---|
| 1079 |  | 
|---|
| 1080 | for (MachineBasicBlock &MBB : Fn) { | 
|---|
| 1081 | Modified |= HintDoLoopStartReg(MBB); | 
|---|
| 1082 | Modified |= ReplaceConstByVPNOTs(MBB, DT); | 
|---|
| 1083 | Modified |= ReplaceVCMPsByVPNOTs(MBB); | 
|---|
| 1084 | Modified |= ReduceOldVCCRValueUses(MBB); | 
|---|
| 1085 | Modified |= ConvertVPSEL(MBB); | 
|---|
| 1086 | } | 
|---|
| 1087 |  | 
|---|
| 1088 | LLVM_DEBUG(dbgs() << "**************************************\n"); | 
|---|
| 1089 | return Modified; | 
|---|
| 1090 | } | 
|---|
| 1091 |  | 
|---|
| 1092 | /// createMVETPAndVPTOptimisationsPass | 
|---|
| 1093 | FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() { | 
|---|
| 1094 | return new MVETPAndVPTOptimisations(); | 
|---|
| 1095 | } | 
|---|
| 1096 |  | 
|---|