| 1 | //===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // A pre-emit peephole for catching opportunities introduced by late passes such |
| 10 | // as MachineBlockPlacement. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "PPC.h" |
| 15 | #include "PPCInstrInfo.h" |
| 16 | #include "PPCSubtarget.h" |
| 17 | #include "llvm/ADT/Statistic.h" |
| 18 | #include "llvm/CodeGen/LivePhysRegs.h" |
| 19 | #include "llvm/CodeGen/MachineBasicBlock.h" |
| 20 | #include "llvm/CodeGen/MachineFunctionPass.h" |
| 21 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 22 | #include "llvm/CodeGen/RegisterScavenging.h" |
| 23 | #include "llvm/MC/MCContext.h" |
| 24 | #include "llvm/Support/CommandLine.h" |
| 25 | #include "llvm/Support/Debug.h" |
| 26 | |
| 27 | using namespace llvm; |
| 28 | |
| 29 | #define DEBUG_TYPE "ppc-pre-emit-peephole" |
| 30 | |
| 31 | STATISTIC(NumRRConvertedInPreEmit, |
| 32 | "Number of r+r instructions converted to r+i in pre-emit peephole" ); |
| 33 | STATISTIC(NumRemovedInPreEmit, |
| 34 | "Number of instructions deleted in pre-emit peephole" ); |
| 35 | STATISTIC(NumberOfSelfCopies, |
| 36 | "Number of self copy instructions eliminated" ); |
| 37 | STATISTIC(NumFrameOffFoldInPreEmit, |
| 38 | "Number of folding frame offset by using r+r in pre-emit peephole" ); |
| 39 | STATISTIC(NumCmpsInPreEmit, |
| 40 | "Number of compares eliminated in pre-emit peephole" ); |
| 41 | |
| 42 | static cl::opt<bool> |
| 43 | EnablePCRelLinkerOpt("ppc-pcrel-linker-opt" , cl::Hidden, cl::init(Val: true), |
| 44 | cl::desc("enable PC Relative linker optimization" )); |
| 45 | |
| 46 | static cl::opt<bool> |
| 47 | RunPreEmitPeephole("ppc-late-peephole" , cl::Hidden, cl::init(Val: true), |
| 48 | cl::desc("Run pre-emit peephole optimizations." )); |
| 49 | |
| 50 | static cl::opt<uint64_t> |
| 51 | DSCRValue("ppc-set-dscr" , cl::Hidden, |
| 52 | cl::desc("Set the Data Stream Control Register." )); |
| 53 | |
| 54 | namespace { |
| 55 | |
| 56 | static bool hasPCRelativeForm(MachineInstr &Use) { |
| 57 | switch (Use.getOpcode()) { |
| 58 | default: |
| 59 | return false; |
| 60 | case PPC::LBZ: |
| 61 | case PPC::LBZ8: |
| 62 | case PPC::LHA: |
| 63 | case PPC::LHA8: |
| 64 | case PPC::LHZ: |
| 65 | case PPC::LHZ8: |
| 66 | case PPC::LWZ: |
| 67 | case PPC::LWZ8: |
| 68 | case PPC::STB: |
| 69 | case PPC::STB8: |
| 70 | case PPC::STH: |
| 71 | case PPC::STH8: |
| 72 | case PPC::STW: |
| 73 | case PPC::STW8: |
| 74 | case PPC::LD: |
| 75 | case PPC::STD: |
| 76 | case PPC::LWA: |
| 77 | case PPC::LXSD: |
| 78 | case PPC::LXSSP: |
| 79 | case PPC::LXV: |
| 80 | case PPC::STXSD: |
| 81 | case PPC::STXSSP: |
| 82 | case PPC::STXV: |
| 83 | case PPC::LFD: |
| 84 | case PPC::LFS: |
| 85 | case PPC::STFD: |
| 86 | case PPC::STFS: |
| 87 | case PPC::DFLOADf32: |
| 88 | case PPC::DFLOADf64: |
| 89 | case PPC::DFSTOREf32: |
| 90 | case PPC::DFSTOREf64: |
| 91 | return true; |
| 92 | } |
| 93 | } |
| 94 | |
| 95 | class PPCPreEmitPeephole : public MachineFunctionPass { |
| 96 | public: |
| 97 | static char ID; |
| 98 | PPCPreEmitPeephole() : MachineFunctionPass(ID) {} |
| 99 | |
| 100 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 101 | MachineFunctionPass::getAnalysisUsage(AU); |
| 102 | } |
| 103 | |
| 104 | MachineFunctionProperties getRequiredProperties() const override { |
| 105 | return MachineFunctionProperties().setNoVRegs(); |
| 106 | } |
| 107 | |
| 108 | // This function removes any redundant load immediates. It has two level |
| 109 | // loops - The outer loop finds the load immediates BBI that could be used |
| 110 | // to replace following redundancy. The inner loop scans instructions that |
| 111 | // after BBI to find redundancy and update kill/dead flags accordingly. If |
| 112 | // AfterBBI is the same as BBI, it is redundant, otherwise any instructions |
| 113 | // that modify the def register of BBI would break the scanning. |
| 114 | // DeadOrKillToUnset is a pointer to the previous operand that had the |
| 115 | // kill/dead flag set. It keeps track of the def register of BBI, the use |
| 116 | // registers of AfterBBIs and the def registers of AfterBBIs. |
| 117 | bool removeRedundantLIs(MachineBasicBlock &MBB, |
| 118 | const TargetRegisterInfo *TRI) { |
| 119 | LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n" ; |
| 120 | MBB.dump(); dbgs() << "\n" ); |
| 121 | |
| 122 | DenseSet<MachineInstr *> InstrsToErase; |
| 123 | for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { |
| 124 | // Skip load immediate that is marked to be erased later because it |
| 125 | // cannot be used to replace any other instructions. |
| 126 | if (InstrsToErase.contains(V: &*BBI)) |
| 127 | continue; |
| 128 | // Skip non-load immediate. |
| 129 | unsigned Opc = BBI->getOpcode(); |
| 130 | if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS && |
| 131 | Opc != PPC::LIS8) |
| 132 | continue; |
| 133 | // Skip load immediate, where the operand is a relocation (e.g., $r3 = |
| 134 | // LI target-flags(ppc-lo) %const.0). |
| 135 | if (!BBI->getOperand(i: 1).isImm()) |
| 136 | continue; |
| 137 | assert(BBI->getOperand(0).isReg() && |
| 138 | "Expected a register for the first operand" ); |
| 139 | |
| 140 | LLVM_DEBUG(dbgs() << "Scanning after load immediate: " ; BBI->dump();); |
| 141 | |
| 142 | Register Reg = BBI->getOperand(i: 0).getReg(); |
| 143 | int64_t Imm = BBI->getOperand(i: 1).getImm(); |
| 144 | MachineOperand *DeadOrKillToUnset = nullptr; |
| 145 | if (BBI->getOperand(i: 0).isDead()) { |
| 146 | DeadOrKillToUnset = &BBI->getOperand(i: 0); |
| 147 | LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset |
| 148 | << " from load immediate " << *BBI |
| 149 | << " is a unsetting candidate\n" ); |
| 150 | } |
| 151 | // This loop scans instructions after BBI to see if there is any |
| 152 | // redundant load immediate. |
| 153 | for (auto AfterBBI = std::next(x: BBI); AfterBBI != MBB.instr_end(); |
| 154 | ++AfterBBI) { |
| 155 | // Track the operand that kill Reg. We would unset the kill flag of |
| 156 | // the operand if there is a following redundant load immediate. |
| 157 | int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, TRI, isKill: true); |
| 158 | |
| 159 | // We can't just clear implicit kills, so if we encounter one, stop |
| 160 | // looking further. |
| 161 | if (KillIdx != -1 && AfterBBI->getOperand(i: KillIdx).isImplicit()) { |
| 162 | LLVM_DEBUG(dbgs() |
| 163 | << "Encountered an implicit kill, cannot proceed: " ); |
| 164 | LLVM_DEBUG(AfterBBI->dump()); |
| 165 | break; |
| 166 | } |
| 167 | |
| 168 | if (KillIdx != -1) { |
| 169 | assert(!DeadOrKillToUnset && "Shouldn't kill same register twice" ); |
| 170 | DeadOrKillToUnset = &AfterBBI->getOperand(i: KillIdx); |
| 171 | LLVM_DEBUG(dbgs() |
| 172 | << " Kill flag of " << *DeadOrKillToUnset << " from " |
| 173 | << *AfterBBI << " is a unsetting candidate\n" ); |
| 174 | } |
| 175 | |
| 176 | if (!AfterBBI->modifiesRegister(Reg, TRI)) |
| 177 | continue; |
| 178 | // Finish scanning because Reg is overwritten by a non-load |
| 179 | // instruction. |
| 180 | if (AfterBBI->getOpcode() != Opc) |
| 181 | break; |
| 182 | assert(AfterBBI->getOperand(0).isReg() && |
| 183 | "Expected a register for the first operand" ); |
| 184 | // Finish scanning because Reg is overwritten by a relocation or a |
| 185 | // different value. |
| 186 | if (!AfterBBI->getOperand(i: 1).isImm() || |
| 187 | AfterBBI->getOperand(i: 1).getImm() != Imm) |
| 188 | break; |
| 189 | |
| 190 | // It loads same immediate value to the same Reg, which is redundant. |
| 191 | // We would unset kill flag in previous Reg usage to extend live range |
| 192 | // of Reg first, then remove the redundancy. |
| 193 | if (DeadOrKillToUnset) { |
| 194 | LLVM_DEBUG(dbgs() |
| 195 | << " Unset dead/kill flag of " << *DeadOrKillToUnset |
| 196 | << " from " << *DeadOrKillToUnset->getParent()); |
| 197 | if (DeadOrKillToUnset->isDef()) |
| 198 | DeadOrKillToUnset->setIsDead(false); |
| 199 | else |
| 200 | DeadOrKillToUnset->setIsKill(false); |
| 201 | } |
| 202 | DeadOrKillToUnset = |
| 203 | AfterBBI->findRegisterDefOperand(Reg, TRI, isDead: true, Overlap: true); |
| 204 | if (DeadOrKillToUnset) |
| 205 | LLVM_DEBUG(dbgs() |
| 206 | << " Dead flag of " << *DeadOrKillToUnset << " from " |
| 207 | << *AfterBBI << " is a unsetting candidate\n" ); |
| 208 | InstrsToErase.insert(V: &*AfterBBI); |
| 209 | LLVM_DEBUG(dbgs() << " Remove redundant load immediate: " ; |
| 210 | AfterBBI->dump()); |
| 211 | } |
| 212 | } |
| 213 | |
| 214 | for (MachineInstr *MI : InstrsToErase) { |
| 215 | MI->eraseFromParent(); |
| 216 | } |
| 217 | NumRemovedInPreEmit += InstrsToErase.size(); |
| 218 | return !InstrsToErase.empty(); |
| 219 | } |
| 220 | |
| 221 | // Check if this instruction is a PLDpc that is part of a GOT indirect |
| 222 | // access. |
| 223 | bool isGOTPLDpc(MachineInstr &Instr) { |
| 224 | if (Instr.getOpcode() != PPC::PLDpc) |
| 225 | return false; |
| 226 | |
| 227 | // The result must be a register. |
| 228 | const MachineOperand &LoadedAddressReg = Instr.getOperand(i: 0); |
| 229 | if (!LoadedAddressReg.isReg()) |
| 230 | return false; |
| 231 | |
| 232 | // Make sure that this is a global symbol. |
| 233 | const MachineOperand &SymbolOp = Instr.getOperand(i: 1); |
| 234 | if (!SymbolOp.isGlobal()) |
| 235 | return false; |
| 236 | |
| 237 | // Finally return true only if the GOT flag is present. |
| 238 | return PPCInstrInfo::hasGOTFlag(TF: SymbolOp.getTargetFlags()); |
| 239 | } |
| 240 | |
| 241 | bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) { |
| 242 | MachineFunction *MF = MBB.getParent(); |
| 243 | // If the linker opt is disabled then just return. |
| 244 | if (!EnablePCRelLinkerOpt) |
| 245 | return false; |
| 246 | |
| 247 | // Add this linker opt only if we are using PC Relative memops. |
| 248 | if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) |
| 249 | return false; |
| 250 | |
| 251 | // Struct to keep track of one def/use pair for a GOT indirect access. |
| 252 | struct GOTDefUsePair { |
| 253 | MachineBasicBlock::iterator DefInst; |
| 254 | MachineBasicBlock::iterator UseInst; |
| 255 | Register DefReg; |
| 256 | Register UseReg; |
| 257 | bool StillValid; |
| 258 | }; |
| 259 | // Vector of def/ues pairs in this basic block. |
| 260 | SmallVector<GOTDefUsePair, 4> CandPairs; |
| 261 | SmallVector<GOTDefUsePair, 4> ValidPairs; |
| 262 | bool MadeChange = false; |
| 263 | |
| 264 | // Run through all of the instructions in the basic block and try to |
| 265 | // collect potential pairs of GOT indirect access instructions. |
| 266 | for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { |
| 267 | // Look for the initial GOT indirect load. |
| 268 | if (isGOTPLDpc(Instr&: *BBI)) { |
| 269 | GOTDefUsePair CurrentPair{.DefInst: BBI, .UseInst: MachineBasicBlock::iterator(), |
| 270 | .DefReg: BBI->getOperand(i: 0).getReg(), |
| 271 | .UseReg: PPC::NoRegister, .StillValid: true}; |
| 272 | CandPairs.push_back(Elt: CurrentPair); |
| 273 | continue; |
| 274 | } |
| 275 | |
| 276 | // We haven't encountered any new PLD instructions, nothing to check. |
| 277 | if (CandPairs.empty()) |
| 278 | continue; |
| 279 | |
| 280 | // Run through the candidate pairs and see if any of the registers |
| 281 | // defined in the PLD instructions are used by this instruction. |
| 282 | // Note: the size of CandPairs can change in the loop. |
| 283 | for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) { |
| 284 | GOTDefUsePair &Pair = CandPairs[Idx]; |
| 285 | // The instruction does not use or modify this PLD's def reg, |
| 286 | // ignore it. |
| 287 | if (!BBI->readsRegister(Reg: Pair.DefReg, TRI) && |
| 288 | !BBI->modifiesRegister(Reg: Pair.DefReg, TRI)) |
| 289 | continue; |
| 290 | |
| 291 | // The use needs to be used in the address computation and not |
| 292 | // as the register being stored for a store. |
| 293 | const MachineOperand *UseOp = |
| 294 | hasPCRelativeForm(Use&: *BBI) ? &BBI->getOperand(i: 2) : nullptr; |
| 295 | |
| 296 | // Check for a valid use. |
| 297 | if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg && |
| 298 | UseOp->isUse() && UseOp->isKill()) { |
| 299 | Pair.UseInst = BBI; |
| 300 | Pair.UseReg = BBI->getOperand(i: 0).getReg(); |
| 301 | ValidPairs.push_back(Elt: Pair); |
| 302 | } |
| 303 | CandPairs.erase(CI: CandPairs.begin() + Idx); |
| 304 | } |
| 305 | } |
| 306 | |
| 307 | // Go through all of the pairs and check for any more valid uses. |
| 308 | for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) { |
| 309 | // We shouldn't be here if we don't have a valid pair. |
| 310 | assert(Pair->UseInst.isValid() && Pair->StillValid && |
| 311 | "Kept an invalid def/use pair for GOT PCRel opt" ); |
| 312 | // We have found a potential pair. Search through the instructions |
| 313 | // between the def and the use to see if it is valid to mark this as a |
| 314 | // linker opt. |
| 315 | MachineBasicBlock::iterator BBI = Pair->DefInst; |
| 316 | ++BBI; |
| 317 | for (; BBI != Pair->UseInst; ++BBI) { |
| 318 | if (BBI->readsRegister(Reg: Pair->UseReg, TRI) || |
| 319 | BBI->modifiesRegister(Reg: Pair->UseReg, TRI)) { |
| 320 | Pair->StillValid = false; |
| 321 | break; |
| 322 | } |
| 323 | } |
| 324 | |
| 325 | if (!Pair->StillValid) |
| 326 | continue; |
| 327 | |
| 328 | // The load/store instruction that uses the address from the PLD will |
| 329 | // either use a register (for a store) or define a register (for the |
| 330 | // load). That register will be added as an implicit def to the PLD |
| 331 | // and as an implicit use on the second memory op. This is a precaution |
| 332 | // to prevent future passes from using that register between the two |
| 333 | // instructions. |
| 334 | MachineOperand ImplDef = |
| 335 | MachineOperand::CreateReg(Reg: Pair->UseReg, isDef: true, isImp: true); |
| 336 | MachineOperand ImplUse = |
| 337 | MachineOperand::CreateReg(Reg: Pair->UseReg, isDef: false, isImp: true); |
| 338 | Pair->DefInst->addOperand(Op: ImplDef); |
| 339 | Pair->UseInst->addOperand(Op: ImplUse); |
| 340 | |
| 341 | // Create the symbol. |
| 342 | MCContext &Context = MF->getContext(); |
| 343 | MCSymbol *Symbol = Context.createNamedTempSymbol(Name: "pcrel" ); |
| 344 | MachineOperand PCRelLabel = |
| 345 | MachineOperand::CreateMCSymbol(Sym: Symbol, TargetFlags: PPCII::MO_PCREL_OPT_FLAG); |
| 346 | Pair->DefInst->addOperand(MF&: *MF, Op: PCRelLabel); |
| 347 | Pair->UseInst->addOperand(MF&: *MF, Op: PCRelLabel); |
| 348 | MadeChange |= true; |
| 349 | } |
| 350 | return MadeChange; |
| 351 | } |
| 352 | |
| 353 | // This function removes redundant pairs of accumulator prime/unprime |
| 354 | // instructions. In some situations, it's possible the compiler inserts an |
| 355 | // accumulator prime instruction followed by an unprime instruction (e.g. |
| 356 | // when we store an accumulator after restoring it from a spill). If the |
| 357 | // accumulator is not used between the two, they can be removed. This |
| 358 | // function removes these redundant pairs from basic blocks. |
| 359 | // The algorithm is quite straightforward - every time we encounter a prime |
| 360 | // instruction, the primed register is added to a candidate set. Any use |
| 361 | // other than a prime removes the candidate from the set and any de-prime |
| 362 | // of a current candidate marks both the prime and de-prime for removal. |
| 363 | // This way we ensure we only remove prime/de-prime *pairs* with no |
| 364 | // intervening uses. |
| 365 | bool removeAccPrimeUnprime(MachineBasicBlock &MBB) { |
| 366 | DenseSet<MachineInstr *> InstrsToErase; |
| 367 | // Initially, none of the acc registers are candidates. |
| 368 | SmallVector<MachineInstr *, 8> Candidates( |
| 369 | PPC::UACCRCRegClass.getNumRegs(), nullptr); |
| 370 | |
| 371 | for (MachineInstr &BBI : MBB.instrs()) { |
| 372 | unsigned Opc = BBI.getOpcode(); |
| 373 | // If we are visiting a xxmtacc instruction, we add it and its operand |
| 374 | // register to the candidate set. |
| 375 | if (Opc == PPC::XXMTACC) { |
| 376 | Register Acc = BBI.getOperand(i: 0).getReg(); |
| 377 | assert(PPC::ACCRCRegClass.contains(Acc) && |
| 378 | "Unexpected register for XXMTACC" ); |
| 379 | Candidates[Acc - PPC::ACC0] = &BBI; |
| 380 | } |
| 381 | // If we are visiting a xxmfacc instruction and its operand register is |
| 382 | // in the candidate set, we mark the two instructions for removal. |
| 383 | else if (Opc == PPC::XXMFACC) { |
| 384 | Register Acc = BBI.getOperand(i: 0).getReg(); |
| 385 | assert(PPC::ACCRCRegClass.contains(Acc) && |
| 386 | "Unexpected register for XXMFACC" ); |
| 387 | if (!Candidates[Acc - PPC::ACC0]) |
| 388 | continue; |
| 389 | InstrsToErase.insert(V: &BBI); |
| 390 | InstrsToErase.insert(V: Candidates[Acc - PPC::ACC0]); |
| 391 | } |
| 392 | // If we are visiting an instruction using an accumulator register |
| 393 | // as operand, we remove it from the candidate set. |
| 394 | else { |
| 395 | for (MachineOperand &Operand : BBI.operands()) { |
| 396 | if (!Operand.isReg()) |
| 397 | continue; |
| 398 | Register Reg = Operand.getReg(); |
| 399 | if (PPC::ACCRCRegClass.contains(Reg)) |
| 400 | Candidates[Reg - PPC::ACC0] = nullptr; |
| 401 | } |
| 402 | } |
| 403 | } |
| 404 | |
| 405 | for (MachineInstr *MI : InstrsToErase) |
| 406 | MI->eraseFromParent(); |
| 407 | NumRemovedInPreEmit += InstrsToErase.size(); |
| 408 | return !InstrsToErase.empty(); |
| 409 | } |
| 410 | |
| 411 | bool runOnMachineFunction(MachineFunction &MF) override { |
| 412 | // If the user wants to set the DSCR using command-line options, |
| 413 | // load in the specified value at the start of main. |
| 414 | if (DSCRValue.getNumOccurrences() > 0 && MF.getName() == "main" && |
| 415 | MF.getFunction().hasExternalLinkage()) { |
| 416 | DSCRValue = (uint32_t)(DSCRValue & 0x01FFFFFF); // 25-bit DSCR mask |
| 417 | RegScavenger RS; |
| 418 | MachineBasicBlock &MBB = MF.front(); |
| 419 | // Find an unused GPR according to register liveness |
| 420 | RS.enterBasicBlock(MBB); |
| 421 | unsigned InDSCR = RS.FindUnusedReg(RC: &PPC::GPRCRegClass); |
| 422 | if (InDSCR) { |
| 423 | const PPCInstrInfo *TII = |
| 424 | MF.getSubtarget<PPCSubtarget>().getInstrInfo(); |
| 425 | DebugLoc dl; |
| 426 | MachineBasicBlock::iterator IP = MBB.begin(); // Insert Point |
| 427 | // Copy the 32-bit DSCRValue integer into the GPR InDSCR using LIS and |
| 428 | // ORI, then move to DSCR. If the requested DSCR value is contained |
| 429 | // in a 16-bit signed number, we can emit a single `LI`, but the |
| 430 | // impact of saving one instruction in one function does not warrant |
| 431 | // any additional complexity in the logic here. |
| 432 | BuildMI(BB&: MBB, I: IP, MIMD: dl, MCID: TII->get(Opcode: PPC::LIS), DestReg: InDSCR) |
| 433 | .addImm(Val: DSCRValue >> 16); |
| 434 | BuildMI(BB&: MBB, I: IP, MIMD: dl, MCID: TII->get(Opcode: PPC::ORI), DestReg: InDSCR) |
| 435 | .addReg(RegNo: InDSCR) |
| 436 | .addImm(Val: DSCRValue & 0xFFFF); |
| 437 | BuildMI(BB&: MBB, I: IP, MIMD: dl, MCID: TII->get(Opcode: PPC::MTUDSCR)) |
| 438 | .addReg(RegNo: InDSCR, flags: RegState::Kill); |
| 439 | } else |
| 440 | errs() << "Warning: Ran out of registers - Unable to set DSCR as " |
| 441 | "requested" ; |
| 442 | } |
| 443 | |
| 444 | if (skipFunction(F: MF.getFunction()) || !RunPreEmitPeephole) { |
| 445 | // Remove UNENCODED_NOP even when this pass is disabled. |
| 446 | // This needs to be done unconditionally so we don't emit zeros |
| 447 | // in the instruction stream. |
| 448 | SmallVector<MachineInstr *, 4> InstrsToErase; |
| 449 | for (MachineBasicBlock &MBB : MF) |
| 450 | for (MachineInstr &MI : MBB) |
| 451 | if (MI.getOpcode() == PPC::UNENCODED_NOP) |
| 452 | InstrsToErase.push_back(Elt: &MI); |
| 453 | for (MachineInstr *MI : InstrsToErase) |
| 454 | MI->eraseFromParent(); |
| 455 | return false; |
| 456 | } |
| 457 | bool Changed = false; |
| 458 | const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo(); |
| 459 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
| 460 | SmallVector<MachineInstr *, 4> InstrsToErase; |
| 461 | for (MachineBasicBlock &MBB : MF) { |
| 462 | Changed |= removeRedundantLIs(MBB, TRI); |
| 463 | Changed |= addLinkerOpt(MBB, TRI); |
| 464 | Changed |= removeAccPrimeUnprime(MBB); |
| 465 | for (MachineInstr &MI : MBB) { |
| 466 | unsigned Opc = MI.getOpcode(); |
| 467 | if (Opc == PPC::UNENCODED_NOP) { |
| 468 | InstrsToErase.push_back(Elt: &MI); |
| 469 | continue; |
| 470 | } |
| 471 | // Detect self copies - these can result from running AADB. |
| 472 | if (PPCInstrInfo::isSameClassPhysRegCopy(Opcode: Opc)) { |
| 473 | const MCInstrDesc &MCID = TII->get(Opcode: Opc); |
| 474 | if (MCID.getNumOperands() == 3 && |
| 475 | MI.getOperand(i: 0).getReg() == MI.getOperand(i: 1).getReg() && |
| 476 | MI.getOperand(i: 0).getReg() == MI.getOperand(i: 2).getReg()) { |
| 477 | NumberOfSelfCopies++; |
| 478 | LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: " ); |
| 479 | LLVM_DEBUG(MI.dump()); |
| 480 | InstrsToErase.push_back(Elt: &MI); |
| 481 | continue; |
| 482 | } |
| 483 | else if (MCID.getNumOperands() == 2 && |
| 484 | MI.getOperand(i: 0).getReg() == MI.getOperand(i: 1).getReg()) { |
| 485 | NumberOfSelfCopies++; |
| 486 | LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: " ); |
| 487 | LLVM_DEBUG(MI.dump()); |
| 488 | InstrsToErase.push_back(Elt: &MI); |
| 489 | continue; |
| 490 | } |
| 491 | } |
| 492 | MachineInstr *DefMIToErase = nullptr; |
| 493 | SmallSet<Register, 4> UpdatedRegs; |
| 494 | if (TII->convertToImmediateForm(MI, RegsToUpdate&: UpdatedRegs, KilledDef: &DefMIToErase)) { |
| 495 | Changed = true; |
| 496 | NumRRConvertedInPreEmit++; |
| 497 | LLVM_DEBUG(dbgs() << "Converted instruction to imm form: " ); |
| 498 | LLVM_DEBUG(MI.dump()); |
| 499 | if (DefMIToErase) { |
| 500 | InstrsToErase.push_back(Elt: DefMIToErase); |
| 501 | } |
| 502 | } |
| 503 | if (TII->foldFrameOffset(MI)) { |
| 504 | Changed = true; |
| 505 | NumFrameOffFoldInPreEmit++; |
| 506 | LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: " ); |
| 507 | LLVM_DEBUG(MI.dump()); |
| 508 | } |
| 509 | if (TII->optimizeCmpPostRA(MI)) { |
| 510 | Changed = true; |
| 511 | NumCmpsInPreEmit++; |
| 512 | LLVM_DEBUG(dbgs() << "Optimize compare by using record form: " ); |
| 513 | LLVM_DEBUG(MI.dump()); |
| 514 | InstrsToErase.push_back(Elt: &MI); |
| 515 | } |
| 516 | } |
| 517 | |
| 518 | // Eliminate conditional branch based on a constant CR bit by |
| 519 | // CRSET or CRUNSET. We eliminate the conditional branch or |
| 520 | // convert it into an unconditional branch. Also, if the CR bit |
| 521 | // is not used by other instructions, we eliminate CRSET as well. |
| 522 | auto I = MBB.getFirstInstrTerminator(); |
| 523 | if (I == MBB.instr_end()) |
| 524 | continue; |
| 525 | MachineInstr *Br = &*I; |
| 526 | if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn) |
| 527 | continue; |
| 528 | MachineInstr *CRSetMI = nullptr; |
| 529 | Register CRBit = Br->getOperand(i: 0).getReg(); |
| 530 | unsigned CRReg = getCRFromCRBit(SrcReg: CRBit); |
| 531 | bool SeenUse = false; |
| 532 | MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend(); |
| 533 | for (It++; It != Er; It++) { |
| 534 | if (It->modifiesRegister(Reg: CRBit, TRI)) { |
| 535 | if ((It->getOpcode() == PPC::CRUNSET || |
| 536 | It->getOpcode() == PPC::CRSET) && |
| 537 | It->getOperand(i: 0).getReg() == CRBit) |
| 538 | CRSetMI = &*It; |
| 539 | break; |
| 540 | } |
| 541 | if (It->readsRegister(Reg: CRBit, TRI)) |
| 542 | SeenUse = true; |
| 543 | } |
| 544 | if (!CRSetMI) continue; |
| 545 | |
| 546 | unsigned CRSetOp = CRSetMI->getOpcode(); |
| 547 | if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) || |
| 548 | (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) { |
| 549 | // Remove this branch since it cannot be taken. |
| 550 | InstrsToErase.push_back(Elt: Br); |
| 551 | MBB.removeSuccessor(Succ: Br->getOperand(i: 1).getMBB()); |
| 552 | } |
| 553 | else { |
| 554 | // This conditional branch is always taken. So, remove all branches |
| 555 | // and insert an unconditional branch to the destination of this. |
| 556 | MachineBasicBlock::iterator It = Br, Er = MBB.end(); |
| 557 | for (; It != Er; It++) { |
| 558 | if (It->isDebugInstr()) continue; |
| 559 | assert(It->isTerminator() && "Non-terminator after a terminator" ); |
| 560 | InstrsToErase.push_back(Elt: &*It); |
| 561 | } |
| 562 | if (!MBB.isLayoutSuccessor(MBB: Br->getOperand(i: 1).getMBB())) { |
| 563 | ArrayRef<MachineOperand> NoCond; |
| 564 | TII->insertBranch(MBB, TBB: Br->getOperand(i: 1).getMBB(), FBB: nullptr, |
| 565 | Cond: NoCond, DL: Br->getDebugLoc()); |
| 566 | } |
| 567 | for (auto &Succ : MBB.successors()) |
| 568 | if (Succ != Br->getOperand(i: 1).getMBB()) { |
| 569 | MBB.removeSuccessor(Succ); |
| 570 | break; |
| 571 | } |
| 572 | } |
| 573 | |
| 574 | // If the CRBit is not used by another instruction, we can eliminate |
| 575 | // CRSET/CRUNSET instruction. |
| 576 | if (!SeenUse) { |
| 577 | // We need to check use of the CRBit in successors. |
| 578 | for (auto &SuccMBB : MBB.successors()) |
| 579 | if (SuccMBB->isLiveIn(Reg: CRBit) || SuccMBB->isLiveIn(Reg: CRReg)) { |
| 580 | SeenUse = true; |
| 581 | break; |
| 582 | } |
| 583 | if (!SeenUse) |
| 584 | InstrsToErase.push_back(Elt: CRSetMI); |
| 585 | } |
| 586 | } |
| 587 | for (MachineInstr *MI : InstrsToErase) { |
| 588 | LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: " ); |
| 589 | LLVM_DEBUG(MI->dump()); |
| 590 | MI->eraseFromParent(); |
| 591 | NumRemovedInPreEmit++; |
| 592 | } |
| 593 | return Changed; |
| 594 | } |
| 595 | }; |
| 596 | } |
| 597 | |
| 598 | INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole" , |
| 599 | false, false) |
| 600 | char PPCPreEmitPeephole::ID = 0; |
| 601 | |
| 602 | FunctionPass *llvm::createPPCPreEmitPeepholePass() { |
| 603 | return new PPCPreEmitPeephole(); |
| 604 | } |
| 605 | |