1 | //===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // A pre-emit peephole for catching opportunities introduced by late passes such |
10 | // as MachineBlockPlacement. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "PPC.h" |
15 | #include "PPCInstrInfo.h" |
16 | #include "PPCSubtarget.h" |
17 | #include "llvm/ADT/Statistic.h" |
18 | #include "llvm/CodeGen/LivePhysRegs.h" |
19 | #include "llvm/CodeGen/MachineBasicBlock.h" |
20 | #include "llvm/CodeGen/MachineFunctionPass.h" |
21 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
22 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
23 | #include "llvm/CodeGen/RegisterScavenging.h" |
24 | #include "llvm/MC/MCContext.h" |
25 | #include "llvm/Support/CommandLine.h" |
26 | #include "llvm/Support/Debug.h" |
27 | |
28 | using namespace llvm; |
29 | |
30 | #define DEBUG_TYPE "ppc-pre-emit-peephole" |
31 | |
32 | STATISTIC(NumRRConvertedInPreEmit, |
33 | "Number of r+r instructions converted to r+i in pre-emit peephole" ); |
34 | STATISTIC(NumRemovedInPreEmit, |
35 | "Number of instructions deleted in pre-emit peephole" ); |
36 | STATISTIC(NumberOfSelfCopies, |
37 | "Number of self copy instructions eliminated" ); |
38 | STATISTIC(NumFrameOffFoldInPreEmit, |
39 | "Number of folding frame offset by using r+r in pre-emit peephole" ); |
40 | STATISTIC(NumCmpsInPreEmit, |
41 | "Number of compares eliminated in pre-emit peephole" ); |
42 | |
43 | static cl::opt<bool> |
44 | EnablePCRelLinkerOpt("ppc-pcrel-linker-opt" , cl::Hidden, cl::init(Val: true), |
45 | cl::desc("enable PC Relative linker optimization" )); |
46 | |
47 | static cl::opt<bool> |
48 | RunPreEmitPeephole("ppc-late-peephole" , cl::Hidden, cl::init(Val: true), |
49 | cl::desc("Run pre-emit peephole optimizations." )); |
50 | |
51 | static cl::opt<uint64_t> |
52 | DSCRValue("ppc-set-dscr" , cl::Hidden, |
53 | cl::desc("Set the Data Stream Control Register." )); |
54 | |
55 | namespace { |
56 | |
57 | static bool hasPCRelativeForm(MachineInstr &Use) { |
58 | switch (Use.getOpcode()) { |
59 | default: |
60 | return false; |
61 | case PPC::LBZ: |
62 | case PPC::LBZ8: |
63 | case PPC::LHA: |
64 | case PPC::LHA8: |
65 | case PPC::LHZ: |
66 | case PPC::LHZ8: |
67 | case PPC::LWZ: |
68 | case PPC::LWZ8: |
69 | case PPC::STB: |
70 | case PPC::STB8: |
71 | case PPC::STH: |
72 | case PPC::STH8: |
73 | case PPC::STW: |
74 | case PPC::STW8: |
75 | case PPC::LD: |
76 | case PPC::STD: |
77 | case PPC::LWA: |
78 | case PPC::LXSD: |
79 | case PPC::LXSSP: |
80 | case PPC::LXV: |
81 | case PPC::STXSD: |
82 | case PPC::STXSSP: |
83 | case PPC::STXV: |
84 | case PPC::LFD: |
85 | case PPC::LFS: |
86 | case PPC::STFD: |
87 | case PPC::STFS: |
88 | case PPC::DFLOADf32: |
89 | case PPC::DFLOADf64: |
90 | case PPC::DFSTOREf32: |
91 | case PPC::DFSTOREf64: |
92 | return true; |
93 | } |
94 | } |
95 | |
96 | class PPCPreEmitPeephole : public MachineFunctionPass { |
97 | public: |
98 | static char ID; |
99 | PPCPreEmitPeephole() : MachineFunctionPass(ID) { |
100 | initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry()); |
101 | } |
102 | |
103 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
104 | MachineFunctionPass::getAnalysisUsage(AU); |
105 | } |
106 | |
107 | MachineFunctionProperties getRequiredProperties() const override { |
108 | return MachineFunctionProperties().set( |
109 | MachineFunctionProperties::Property::NoVRegs); |
110 | } |
111 | |
112 | // This function removes any redundant load immediates. It has two level |
113 | // loops - The outer loop finds the load immediates BBI that could be used |
114 | // to replace following redundancy. The inner loop scans instructions that |
115 | // after BBI to find redundancy and update kill/dead flags accordingly. If |
116 | // AfterBBI is the same as BBI, it is redundant, otherwise any instructions |
117 | // that modify the def register of BBI would break the scanning. |
118 | // DeadOrKillToUnset is a pointer to the previous operand that had the |
119 | // kill/dead flag set. It keeps track of the def register of BBI, the use |
120 | // registers of AfterBBIs and the def registers of AfterBBIs. |
121 | bool removeRedundantLIs(MachineBasicBlock &MBB, |
122 | const TargetRegisterInfo *TRI) { |
123 | LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n" ; |
124 | MBB.dump(); dbgs() << "\n" ); |
125 | |
126 | DenseSet<MachineInstr *> InstrsToErase; |
127 | for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { |
128 | // Skip load immediate that is marked to be erased later because it |
129 | // cannot be used to replace any other instructions. |
130 | if (InstrsToErase.contains(V: &*BBI)) |
131 | continue; |
132 | // Skip non-load immediate. |
133 | unsigned Opc = BBI->getOpcode(); |
134 | if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS && |
135 | Opc != PPC::LIS8) |
136 | continue; |
137 | // Skip load immediate, where the operand is a relocation (e.g., $r3 = |
138 | // LI target-flags(ppc-lo) %const.0). |
139 | if (!BBI->getOperand(i: 1).isImm()) |
140 | continue; |
141 | assert(BBI->getOperand(0).isReg() && |
142 | "Expected a register for the first operand" ); |
143 | |
144 | LLVM_DEBUG(dbgs() << "Scanning after load immediate: " ; BBI->dump();); |
145 | |
146 | Register Reg = BBI->getOperand(i: 0).getReg(); |
147 | int64_t Imm = BBI->getOperand(i: 1).getImm(); |
148 | MachineOperand *DeadOrKillToUnset = nullptr; |
149 | if (BBI->getOperand(i: 0).isDead()) { |
150 | DeadOrKillToUnset = &BBI->getOperand(i: 0); |
151 | LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset |
152 | << " from load immediate " << *BBI |
153 | << " is a unsetting candidate\n" ); |
154 | } |
155 | // This loop scans instructions after BBI to see if there is any |
156 | // redundant load immediate. |
157 | for (auto AfterBBI = std::next(x: BBI); AfterBBI != MBB.instr_end(); |
158 | ++AfterBBI) { |
159 | // Track the operand that kill Reg. We would unset the kill flag of |
160 | // the operand if there is a following redundant load immediate. |
161 | int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, TRI, isKill: true); |
162 | |
163 | // We can't just clear implicit kills, so if we encounter one, stop |
164 | // looking further. |
165 | if (KillIdx != -1 && AfterBBI->getOperand(i: KillIdx).isImplicit()) { |
166 | LLVM_DEBUG(dbgs() |
167 | << "Encountered an implicit kill, cannot proceed: " ); |
168 | LLVM_DEBUG(AfterBBI->dump()); |
169 | break; |
170 | } |
171 | |
172 | if (KillIdx != -1) { |
173 | assert(!DeadOrKillToUnset && "Shouldn't kill same register twice" ); |
174 | DeadOrKillToUnset = &AfterBBI->getOperand(i: KillIdx); |
175 | LLVM_DEBUG(dbgs() |
176 | << " Kill flag of " << *DeadOrKillToUnset << " from " |
177 | << *AfterBBI << " is a unsetting candidate\n" ); |
178 | } |
179 | |
180 | if (!AfterBBI->modifiesRegister(Reg, TRI)) |
181 | continue; |
182 | // Finish scanning because Reg is overwritten by a non-load |
183 | // instruction. |
184 | if (AfterBBI->getOpcode() != Opc) |
185 | break; |
186 | assert(AfterBBI->getOperand(0).isReg() && |
187 | "Expected a register for the first operand" ); |
188 | // Finish scanning because Reg is overwritten by a relocation or a |
189 | // different value. |
190 | if (!AfterBBI->getOperand(i: 1).isImm() || |
191 | AfterBBI->getOperand(i: 1).getImm() != Imm) |
192 | break; |
193 | |
194 | // It loads same immediate value to the same Reg, which is redundant. |
195 | // We would unset kill flag in previous Reg usage to extend live range |
196 | // of Reg first, then remove the redundancy. |
197 | if (DeadOrKillToUnset) { |
198 | LLVM_DEBUG(dbgs() |
199 | << " Unset dead/kill flag of " << *DeadOrKillToUnset |
200 | << " from " << *DeadOrKillToUnset->getParent()); |
201 | if (DeadOrKillToUnset->isDef()) |
202 | DeadOrKillToUnset->setIsDead(false); |
203 | else |
204 | DeadOrKillToUnset->setIsKill(false); |
205 | } |
206 | DeadOrKillToUnset = |
207 | AfterBBI->findRegisterDefOperand(Reg, TRI, isDead: true, Overlap: true); |
208 | if (DeadOrKillToUnset) |
209 | LLVM_DEBUG(dbgs() |
210 | << " Dead flag of " << *DeadOrKillToUnset << " from " |
211 | << *AfterBBI << " is a unsetting candidate\n" ); |
212 | InstrsToErase.insert(V: &*AfterBBI); |
213 | LLVM_DEBUG(dbgs() << " Remove redundant load immediate: " ; |
214 | AfterBBI->dump()); |
215 | } |
216 | } |
217 | |
218 | for (MachineInstr *MI : InstrsToErase) { |
219 | MI->eraseFromParent(); |
220 | } |
221 | NumRemovedInPreEmit += InstrsToErase.size(); |
222 | return !InstrsToErase.empty(); |
223 | } |
224 | |
225 | // Check if this instruction is a PLDpc that is part of a GOT indirect |
226 | // access. |
227 | bool isGOTPLDpc(MachineInstr &Instr) { |
228 | if (Instr.getOpcode() != PPC::PLDpc) |
229 | return false; |
230 | |
231 | // The result must be a register. |
232 | const MachineOperand &LoadedAddressReg = Instr.getOperand(i: 0); |
233 | if (!LoadedAddressReg.isReg()) |
234 | return false; |
235 | |
236 | // Make sure that this is a global symbol. |
237 | const MachineOperand &SymbolOp = Instr.getOperand(i: 1); |
238 | if (!SymbolOp.isGlobal()) |
239 | return false; |
240 | |
241 | // Finally return true only if the GOT flag is present. |
242 | return PPCInstrInfo::hasGOTFlag(TF: SymbolOp.getTargetFlags()); |
243 | } |
244 | |
245 | bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) { |
246 | MachineFunction *MF = MBB.getParent(); |
247 | // If the linker opt is disabled then just return. |
248 | if (!EnablePCRelLinkerOpt) |
249 | return false; |
250 | |
251 | // Add this linker opt only if we are using PC Relative memops. |
252 | if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) |
253 | return false; |
254 | |
255 | // Struct to keep track of one def/use pair for a GOT indirect access. |
256 | struct GOTDefUsePair { |
257 | MachineBasicBlock::iterator DefInst; |
258 | MachineBasicBlock::iterator UseInst; |
259 | Register DefReg; |
260 | Register UseReg; |
261 | bool StillValid; |
262 | }; |
263 | // Vector of def/ues pairs in this basic block. |
264 | SmallVector<GOTDefUsePair, 4> CandPairs; |
265 | SmallVector<GOTDefUsePair, 4> ValidPairs; |
266 | bool MadeChange = false; |
267 | |
268 | // Run through all of the instructions in the basic block and try to |
269 | // collect potential pairs of GOT indirect access instructions. |
270 | for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { |
271 | // Look for the initial GOT indirect load. |
272 | if (isGOTPLDpc(Instr&: *BBI)) { |
273 | GOTDefUsePair CurrentPair{.DefInst: BBI, .UseInst: MachineBasicBlock::iterator(), |
274 | .DefReg: BBI->getOperand(i: 0).getReg(), |
275 | .UseReg: PPC::NoRegister, .StillValid: true}; |
276 | CandPairs.push_back(Elt: CurrentPair); |
277 | continue; |
278 | } |
279 | |
280 | // We haven't encountered any new PLD instructions, nothing to check. |
281 | if (CandPairs.empty()) |
282 | continue; |
283 | |
284 | // Run through the candidate pairs and see if any of the registers |
285 | // defined in the PLD instructions are used by this instruction. |
286 | // Note: the size of CandPairs can change in the loop. |
287 | for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) { |
288 | GOTDefUsePair &Pair = CandPairs[Idx]; |
289 | // The instruction does not use or modify this PLD's def reg, |
290 | // ignore it. |
291 | if (!BBI->readsRegister(Reg: Pair.DefReg, TRI) && |
292 | !BBI->modifiesRegister(Reg: Pair.DefReg, TRI)) |
293 | continue; |
294 | |
295 | // The use needs to be used in the address computation and not |
296 | // as the register being stored for a store. |
297 | const MachineOperand *UseOp = |
298 | hasPCRelativeForm(Use&: *BBI) ? &BBI->getOperand(i: 2) : nullptr; |
299 | |
300 | // Check for a valid use. |
301 | if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg && |
302 | UseOp->isUse() && UseOp->isKill()) { |
303 | Pair.UseInst = BBI; |
304 | Pair.UseReg = BBI->getOperand(i: 0).getReg(); |
305 | ValidPairs.push_back(Elt: Pair); |
306 | } |
307 | CandPairs.erase(CI: CandPairs.begin() + Idx); |
308 | } |
309 | } |
310 | |
311 | // Go through all of the pairs and check for any more valid uses. |
312 | for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) { |
313 | // We shouldn't be here if we don't have a valid pair. |
314 | assert(Pair->UseInst.isValid() && Pair->StillValid && |
315 | "Kept an invalid def/use pair for GOT PCRel opt" ); |
316 | // We have found a potential pair. Search through the instructions |
317 | // between the def and the use to see if it is valid to mark this as a |
318 | // linker opt. |
319 | MachineBasicBlock::iterator BBI = Pair->DefInst; |
320 | ++BBI; |
321 | for (; BBI != Pair->UseInst; ++BBI) { |
322 | if (BBI->readsRegister(Reg: Pair->UseReg, TRI) || |
323 | BBI->modifiesRegister(Reg: Pair->UseReg, TRI)) { |
324 | Pair->StillValid = false; |
325 | break; |
326 | } |
327 | } |
328 | |
329 | if (!Pair->StillValid) |
330 | continue; |
331 | |
332 | // The load/store instruction that uses the address from the PLD will |
333 | // either use a register (for a store) or define a register (for the |
334 | // load). That register will be added as an implicit def to the PLD |
335 | // and as an implicit use on the second memory op. This is a precaution |
336 | // to prevent future passes from using that register between the two |
337 | // instructions. |
338 | MachineOperand ImplDef = |
339 | MachineOperand::CreateReg(Reg: Pair->UseReg, isDef: true, isImp: true); |
340 | MachineOperand ImplUse = |
341 | MachineOperand::CreateReg(Reg: Pair->UseReg, isDef: false, isImp: true); |
342 | Pair->DefInst->addOperand(Op: ImplDef); |
343 | Pair->UseInst->addOperand(Op: ImplUse); |
344 | |
345 | // Create the symbol. |
346 | MCContext &Context = MF->getContext(); |
347 | MCSymbol *Symbol = Context.createNamedTempSymbol(Name: "pcrel" ); |
348 | MachineOperand PCRelLabel = |
349 | MachineOperand::CreateMCSymbol(Sym: Symbol, TargetFlags: PPCII::MO_PCREL_OPT_FLAG); |
350 | Pair->DefInst->addOperand(MF&: *MF, Op: PCRelLabel); |
351 | Pair->UseInst->addOperand(MF&: *MF, Op: PCRelLabel); |
352 | MadeChange |= true; |
353 | } |
354 | return MadeChange; |
355 | } |
356 | |
357 | // This function removes redundant pairs of accumulator prime/unprime |
358 | // instructions. In some situations, it's possible the compiler inserts an |
359 | // accumulator prime instruction followed by an unprime instruction (e.g. |
360 | // when we store an accumulator after restoring it from a spill). If the |
361 | // accumulator is not used between the two, they can be removed. This |
362 | // function removes these redundant pairs from basic blocks. |
363 | // The algorithm is quite straightforward - every time we encounter a prime |
364 | // instruction, the primed register is added to a candidate set. Any use |
365 | // other than a prime removes the candidate from the set and any de-prime |
366 | // of a current candidate marks both the prime and de-prime for removal. |
367 | // This way we ensure we only remove prime/de-prime *pairs* with no |
368 | // intervening uses. |
369 | bool removeAccPrimeUnprime(MachineBasicBlock &MBB) { |
370 | DenseSet<MachineInstr *> InstrsToErase; |
371 | // Initially, none of the acc registers are candidates. |
372 | SmallVector<MachineInstr *, 8> Candidates( |
373 | PPC::UACCRCRegClass.getNumRegs(), nullptr); |
374 | |
375 | for (MachineInstr &BBI : MBB.instrs()) { |
376 | unsigned Opc = BBI.getOpcode(); |
377 | // If we are visiting a xxmtacc instruction, we add it and its operand |
378 | // register to the candidate set. |
379 | if (Opc == PPC::XXMTACC) { |
380 | Register Acc = BBI.getOperand(i: 0).getReg(); |
381 | assert(PPC::ACCRCRegClass.contains(Acc) && |
382 | "Unexpected register for XXMTACC" ); |
383 | Candidates[Acc - PPC::ACC0] = &BBI; |
384 | } |
385 | // If we are visiting a xxmfacc instruction and its operand register is |
386 | // in the candidate set, we mark the two instructions for removal. |
387 | else if (Opc == PPC::XXMFACC) { |
388 | Register Acc = BBI.getOperand(i: 0).getReg(); |
389 | assert(PPC::ACCRCRegClass.contains(Acc) && |
390 | "Unexpected register for XXMFACC" ); |
391 | if (!Candidates[Acc - PPC::ACC0]) |
392 | continue; |
393 | InstrsToErase.insert(V: &BBI); |
394 | InstrsToErase.insert(V: Candidates[Acc - PPC::ACC0]); |
395 | } |
396 | // If we are visiting an instruction using an accumulator register |
397 | // as operand, we remove it from the candidate set. |
398 | else { |
399 | for (MachineOperand &Operand : BBI.operands()) { |
400 | if (!Operand.isReg()) |
401 | continue; |
402 | Register Reg = Operand.getReg(); |
403 | if (PPC::ACCRCRegClass.contains(Reg)) |
404 | Candidates[Reg - PPC::ACC0] = nullptr; |
405 | } |
406 | } |
407 | } |
408 | |
409 | for (MachineInstr *MI : InstrsToErase) |
410 | MI->eraseFromParent(); |
411 | NumRemovedInPreEmit += InstrsToErase.size(); |
412 | return !InstrsToErase.empty(); |
413 | } |
414 | |
415 | bool runOnMachineFunction(MachineFunction &MF) override { |
416 | // If the user wants to set the DSCR using command-line options, |
417 | // load in the specified value at the start of main. |
418 | if (DSCRValue.getNumOccurrences() > 0 && MF.getName() == "main" && |
419 | MF.getFunction().hasExternalLinkage()) { |
420 | DSCRValue = (uint32_t)(DSCRValue & 0x01FFFFFF); // 25-bit DSCR mask |
421 | RegScavenger RS; |
422 | MachineBasicBlock &MBB = MF.front(); |
423 | // Find an unused GPR according to register liveness |
424 | RS.enterBasicBlock(MBB); |
425 | unsigned InDSCR = RS.FindUnusedReg(RC: &PPC::GPRCRegClass); |
426 | if (InDSCR) { |
427 | const PPCInstrInfo *TII = |
428 | MF.getSubtarget<PPCSubtarget>().getInstrInfo(); |
429 | DebugLoc dl; |
430 | MachineBasicBlock::iterator IP = MBB.begin(); // Insert Point |
431 | // Copy the 32-bit DSCRValue integer into the GPR InDSCR using LIS and |
432 | // ORI, then move to DSCR. If the requested DSCR value is contained |
433 | // in a 16-bit signed number, we can emit a single `LI`, but the |
434 | // impact of saving one instruction in one function does not warrant |
435 | // any additional complexity in the logic here. |
436 | BuildMI(BB&: MBB, I: IP, MIMD: dl, MCID: TII->get(Opcode: PPC::LIS), DestReg: InDSCR) |
437 | .addImm(Val: DSCRValue >> 16); |
438 | BuildMI(BB&: MBB, I: IP, MIMD: dl, MCID: TII->get(Opcode: PPC::ORI), DestReg: InDSCR) |
439 | .addReg(RegNo: InDSCR) |
440 | .addImm(Val: DSCRValue & 0xFFFF); |
441 | BuildMI(BB&: MBB, I: IP, MIMD: dl, MCID: TII->get(Opcode: PPC::MTUDSCR)) |
442 | .addReg(RegNo: InDSCR, flags: RegState::Kill); |
443 | } else |
444 | errs() << "Warning: Ran out of registers - Unable to set DSCR as " |
445 | "requested" ; |
446 | } |
447 | |
448 | if (skipFunction(F: MF.getFunction()) || !RunPreEmitPeephole) { |
449 | // Remove UNENCODED_NOP even when this pass is disabled. |
450 | // This needs to be done unconditionally so we don't emit zeros |
451 | // in the instruction stream. |
452 | SmallVector<MachineInstr *, 4> InstrsToErase; |
453 | for (MachineBasicBlock &MBB : MF) |
454 | for (MachineInstr &MI : MBB) |
455 | if (MI.getOpcode() == PPC::UNENCODED_NOP) |
456 | InstrsToErase.push_back(Elt: &MI); |
457 | for (MachineInstr *MI : InstrsToErase) |
458 | MI->eraseFromParent(); |
459 | return false; |
460 | } |
461 | bool Changed = false; |
462 | const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo(); |
463 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
464 | SmallVector<MachineInstr *, 4> InstrsToErase; |
465 | for (MachineBasicBlock &MBB : MF) { |
466 | Changed |= removeRedundantLIs(MBB, TRI); |
467 | Changed |= addLinkerOpt(MBB, TRI); |
468 | Changed |= removeAccPrimeUnprime(MBB); |
469 | for (MachineInstr &MI : MBB) { |
470 | unsigned Opc = MI.getOpcode(); |
471 | if (Opc == PPC::UNENCODED_NOP) { |
472 | InstrsToErase.push_back(Elt: &MI); |
473 | continue; |
474 | } |
475 | // Detect self copies - these can result from running AADB. |
476 | if (PPCInstrInfo::isSameClassPhysRegCopy(Opcode: Opc)) { |
477 | const MCInstrDesc &MCID = TII->get(Opcode: Opc); |
478 | if (MCID.getNumOperands() == 3 && |
479 | MI.getOperand(i: 0).getReg() == MI.getOperand(i: 1).getReg() && |
480 | MI.getOperand(i: 0).getReg() == MI.getOperand(i: 2).getReg()) { |
481 | NumberOfSelfCopies++; |
482 | LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: " ); |
483 | LLVM_DEBUG(MI.dump()); |
484 | InstrsToErase.push_back(Elt: &MI); |
485 | continue; |
486 | } |
487 | else if (MCID.getNumOperands() == 2 && |
488 | MI.getOperand(i: 0).getReg() == MI.getOperand(i: 1).getReg()) { |
489 | NumberOfSelfCopies++; |
490 | LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: " ); |
491 | LLVM_DEBUG(MI.dump()); |
492 | InstrsToErase.push_back(Elt: &MI); |
493 | continue; |
494 | } |
495 | } |
496 | MachineInstr *DefMIToErase = nullptr; |
497 | SmallSet<Register, 4> UpdatedRegs; |
498 | if (TII->convertToImmediateForm(MI, RegsToUpdate&: UpdatedRegs, KilledDef: &DefMIToErase)) { |
499 | Changed = true; |
500 | NumRRConvertedInPreEmit++; |
501 | LLVM_DEBUG(dbgs() << "Converted instruction to imm form: " ); |
502 | LLVM_DEBUG(MI.dump()); |
503 | if (DefMIToErase) { |
504 | InstrsToErase.push_back(Elt: DefMIToErase); |
505 | } |
506 | } |
507 | if (TII->foldFrameOffset(MI)) { |
508 | Changed = true; |
509 | NumFrameOffFoldInPreEmit++; |
510 | LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: " ); |
511 | LLVM_DEBUG(MI.dump()); |
512 | } |
513 | if (TII->optimizeCmpPostRA(MI)) { |
514 | Changed = true; |
515 | NumCmpsInPreEmit++; |
516 | LLVM_DEBUG(dbgs() << "Optimize compare by using record form: " ); |
517 | LLVM_DEBUG(MI.dump()); |
518 | InstrsToErase.push_back(Elt: &MI); |
519 | } |
520 | } |
521 | |
522 | // Eliminate conditional branch based on a constant CR bit by |
523 | // CRSET or CRUNSET. We eliminate the conditional branch or |
524 | // convert it into an unconditional branch. Also, if the CR bit |
525 | // is not used by other instructions, we eliminate CRSET as well. |
526 | auto I = MBB.getFirstInstrTerminator(); |
527 | if (I == MBB.instr_end()) |
528 | continue; |
529 | MachineInstr *Br = &*I; |
530 | if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn) |
531 | continue; |
532 | MachineInstr *CRSetMI = nullptr; |
533 | Register CRBit = Br->getOperand(i: 0).getReg(); |
534 | unsigned CRReg = getCRFromCRBit(SrcReg: CRBit); |
535 | bool SeenUse = false; |
536 | MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend(); |
537 | for (It++; It != Er; It++) { |
538 | if (It->modifiesRegister(Reg: CRBit, TRI)) { |
539 | if ((It->getOpcode() == PPC::CRUNSET || |
540 | It->getOpcode() == PPC::CRSET) && |
541 | It->getOperand(i: 0).getReg() == CRBit) |
542 | CRSetMI = &*It; |
543 | break; |
544 | } |
545 | if (It->readsRegister(Reg: CRBit, TRI)) |
546 | SeenUse = true; |
547 | } |
548 | if (!CRSetMI) continue; |
549 | |
550 | unsigned CRSetOp = CRSetMI->getOpcode(); |
551 | if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) || |
552 | (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) { |
553 | // Remove this branch since it cannot be taken. |
554 | InstrsToErase.push_back(Elt: Br); |
555 | MBB.removeSuccessor(Succ: Br->getOperand(i: 1).getMBB()); |
556 | } |
557 | else { |
558 | // This conditional branch is always taken. So, remove all branches |
559 | // and insert an unconditional branch to the destination of this. |
560 | MachineBasicBlock::iterator It = Br, Er = MBB.end(); |
561 | for (; It != Er; It++) { |
562 | if (It->isDebugInstr()) continue; |
563 | assert(It->isTerminator() && "Non-terminator after a terminator" ); |
564 | InstrsToErase.push_back(Elt: &*It); |
565 | } |
566 | if (!MBB.isLayoutSuccessor(MBB: Br->getOperand(i: 1).getMBB())) { |
567 | ArrayRef<MachineOperand> NoCond; |
568 | TII->insertBranch(MBB, TBB: Br->getOperand(i: 1).getMBB(), FBB: nullptr, |
569 | Cond: NoCond, DL: Br->getDebugLoc()); |
570 | } |
571 | for (auto &Succ : MBB.successors()) |
572 | if (Succ != Br->getOperand(i: 1).getMBB()) { |
573 | MBB.removeSuccessor(Succ); |
574 | break; |
575 | } |
576 | } |
577 | |
578 | // If the CRBit is not used by another instruction, we can eliminate |
579 | // CRSET/CRUNSET instruction. |
580 | if (!SeenUse) { |
581 | // We need to check use of the CRBit in successors. |
582 | for (auto &SuccMBB : MBB.successors()) |
583 | if (SuccMBB->isLiveIn(Reg: CRBit) || SuccMBB->isLiveIn(Reg: CRReg)) { |
584 | SeenUse = true; |
585 | break; |
586 | } |
587 | if (!SeenUse) |
588 | InstrsToErase.push_back(Elt: CRSetMI); |
589 | } |
590 | } |
591 | for (MachineInstr *MI : InstrsToErase) { |
592 | LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: " ); |
593 | LLVM_DEBUG(MI->dump()); |
594 | MI->eraseFromParent(); |
595 | NumRemovedInPreEmit++; |
596 | } |
597 | return Changed; |
598 | } |
599 | }; |
600 | } |
601 | |
602 | INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole" , |
603 | false, false) |
604 | char PPCPreEmitPeephole::ID = 0; |
605 | |
606 | FunctionPass *llvm::createPPCPreEmitPeepholePass() { |
607 | return new PPCPreEmitPeephole(); |
608 | } |
609 | |