1 | //===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // A pre-emit peephole for catching opportunities introduced by late passes such |
10 | // as MachineBlockPlacement. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "PPC.h" |
15 | #include "PPCInstrInfo.h" |
16 | #include "PPCSubtarget.h" |
17 | #include "llvm/ADT/Statistic.h" |
18 | #include "llvm/CodeGen/LivePhysRegs.h" |
19 | #include "llvm/CodeGen/MachineBasicBlock.h" |
20 | #include "llvm/CodeGen/MachineFunctionPass.h" |
21 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
22 | #include "llvm/CodeGen/RegisterScavenging.h" |
23 | #include "llvm/MC/MCContext.h" |
24 | #include "llvm/Support/CommandLine.h" |
25 | #include "llvm/Support/Debug.h" |
26 | |
27 | using namespace llvm; |
28 | |
29 | #define DEBUG_TYPE "ppc-pre-emit-peephole" |
30 | |
31 | STATISTIC(NumRRConvertedInPreEmit, |
32 | "Number of r+r instructions converted to r+i in pre-emit peephole" ); |
33 | STATISTIC(NumRemovedInPreEmit, |
34 | "Number of instructions deleted in pre-emit peephole" ); |
35 | STATISTIC(NumberOfSelfCopies, |
36 | "Number of self copy instructions eliminated" ); |
37 | STATISTIC(NumFrameOffFoldInPreEmit, |
38 | "Number of folding frame offset by using r+r in pre-emit peephole" ); |
39 | STATISTIC(NumCmpsInPreEmit, |
40 | "Number of compares eliminated in pre-emit peephole" ); |
41 | |
42 | static cl::opt<bool> |
43 | EnablePCRelLinkerOpt("ppc-pcrel-linker-opt" , cl::Hidden, cl::init(Val: true), |
44 | cl::desc("enable PC Relative linker optimization" )); |
45 | |
46 | static cl::opt<bool> |
47 | RunPreEmitPeephole("ppc-late-peephole" , cl::Hidden, cl::init(Val: true), |
48 | cl::desc("Run pre-emit peephole optimizations." )); |
49 | |
50 | static cl::opt<uint64_t> |
51 | DSCRValue("ppc-set-dscr" , cl::Hidden, |
52 | cl::desc("Set the Data Stream Control Register." )); |
53 | |
54 | namespace { |
55 | |
56 | static bool hasPCRelativeForm(MachineInstr &Use) { |
57 | switch (Use.getOpcode()) { |
58 | default: |
59 | return false; |
60 | case PPC::LBZ: |
61 | case PPC::LBZ8: |
62 | case PPC::LHA: |
63 | case PPC::LHA8: |
64 | case PPC::LHZ: |
65 | case PPC::LHZ8: |
66 | case PPC::LWZ: |
67 | case PPC::LWZ8: |
68 | case PPC::STB: |
69 | case PPC::STB8: |
70 | case PPC::STH: |
71 | case PPC::STH8: |
72 | case PPC::STW: |
73 | case PPC::STW8: |
74 | case PPC::LD: |
75 | case PPC::STD: |
76 | case PPC::LWA: |
77 | case PPC::LXSD: |
78 | case PPC::LXSSP: |
79 | case PPC::LXV: |
80 | case PPC::STXSD: |
81 | case PPC::STXSSP: |
82 | case PPC::STXV: |
83 | case PPC::LFD: |
84 | case PPC::LFS: |
85 | case PPC::STFD: |
86 | case PPC::STFS: |
87 | case PPC::DFLOADf32: |
88 | case PPC::DFLOADf64: |
89 | case PPC::DFSTOREf32: |
90 | case PPC::DFSTOREf64: |
91 | return true; |
92 | } |
93 | } |
94 | |
95 | class PPCPreEmitPeephole : public MachineFunctionPass { |
96 | public: |
97 | static char ID; |
98 | PPCPreEmitPeephole() : MachineFunctionPass(ID) {} |
99 | |
100 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
101 | MachineFunctionPass::getAnalysisUsage(AU); |
102 | } |
103 | |
104 | MachineFunctionProperties getRequiredProperties() const override { |
105 | return MachineFunctionProperties().setNoVRegs(); |
106 | } |
107 | |
108 | // This function removes any redundant load immediates. It has two level |
109 | // loops - The outer loop finds the load immediates BBI that could be used |
110 | // to replace following redundancy. The inner loop scans instructions that |
111 | // after BBI to find redundancy and update kill/dead flags accordingly. If |
112 | // AfterBBI is the same as BBI, it is redundant, otherwise any instructions |
113 | // that modify the def register of BBI would break the scanning. |
114 | // DeadOrKillToUnset is a pointer to the previous operand that had the |
115 | // kill/dead flag set. It keeps track of the def register of BBI, the use |
116 | // registers of AfterBBIs and the def registers of AfterBBIs. |
117 | bool removeRedundantLIs(MachineBasicBlock &MBB, |
118 | const TargetRegisterInfo *TRI) { |
119 | LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n" ; |
120 | MBB.dump(); dbgs() << "\n" ); |
121 | |
122 | DenseSet<MachineInstr *> InstrsToErase; |
123 | for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { |
124 | // Skip load immediate that is marked to be erased later because it |
125 | // cannot be used to replace any other instructions. |
126 | if (InstrsToErase.contains(V: &*BBI)) |
127 | continue; |
128 | // Skip non-load immediate. |
129 | unsigned Opc = BBI->getOpcode(); |
130 | if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS && |
131 | Opc != PPC::LIS8) |
132 | continue; |
133 | // Skip load immediate, where the operand is a relocation (e.g., $r3 = |
134 | // LI target-flags(ppc-lo) %const.0). |
135 | if (!BBI->getOperand(i: 1).isImm()) |
136 | continue; |
137 | assert(BBI->getOperand(0).isReg() && |
138 | "Expected a register for the first operand" ); |
139 | |
140 | LLVM_DEBUG(dbgs() << "Scanning after load immediate: " ; BBI->dump();); |
141 | |
142 | Register Reg = BBI->getOperand(i: 0).getReg(); |
143 | int64_t Imm = BBI->getOperand(i: 1).getImm(); |
144 | MachineOperand *DeadOrKillToUnset = nullptr; |
145 | if (BBI->getOperand(i: 0).isDead()) { |
146 | DeadOrKillToUnset = &BBI->getOperand(i: 0); |
147 | LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset |
148 | << " from load immediate " << *BBI |
149 | << " is a unsetting candidate\n" ); |
150 | } |
151 | // This loop scans instructions after BBI to see if there is any |
152 | // redundant load immediate. |
153 | for (auto AfterBBI = std::next(x: BBI); AfterBBI != MBB.instr_end(); |
154 | ++AfterBBI) { |
155 | // Track the operand that kill Reg. We would unset the kill flag of |
156 | // the operand if there is a following redundant load immediate. |
157 | int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, TRI, isKill: true); |
158 | |
159 | // We can't just clear implicit kills, so if we encounter one, stop |
160 | // looking further. |
161 | if (KillIdx != -1 && AfterBBI->getOperand(i: KillIdx).isImplicit()) { |
162 | LLVM_DEBUG(dbgs() |
163 | << "Encountered an implicit kill, cannot proceed: " ); |
164 | LLVM_DEBUG(AfterBBI->dump()); |
165 | break; |
166 | } |
167 | |
168 | if (KillIdx != -1) { |
169 | assert(!DeadOrKillToUnset && "Shouldn't kill same register twice" ); |
170 | DeadOrKillToUnset = &AfterBBI->getOperand(i: KillIdx); |
171 | LLVM_DEBUG(dbgs() |
172 | << " Kill flag of " << *DeadOrKillToUnset << " from " |
173 | << *AfterBBI << " is a unsetting candidate\n" ); |
174 | } |
175 | |
176 | if (!AfterBBI->modifiesRegister(Reg, TRI)) |
177 | continue; |
178 | // Finish scanning because Reg is overwritten by a non-load |
179 | // instruction. |
180 | if (AfterBBI->getOpcode() != Opc) |
181 | break; |
182 | assert(AfterBBI->getOperand(0).isReg() && |
183 | "Expected a register for the first operand" ); |
184 | // Finish scanning because Reg is overwritten by a relocation or a |
185 | // different value. |
186 | if (!AfterBBI->getOperand(i: 1).isImm() || |
187 | AfterBBI->getOperand(i: 1).getImm() != Imm) |
188 | break; |
189 | |
190 | // It loads same immediate value to the same Reg, which is redundant. |
191 | // We would unset kill flag in previous Reg usage to extend live range |
192 | // of Reg first, then remove the redundancy. |
193 | if (DeadOrKillToUnset) { |
194 | LLVM_DEBUG(dbgs() |
195 | << " Unset dead/kill flag of " << *DeadOrKillToUnset |
196 | << " from " << *DeadOrKillToUnset->getParent()); |
197 | if (DeadOrKillToUnset->isDef()) |
198 | DeadOrKillToUnset->setIsDead(false); |
199 | else |
200 | DeadOrKillToUnset->setIsKill(false); |
201 | } |
202 | DeadOrKillToUnset = |
203 | AfterBBI->findRegisterDefOperand(Reg, TRI, isDead: true, Overlap: true); |
204 | if (DeadOrKillToUnset) |
205 | LLVM_DEBUG(dbgs() |
206 | << " Dead flag of " << *DeadOrKillToUnset << " from " |
207 | << *AfterBBI << " is a unsetting candidate\n" ); |
208 | InstrsToErase.insert(V: &*AfterBBI); |
209 | LLVM_DEBUG(dbgs() << " Remove redundant load immediate: " ; |
210 | AfterBBI->dump()); |
211 | } |
212 | } |
213 | |
214 | for (MachineInstr *MI : InstrsToErase) { |
215 | MI->eraseFromParent(); |
216 | } |
217 | NumRemovedInPreEmit += InstrsToErase.size(); |
218 | return !InstrsToErase.empty(); |
219 | } |
220 | |
221 | // Check if this instruction is a PLDpc that is part of a GOT indirect |
222 | // access. |
223 | bool isGOTPLDpc(MachineInstr &Instr) { |
224 | if (Instr.getOpcode() != PPC::PLDpc) |
225 | return false; |
226 | |
227 | // The result must be a register. |
228 | const MachineOperand &LoadedAddressReg = Instr.getOperand(i: 0); |
229 | if (!LoadedAddressReg.isReg()) |
230 | return false; |
231 | |
232 | // Make sure that this is a global symbol. |
233 | const MachineOperand &SymbolOp = Instr.getOperand(i: 1); |
234 | if (!SymbolOp.isGlobal()) |
235 | return false; |
236 | |
237 | // Finally return true only if the GOT flag is present. |
238 | return PPCInstrInfo::hasGOTFlag(TF: SymbolOp.getTargetFlags()); |
239 | } |
240 | |
241 | bool addLinkerOpt(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) { |
242 | MachineFunction *MF = MBB.getParent(); |
243 | // If the linker opt is disabled then just return. |
244 | if (!EnablePCRelLinkerOpt) |
245 | return false; |
246 | |
247 | // Add this linker opt only if we are using PC Relative memops. |
248 | if (!MF->getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) |
249 | return false; |
250 | |
251 | // Struct to keep track of one def/use pair for a GOT indirect access. |
252 | struct GOTDefUsePair { |
253 | MachineBasicBlock::iterator DefInst; |
254 | MachineBasicBlock::iterator UseInst; |
255 | Register DefReg; |
256 | Register UseReg; |
257 | bool StillValid; |
258 | }; |
259 | // Vector of def/ues pairs in this basic block. |
260 | SmallVector<GOTDefUsePair, 4> CandPairs; |
261 | SmallVector<GOTDefUsePair, 4> ValidPairs; |
262 | bool MadeChange = false; |
263 | |
264 | // Run through all of the instructions in the basic block and try to |
265 | // collect potential pairs of GOT indirect access instructions. |
266 | for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) { |
267 | // Look for the initial GOT indirect load. |
268 | if (isGOTPLDpc(Instr&: *BBI)) { |
269 | GOTDefUsePair CurrentPair{.DefInst: BBI, .UseInst: MachineBasicBlock::iterator(), |
270 | .DefReg: BBI->getOperand(i: 0).getReg(), |
271 | .UseReg: PPC::NoRegister, .StillValid: true}; |
272 | CandPairs.push_back(Elt: CurrentPair); |
273 | continue; |
274 | } |
275 | |
276 | // We haven't encountered any new PLD instructions, nothing to check. |
277 | if (CandPairs.empty()) |
278 | continue; |
279 | |
280 | // Run through the candidate pairs and see if any of the registers |
281 | // defined in the PLD instructions are used by this instruction. |
282 | // Note: the size of CandPairs can change in the loop. |
283 | for (unsigned Idx = 0; Idx < CandPairs.size(); Idx++) { |
284 | GOTDefUsePair &Pair = CandPairs[Idx]; |
285 | // The instruction does not use or modify this PLD's def reg, |
286 | // ignore it. |
287 | if (!BBI->readsRegister(Reg: Pair.DefReg, TRI) && |
288 | !BBI->modifiesRegister(Reg: Pair.DefReg, TRI)) |
289 | continue; |
290 | |
291 | // The use needs to be used in the address computation and not |
292 | // as the register being stored for a store. |
293 | const MachineOperand *UseOp = |
294 | hasPCRelativeForm(Use&: *BBI) ? &BBI->getOperand(i: 2) : nullptr; |
295 | |
296 | // Check for a valid use. |
297 | if (UseOp && UseOp->isReg() && UseOp->getReg() == Pair.DefReg && |
298 | UseOp->isUse() && UseOp->isKill()) { |
299 | Pair.UseInst = BBI; |
300 | Pair.UseReg = BBI->getOperand(i: 0).getReg(); |
301 | ValidPairs.push_back(Elt: Pair); |
302 | } |
303 | CandPairs.erase(CI: CandPairs.begin() + Idx); |
304 | } |
305 | } |
306 | |
307 | // Go through all of the pairs and check for any more valid uses. |
308 | for (auto Pair = ValidPairs.begin(); Pair != ValidPairs.end(); Pair++) { |
309 | // We shouldn't be here if we don't have a valid pair. |
310 | assert(Pair->UseInst.isValid() && Pair->StillValid && |
311 | "Kept an invalid def/use pair for GOT PCRel opt" ); |
312 | // We have found a potential pair. Search through the instructions |
313 | // between the def and the use to see if it is valid to mark this as a |
314 | // linker opt. |
315 | MachineBasicBlock::iterator BBI = Pair->DefInst; |
316 | ++BBI; |
317 | for (; BBI != Pair->UseInst; ++BBI) { |
318 | if (BBI->readsRegister(Reg: Pair->UseReg, TRI) || |
319 | BBI->modifiesRegister(Reg: Pair->UseReg, TRI)) { |
320 | Pair->StillValid = false; |
321 | break; |
322 | } |
323 | } |
324 | |
325 | if (!Pair->StillValid) |
326 | continue; |
327 | |
328 | // The load/store instruction that uses the address from the PLD will |
329 | // either use a register (for a store) or define a register (for the |
330 | // load). That register will be added as an implicit def to the PLD |
331 | // and as an implicit use on the second memory op. This is a precaution |
332 | // to prevent future passes from using that register between the two |
333 | // instructions. |
334 | MachineOperand ImplDef = |
335 | MachineOperand::CreateReg(Reg: Pair->UseReg, isDef: true, isImp: true); |
336 | MachineOperand ImplUse = |
337 | MachineOperand::CreateReg(Reg: Pair->UseReg, isDef: false, isImp: true); |
338 | Pair->DefInst->addOperand(Op: ImplDef); |
339 | Pair->UseInst->addOperand(Op: ImplUse); |
340 | |
341 | // Create the symbol. |
342 | MCContext &Context = MF->getContext(); |
343 | MCSymbol *Symbol = Context.createNamedTempSymbol(Name: "pcrel" ); |
344 | MachineOperand PCRelLabel = |
345 | MachineOperand::CreateMCSymbol(Sym: Symbol, TargetFlags: PPCII::MO_PCREL_OPT_FLAG); |
346 | Pair->DefInst->addOperand(MF&: *MF, Op: PCRelLabel); |
347 | Pair->UseInst->addOperand(MF&: *MF, Op: PCRelLabel); |
348 | MadeChange |= true; |
349 | } |
350 | return MadeChange; |
351 | } |
352 | |
353 | // This function removes redundant pairs of accumulator prime/unprime |
354 | // instructions. In some situations, it's possible the compiler inserts an |
355 | // accumulator prime instruction followed by an unprime instruction (e.g. |
356 | // when we store an accumulator after restoring it from a spill). If the |
357 | // accumulator is not used between the two, they can be removed. This |
358 | // function removes these redundant pairs from basic blocks. |
359 | // The algorithm is quite straightforward - every time we encounter a prime |
360 | // instruction, the primed register is added to a candidate set. Any use |
361 | // other than a prime removes the candidate from the set and any de-prime |
362 | // of a current candidate marks both the prime and de-prime for removal. |
363 | // This way we ensure we only remove prime/de-prime *pairs* with no |
364 | // intervening uses. |
365 | bool removeAccPrimeUnprime(MachineBasicBlock &MBB) { |
366 | DenseSet<MachineInstr *> InstrsToErase; |
367 | // Initially, none of the acc registers are candidates. |
368 | SmallVector<MachineInstr *, 8> Candidates( |
369 | PPC::UACCRCRegClass.getNumRegs(), nullptr); |
370 | |
371 | for (MachineInstr &BBI : MBB.instrs()) { |
372 | unsigned Opc = BBI.getOpcode(); |
373 | // If we are visiting a xxmtacc instruction, we add it and its operand |
374 | // register to the candidate set. |
375 | if (Opc == PPC::XXMTACC) { |
376 | Register Acc = BBI.getOperand(i: 0).getReg(); |
377 | assert(PPC::ACCRCRegClass.contains(Acc) && |
378 | "Unexpected register for XXMTACC" ); |
379 | Candidates[Acc - PPC::ACC0] = &BBI; |
380 | } |
381 | // If we are visiting a xxmfacc instruction and its operand register is |
382 | // in the candidate set, we mark the two instructions for removal. |
383 | else if (Opc == PPC::XXMFACC) { |
384 | Register Acc = BBI.getOperand(i: 0).getReg(); |
385 | assert(PPC::ACCRCRegClass.contains(Acc) && |
386 | "Unexpected register for XXMFACC" ); |
387 | if (!Candidates[Acc - PPC::ACC0]) |
388 | continue; |
389 | InstrsToErase.insert(V: &BBI); |
390 | InstrsToErase.insert(V: Candidates[Acc - PPC::ACC0]); |
391 | } |
392 | // If we are visiting an instruction using an accumulator register |
393 | // as operand, we remove it from the candidate set. |
394 | else { |
395 | for (MachineOperand &Operand : BBI.operands()) { |
396 | if (!Operand.isReg()) |
397 | continue; |
398 | Register Reg = Operand.getReg(); |
399 | if (PPC::ACCRCRegClass.contains(Reg)) |
400 | Candidates[Reg - PPC::ACC0] = nullptr; |
401 | } |
402 | } |
403 | } |
404 | |
405 | for (MachineInstr *MI : InstrsToErase) |
406 | MI->eraseFromParent(); |
407 | NumRemovedInPreEmit += InstrsToErase.size(); |
408 | return !InstrsToErase.empty(); |
409 | } |
410 | |
411 | bool runOnMachineFunction(MachineFunction &MF) override { |
412 | // If the user wants to set the DSCR using command-line options, |
413 | // load in the specified value at the start of main. |
414 | if (DSCRValue.getNumOccurrences() > 0 && MF.getName() == "main" && |
415 | MF.getFunction().hasExternalLinkage()) { |
416 | DSCRValue = (uint32_t)(DSCRValue & 0x01FFFFFF); // 25-bit DSCR mask |
417 | RegScavenger RS; |
418 | MachineBasicBlock &MBB = MF.front(); |
419 | // Find an unused GPR according to register liveness |
420 | RS.enterBasicBlock(MBB); |
421 | unsigned InDSCR = RS.FindUnusedReg(RC: &PPC::GPRCRegClass); |
422 | if (InDSCR) { |
423 | const PPCInstrInfo *TII = |
424 | MF.getSubtarget<PPCSubtarget>().getInstrInfo(); |
425 | DebugLoc dl; |
426 | MachineBasicBlock::iterator IP = MBB.begin(); // Insert Point |
427 | // Copy the 32-bit DSCRValue integer into the GPR InDSCR using LIS and |
428 | // ORI, then move to DSCR. If the requested DSCR value is contained |
429 | // in a 16-bit signed number, we can emit a single `LI`, but the |
430 | // impact of saving one instruction in one function does not warrant |
431 | // any additional complexity in the logic here. |
432 | BuildMI(BB&: MBB, I: IP, MIMD: dl, MCID: TII->get(Opcode: PPC::LIS), DestReg: InDSCR) |
433 | .addImm(Val: DSCRValue >> 16); |
434 | BuildMI(BB&: MBB, I: IP, MIMD: dl, MCID: TII->get(Opcode: PPC::ORI), DestReg: InDSCR) |
435 | .addReg(RegNo: InDSCR) |
436 | .addImm(Val: DSCRValue & 0xFFFF); |
437 | BuildMI(BB&: MBB, I: IP, MIMD: dl, MCID: TII->get(Opcode: PPC::MTUDSCR)) |
438 | .addReg(RegNo: InDSCR, flags: RegState::Kill); |
439 | } else |
440 | errs() << "Warning: Ran out of registers - Unable to set DSCR as " |
441 | "requested" ; |
442 | } |
443 | |
444 | if (skipFunction(F: MF.getFunction()) || !RunPreEmitPeephole) { |
445 | // Remove UNENCODED_NOP even when this pass is disabled. |
446 | // This needs to be done unconditionally so we don't emit zeros |
447 | // in the instruction stream. |
448 | SmallVector<MachineInstr *, 4> InstrsToErase; |
449 | for (MachineBasicBlock &MBB : MF) |
450 | for (MachineInstr &MI : MBB) |
451 | if (MI.getOpcode() == PPC::UNENCODED_NOP) |
452 | InstrsToErase.push_back(Elt: &MI); |
453 | for (MachineInstr *MI : InstrsToErase) |
454 | MI->eraseFromParent(); |
455 | return false; |
456 | } |
457 | bool Changed = false; |
458 | const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo(); |
459 | const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); |
460 | SmallVector<MachineInstr *, 4> InstrsToErase; |
461 | for (MachineBasicBlock &MBB : MF) { |
462 | Changed |= removeRedundantLIs(MBB, TRI); |
463 | Changed |= addLinkerOpt(MBB, TRI); |
464 | Changed |= removeAccPrimeUnprime(MBB); |
465 | for (MachineInstr &MI : MBB) { |
466 | unsigned Opc = MI.getOpcode(); |
467 | if (Opc == PPC::UNENCODED_NOP) { |
468 | InstrsToErase.push_back(Elt: &MI); |
469 | continue; |
470 | } |
471 | // Detect self copies - these can result from running AADB. |
472 | if (PPCInstrInfo::isSameClassPhysRegCopy(Opcode: Opc)) { |
473 | const MCInstrDesc &MCID = TII->get(Opcode: Opc); |
474 | if (MCID.getNumOperands() == 3 && |
475 | MI.getOperand(i: 0).getReg() == MI.getOperand(i: 1).getReg() && |
476 | MI.getOperand(i: 0).getReg() == MI.getOperand(i: 2).getReg()) { |
477 | NumberOfSelfCopies++; |
478 | LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: " ); |
479 | LLVM_DEBUG(MI.dump()); |
480 | InstrsToErase.push_back(Elt: &MI); |
481 | continue; |
482 | } |
483 | else if (MCID.getNumOperands() == 2 && |
484 | MI.getOperand(i: 0).getReg() == MI.getOperand(i: 1).getReg()) { |
485 | NumberOfSelfCopies++; |
486 | LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: " ); |
487 | LLVM_DEBUG(MI.dump()); |
488 | InstrsToErase.push_back(Elt: &MI); |
489 | continue; |
490 | } |
491 | } |
492 | MachineInstr *DefMIToErase = nullptr; |
493 | SmallSet<Register, 4> UpdatedRegs; |
494 | if (TII->convertToImmediateForm(MI, RegsToUpdate&: UpdatedRegs, KilledDef: &DefMIToErase)) { |
495 | Changed = true; |
496 | NumRRConvertedInPreEmit++; |
497 | LLVM_DEBUG(dbgs() << "Converted instruction to imm form: " ); |
498 | LLVM_DEBUG(MI.dump()); |
499 | if (DefMIToErase) { |
500 | InstrsToErase.push_back(Elt: DefMIToErase); |
501 | } |
502 | } |
503 | if (TII->foldFrameOffset(MI)) { |
504 | Changed = true; |
505 | NumFrameOffFoldInPreEmit++; |
506 | LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: " ); |
507 | LLVM_DEBUG(MI.dump()); |
508 | } |
509 | if (TII->optimizeCmpPostRA(MI)) { |
510 | Changed = true; |
511 | NumCmpsInPreEmit++; |
512 | LLVM_DEBUG(dbgs() << "Optimize compare by using record form: " ); |
513 | LLVM_DEBUG(MI.dump()); |
514 | InstrsToErase.push_back(Elt: &MI); |
515 | } |
516 | } |
517 | |
518 | // Eliminate conditional branch based on a constant CR bit by |
519 | // CRSET or CRUNSET. We eliminate the conditional branch or |
520 | // convert it into an unconditional branch. Also, if the CR bit |
521 | // is not used by other instructions, we eliminate CRSET as well. |
522 | auto I = MBB.getFirstInstrTerminator(); |
523 | if (I == MBB.instr_end()) |
524 | continue; |
525 | MachineInstr *Br = &*I; |
526 | if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn) |
527 | continue; |
528 | MachineInstr *CRSetMI = nullptr; |
529 | Register CRBit = Br->getOperand(i: 0).getReg(); |
530 | unsigned CRReg = getCRFromCRBit(SrcReg: CRBit); |
531 | bool SeenUse = false; |
532 | MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend(); |
533 | for (It++; It != Er; It++) { |
534 | if (It->modifiesRegister(Reg: CRBit, TRI)) { |
535 | if ((It->getOpcode() == PPC::CRUNSET || |
536 | It->getOpcode() == PPC::CRSET) && |
537 | It->getOperand(i: 0).getReg() == CRBit) |
538 | CRSetMI = &*It; |
539 | break; |
540 | } |
541 | if (It->readsRegister(Reg: CRBit, TRI)) |
542 | SeenUse = true; |
543 | } |
544 | if (!CRSetMI) continue; |
545 | |
546 | unsigned CRSetOp = CRSetMI->getOpcode(); |
547 | if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) || |
548 | (Br->getOpcode() == PPC::BC && CRSetOp == PPC::CRUNSET)) { |
549 | // Remove this branch since it cannot be taken. |
550 | InstrsToErase.push_back(Elt: Br); |
551 | MBB.removeSuccessor(Succ: Br->getOperand(i: 1).getMBB()); |
552 | } |
553 | else { |
554 | // This conditional branch is always taken. So, remove all branches |
555 | // and insert an unconditional branch to the destination of this. |
556 | MachineBasicBlock::iterator It = Br, Er = MBB.end(); |
557 | for (; It != Er; It++) { |
558 | if (It->isDebugInstr()) continue; |
559 | assert(It->isTerminator() && "Non-terminator after a terminator" ); |
560 | InstrsToErase.push_back(Elt: &*It); |
561 | } |
562 | if (!MBB.isLayoutSuccessor(MBB: Br->getOperand(i: 1).getMBB())) { |
563 | ArrayRef<MachineOperand> NoCond; |
564 | TII->insertBranch(MBB, TBB: Br->getOperand(i: 1).getMBB(), FBB: nullptr, |
565 | Cond: NoCond, DL: Br->getDebugLoc()); |
566 | } |
567 | for (auto &Succ : MBB.successors()) |
568 | if (Succ != Br->getOperand(i: 1).getMBB()) { |
569 | MBB.removeSuccessor(Succ); |
570 | break; |
571 | } |
572 | } |
573 | |
574 | // If the CRBit is not used by another instruction, we can eliminate |
575 | // CRSET/CRUNSET instruction. |
576 | if (!SeenUse) { |
577 | // We need to check use of the CRBit in successors. |
578 | for (auto &SuccMBB : MBB.successors()) |
579 | if (SuccMBB->isLiveIn(Reg: CRBit) || SuccMBB->isLiveIn(Reg: CRReg)) { |
580 | SeenUse = true; |
581 | break; |
582 | } |
583 | if (!SeenUse) |
584 | InstrsToErase.push_back(Elt: CRSetMI); |
585 | } |
586 | } |
587 | for (MachineInstr *MI : InstrsToErase) { |
588 | LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: " ); |
589 | LLVM_DEBUG(MI->dump()); |
590 | MI->eraseFromParent(); |
591 | NumRemovedInPreEmit++; |
592 | } |
593 | return Changed; |
594 | } |
595 | }; |
596 | } |
597 | |
598 | INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole" , |
599 | false, false) |
600 | char PPCPreEmitPeephole::ID = 0; |
601 | |
602 | FunctionPass *llvm::createPPCPreEmitPeepholePass() { |
603 | return new PPCPreEmitPeephole(); |
604 | } |
605 | |