1//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass performs peephole optimizations to clean up ugly code
10// sequences at the MachineInstruction layer. It runs at the end of
11// the SSA phases, following VSX swap removal. A pass of dead code
12// elimination follows this one for quick clean-up of any dead
13// instructions introduced here. Although we could do this as callbacks
14// from the generic peephole pass, this would have a couple of bad
15// effects: it might remove optimization opportunities for VSX swap
16// removal, and it would miss cleanups made possible following VSX
17// swap removal.
18//
19// NOTE: We run the verifier after this pass in Asserts/Debug builds so it
20// is important to keep the code valid after transformations.
21// Common causes of errors stem from violating the contract specified
22// by kill flags. Whenever a transformation changes the live range of
23// a register, that register should be added to the work list using
24// addRegToUpdate(RegsToUpdate, <Reg>). Furthermore, if a transformation
25// is changing the definition of a register (i.e. removing the single
26// definition of the original vreg), it needs to provide a dummy
27// definition of that register using addDummyDef(<MBB>, <Reg>).
28//===---------------------------------------------------------------------===//
29
30#include "MCTargetDesc/PPCMCTargetDesc.h"
31#include "MCTargetDesc/PPCPredicates.h"
32#include "PPC.h"
33#include "PPCInstrBuilder.h"
34#include "PPCInstrInfo.h"
35#include "PPCMachineFunctionInfo.h"
36#include "PPCTargetMachine.h"
37#include "llvm/ADT/Statistic.h"
38#include "llvm/CodeGen/LiveVariables.h"
39#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
40#include "llvm/CodeGen/MachineDominators.h"
41#include "llvm/CodeGen/MachineFrameInfo.h"
42#include "llvm/CodeGen/MachineFunctionPass.h"
43#include "llvm/CodeGen/MachineInstrBuilder.h"
44#include "llvm/CodeGen/MachinePostDominators.h"
45#include "llvm/CodeGen/MachineRegisterInfo.h"
46#include "llvm/InitializePasses.h"
47#include "llvm/Support/Debug.h"
48#include "llvm/Support/DebugCounter.h"
49
using namespace llvm;

#define DEBUG_TYPE "ppc-mi-peepholes"

// Statistics reported by this pass (visible with -stats). Each counter is
// incremented by the corresponding transformation below.
STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
STATISTIC(MultiTOCSaves,
          "Number of functions with multiple TOC saves that must be kept");
STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
STATISTIC(NumConvertedToImmediateForm,
          "Number of instructions converted to their immediate form");
STATISTIC(NumFunctionsEnteredInMIPeephole,
          "Number of functions entered in PPC MI Peepholes");
STATISTIC(NumFixedPointIterations,
          "Number of fixed-point iterations converting reg-reg instructions "
          "to reg-imm ones");
STATISTIC(NumRotatesCollapsed,
          "Number of pairs of rotate left, clear left/right collapsed");
STATISTIC(NumEXTSWAndSLDICombined,
          "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
STATISTIC(NumLoadImmZeroFoldedAndRemoved,
          "Number of LI(8) reg, 0 that are folded to r0 and removed");

// Command-line knobs for enabling/disabling individual peepholes.

// Repeat the reg-reg -> reg-imm conversion until no further instruction is
// converted; a single pass may miss second-order opportunities.
static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(Val: true),
                   cl::desc("Iterate to a fixed point when attempting to "
                            "convert reg-reg instructions to reg-imm"));

static cl::opt<bool>
ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(Val: true),
              cl::desc("Convert eligible reg+reg instructions to reg+imm"));

static cl::opt<bool>
    EnableSExtElimination("ppc-eliminate-signext",
                          cl::desc("enable elimination of sign-extensions"),
                          cl::init(Val: true), cl::Hidden);

static cl::opt<bool>
    EnableZExtElimination("ppc-eliminate-zeroext",
                          cl::desc("enable elimination of zero-extensions"),
                          cl::init(Val: true), cl::Hidden);

// Off by default: turning conditional traps into unconditional ones deletes
// all code following the trap.
static cl::opt<bool>
    EnableTrapOptimization("ppc-opt-conditional-trap",
                           cl::desc("enable optimization of conditional traps"),
                           cl::init(Val: false), cl::Hidden);

// Debug counters allow bisecting which individual peephole application
// introduced a miscompile (-debug-counter=...).
DEBUG_COUNTER(
    PeepholeXToICounter, "ppc-xtoi-peephole",
    "Controls whether PPC reg+reg to reg+imm peephole is performed on a MI");

DEBUG_COUNTER(PeepholePerOpCounter, "ppc-per-op-peephole",
              "Controls whether PPC per opcode peephole is performed on a MI");
105
106namespace {
107
// Late SSA-level peephole pass (runs after VSX swap removal; see the file
// header). Walks every MachineInstr and applies the PPC-specific cleanups
// implemented in simplifyCode() and its helpers.
struct PPCMIPeephole : public MachineFunctionPass {

  static char ID;
  // Cached per-function state, set up by initialize().
  const PPCInstrInfo *TII;
  MachineFunction *MF;
  MachineRegisterInfo *MRI;
  LiveVariables *LV;

  PPCMIPeephole() : MachineFunctionPass(ID) {
    initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
  }

private:
  MachineDominatorTree *MDT;
  MachinePostDominatorTree *MPDT;
  MachineBlockFrequencyInfo *MBFI;
  // Frequency of the entry block; UpdateTOCSaves compares other blocks
  // against it when deciding to save the TOC in the prologue instead.
  BlockFrequency EntryFreq;
  // Virtual registers whose kill flags must be recomputed because some
  // transformation changed their live range (see addRegToUpdate).
  SmallSet<Register, 16> RegsToUpdate;

  // Initialize class variables.
  void initialize(MachineFunction &MFParm);

  // Perform peepholes.
  bool simplifyCode();

  // Perform peepholes.
  bool eliminateRedundantCompare();
  bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
  bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
  bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
                                       MachineInstr *&ToErase);
  void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
                      MachineInstr *MI);

  // A number of transformations will eliminate the definition of a register
  // as all of its uses will be removed. However, this leaves a register
  // without a definition for LiveVariables. Such transformations should
  // use this function to provide a dummy definition of the register that
  // will simply be removed by DCE.
  void addDummyDef(MachineBasicBlock &MBB, MachineInstr *At, Register Reg) {
    BuildMI(BB&: MBB, I: At, MIMD: At->getDebugLoc(), MCID: TII->get(Opcode: PPC::IMPLICIT_DEF), DestReg: Reg);
  }
  // Queue Reg for kill-flag recomputation; normally invoked through the
  // addRegToUpdate macro which supplies the caller's __LINE__ for debugging.
  void addRegToUpdateWithLine(Register Reg, int Line);
  void convertUnprimedAccPHIs(const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
                              SmallVectorImpl<MachineInstr *> &PHIs,
                              Register Dst);

public:

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // This pass both consumes and keeps up to date the following analyses.
    AU.addRequired<LiveVariablesWrapperPass>();
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addRequired<MachinePostDominatorTreeWrapperPass>();
    AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
    AU.addPreserved<LiveVariablesWrapperPass>();
    AU.addPreserved<MachineDominatorTreeWrapperPass>();
    AU.addPreserved<MachinePostDominatorTreeWrapperPass>();
    AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Main entry point for this pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    initialize(MFParm&: MF);
    // At this point, TOC pointer should not be used in a function that uses
    // PC-Relative addressing.
    assert((MF.getRegInfo().use_empty(PPC::X2) ||
            !MF.getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) &&
           "TOC pointer used in a function using PC-Relative addressing!");
    if (skipFunction(F: MF.getFunction()))
      return false;
    bool Changed = simplifyCode();
    // In Asserts/Debug builds, verify the function whenever we changed it so
    // broken kill flags / liveness are caught right at the source.
#ifndef NDEBUG
    if (Changed)
      MF.verify(this, "Error in PowerPC MI Peephole optimization, compile with "
                      "-mllvm -disable-ppc-peephole");
#endif
    return Changed;
  }
};
188
189#define addRegToUpdate(R) addRegToUpdateWithLine(R, __LINE__)
190void PPCMIPeephole::addRegToUpdateWithLine(Register Reg, int Line) {
191 if (!Register::isVirtualRegister(Reg))
192 return;
193 if (RegsToUpdate.insert(V: Reg).second)
194 LLVM_DEBUG(dbgs() << "Adding register: " << Register::virtReg2Index(Reg)
195 << " on line " << Line
196 << " for re-computation of kill flags\n");
197}
198
// Initialize class variables.
void PPCMIPeephole::initialize(MachineFunction &MFParm) {
  MF = &MFParm;
  MRI = &MF->getRegInfo();
  // Cache the analyses that this pass both uses and preserves.
  MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
  MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
  LV = &getAnalysis<LiveVariablesWrapperPass>().getLV();
  // Entry-block frequency is compared against other blocks' frequencies in
  // UpdateTOCSaves when deciding to save the TOC in the prologue.
  EntryFreq = MBFI->getEntryFreq();
  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  // Start every function with an empty kill-flag work list.
  RegsToUpdate.clear();
  LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
  LLVM_DEBUG(MF->dump());
}
213
214static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
215 MachineRegisterInfo *MRI) {
216 assert(Op && "Invalid Operand!");
217 if (!Op->isReg())
218 return nullptr;
219
220 Register Reg = Op->getReg();
221 if (!Reg.isVirtual())
222 return nullptr;
223
224 return MRI->getVRegDef(Reg);
225}
226
// This function returns number of known zero bits in output of MI
// starting from the most significant bit.
static unsigned getKnownLeadingZeroCount(const unsigned Reg,
                                         const PPCInstrInfo *TII,
                                         const MachineRegisterInfo *MRI) {
  MachineInstr *MI = MRI->getVRegDef(Reg);
  unsigned Opcode = MI->getOpcode();
  // Rotate-left-doubleword and clear-left: the MB field (operand 3) is the
  // number of high-order bits cleared by the mask.
  if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
      Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
    return MI->getOperand(i: 3).getImm();

  // RLDIC's mask runs from MB (operand 3) to 63-SH (SH is operand 2); when
  // the mask does not reach above bit MB, the top MB bits are known zero.
  if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
      MI->getOperand(i: 3).getImm() <= 63 - MI->getOperand(i: 2).getImm())
    return MI->getOperand(i: 3).getImm();

  // 32-bit rotate-and-mask with a non-wrapping mask (MB <= ME, operands 3
  // and 4): the upper word plus the first MB bits of the lower word are zero.
  if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
       Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
       Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
      MI->getOperand(i: 3).getImm() <= MI->getOperand(i: 4).getImm())
    return 32 + MI->getOperand(i: 3).getImm();

  // AND with a 16-bit unsigned immediate: at least 48 leading zeros plus
  // the leading zeros of the immediate itself.
  if (Opcode == PPC::ANDI_rec) {
    uint16_t Imm = MI->getOperand(i: 2).getImm();
    return 48 + llvm::countl_zero(Val: Imm);
  }

  if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
      Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
      Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
    // The result ranges from 0 to 32.
    return 58;

  if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
      Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
    // The result ranges from 0 to 64.
    return 57;

  // Zero-extending halfword loads leave the top 48 bits clear.
  if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
      Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
      Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
      Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
    return 48;

  // Zero-extending byte loads leave the top 56 bits clear.
  if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
      Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
      Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
      Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
    return 56;

  // AND: a bit is zero if it is zero in either input, so take the larger of
  // the two operands' known leading zero counts.
  if (Opcode == PPC::AND || Opcode == PPC::AND8 || Opcode == PPC::AND_rec ||
      Opcode == PPC::AND8_rec)
    return std::max(
        a: getKnownLeadingZeroCount(Reg: MI->getOperand(i: 1).getReg(), TII, MRI),
        b: getKnownLeadingZeroCount(Reg: MI->getOperand(i: 2).getReg(), TII, MRI));

  // OR/XOR: a bit is only known zero if it is zero in both inputs, so take
  // the smaller of the two counts.
  if (Opcode == PPC::OR || Opcode == PPC::OR8 || Opcode == PPC::XOR ||
      Opcode == PPC::XOR8 || Opcode == PPC::OR_rec ||
      Opcode == PPC::OR8_rec || Opcode == PPC::XOR_rec ||
      Opcode == PPC::XOR8_rec)
    return std::min(
        a: getKnownLeadingZeroCount(Reg: MI->getOperand(i: 1).getReg(), TII, MRI),
        b: getKnownLeadingZeroCount(Reg: MI->getOperand(i: 2).getReg(), TII, MRI));

  // Fallback: anything target-independently known to be zero-extended from
  // 32 bits has at least 32 leading zeros.
  if (TII->isZeroExtended(Reg, MRI))
    return 32;

  return 0;
}
295
// This function maintains a map for the pairs <TOC Save Instr, Keep>
// Each time a new TOC save is encountered, it checks if any of the existing
// ones are dominated by the new one. If so, it marks the existing one as
// redundant by setting it's entry in the map as false. It then adds the new
// instruction to the map with either true or false depending on if any
// existing instructions dominated the new one.
void PPCMIPeephole::UpdateTOCSaves(
    std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
  assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
  // FIXME: Saving TOC in prologue hasn't been implemented well in AIX ABI part,
  // here only support it under ELFv2.
  if (MF->getSubtarget<PPCSubtarget>().isELFv2ABI()) {
    PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();

    MachineBasicBlock *Entry = &MF->front();
    BlockFrequency CurrBlockFreq = MBFI->getBlockFreq(MBB: MI->getParent());

    // If the block in which the TOC save resides is in a block that
    // post-dominates Entry, or a block that is hotter than entry (keep in mind
    // that early MachineLICM has already run so the TOC save won't be hoisted)
    // we can just do the save in the prologue.
    if (CurrBlockFreq > EntryFreq || MPDT->dominates(A: MI->getParent(), B: Entry))
      FI->setMustSaveTOC(true);

    // If we are saving the TOC in the prologue, all the TOC saves can be
    // removed from the code.
    if (FI->mustSaveTOC()) {
      // Mark every save seen so far — and the new one — as removable.
      for (auto &TOCSave : TOCSaves)
        TOCSave.second = false;
      // Add new instruction to map.
      TOCSaves[MI] = false;
      return;
    }
  }

  // Dominance-based pruning: keep MI only if no existing save already
  // dominates it, and invalidate any existing save that MI dominates.
  bool Keep = true;
  for (auto &I : TOCSaves) {
    MachineInstr *CurrInst = I.first;
    // If new instruction dominates an existing one, mark existing one as
    // redundant.
    if (I.second && MDT->dominates(A: MI, B: CurrInst))
      I.second = false;
    // Check if the new instruction is redundant.
    if (MDT->dominates(A: CurrInst, B: MI)) {
      Keep = false;
      break;
    }
  }
  // Add new instruction to map.
  TOCSaves[MI] = Keep;
}
347
348// This function returns a list of all PHI nodes in the tree starting from
349// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
350// The list initially only contains the root PHI. When we visit a PHI node, we
351// add it to the list. We continue to look for other PHI node operands while
352// there are nodes to visit in the list. The function returns false if the
353// optimization cannot be applied on this tree.
354static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
355 MachineInstr *RootPHI,
356 SmallVectorImpl<MachineInstr *> &PHIs) {
357 PHIs.push_back(Elt: RootPHI);
358 unsigned VisitedIndex = 0;
359 while (VisitedIndex < PHIs.size()) {
360 MachineInstr *VisitedPHI = PHIs[VisitedIndex];
361 for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
362 PHIOp != NumOps; PHIOp += 2) {
363 Register RegOp = VisitedPHI->getOperand(i: PHIOp).getReg();
364 if (!RegOp.isVirtual())
365 return false;
366 MachineInstr *Instr = MRI->getVRegDef(Reg: RegOp);
367 // While collecting the PHI nodes, we check if they can be converted (i.e.
368 // all the operands are either copies, implicit defs or PHI nodes).
369 unsigned Opcode = Instr->getOpcode();
370 if (Opcode == PPC::COPY) {
371 Register Reg = Instr->getOperand(i: 1).getReg();
372 if (!Reg.isVirtual() || MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
373 return false;
374 } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
375 return false;
376 // If we detect a cycle in the PHI nodes, we exit. It would be
377 // possible to change cycles as well, but that would add a lot
378 // of complexity for a case that is unlikely to occur with MMA
379 // code.
380 if (Opcode != PPC::PHI)
381 continue;
382 if (llvm::is_contained(Range&: PHIs, Element: Instr))
383 return false;
384 PHIs.push_back(Elt: Instr);
385 }
386 VisitedIndex++;
387 }
388 return true;
389}
390
// This function changes the unprimed accumulator PHI nodes in the PHIs list to
// primed accumulator PHI nodes. The list is traversed in reverse order to
// change all the PHI operands of a PHI node before changing the node itself.
// We keep a map to associate each changed PHI node to its non-changed form.
void PPCMIPeephole::convertUnprimedAccPHIs(
    const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
    SmallVectorImpl<MachineInstr *> &PHIs, Register Dst) {
  DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap;
  // PHIs is in BFS order from the root, so reverse iteration guarantees that
  // a node's PHI operands (appended after it) have already been converted.
  for (MachineInstr *PHI : llvm::reverse(C&: PHIs)) {
    SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps;
    // We check if the current PHI node can be changed by looking at its
    // operands. If all the operands are either copies from primed
    // accumulators, implicit definitions or other unprimed accumulator
    // PHI nodes, we change it.
    for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
         PHIOp += 2) {
      Register RegOp = PHI->getOperand(i: PHIOp).getReg();
      MachineInstr *PHIInput = MRI->getVRegDef(Reg: RegOp);
      unsigned Opcode = PHIInput->getOpcode();
      assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
              Opcode == PPC::PHI) &&
             "Unexpected instruction");
      if (Opcode == PPC::COPY) {
        assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
                   &PPC::ACCRCRegClass &&
               "Unexpected register class");
        // Use the copy's primed-accumulator source directly as the operand.
        PHIOps.push_back(Elt: {PHIInput->getOperand(i: 1), PHI->getOperand(i: PHIOp + 1)});
      } else if (Opcode == PPC::IMPLICIT_DEF) {
        // Materialize a fresh implicit definition in the primed register
        // class right before the unprimed one.
        Register AccReg = MRI->createVirtualRegister(RegClass: &PPC::ACCRCRegClass);
        BuildMI(BB&: *PHIInput->getParent(), I: PHIInput, MIMD: PHIInput->getDebugLoc(),
                MCID: TII->get(Opcode: PPC::IMPLICIT_DEF), DestReg: AccReg);
        PHIOps.push_back(Elt: {MachineOperand::CreateReg(Reg: AccReg, isDef: false),
                          PHI->getOperand(i: PHIOp + 1)});
      } else if (Opcode == PPC::PHI) {
        // We found a PHI operand. At this point we know this operand
        // has already been changed so we get its associated changed form
        // from the map.
        assert(ChangedPHIMap.count(PHIInput) == 1 &&
               "This PHI node should have already been changed.");
        MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(Val: PHIInput);
        PHIOps.push_back(Elt: {MachineOperand::CreateReg(
                              Reg: PrimedAccPHI->getOperand(i: 0).getReg(), isDef: false),
                          PHI->getOperand(i: PHIOp + 1)});
      }
    }
    Register AccReg = Dst;
    // If the PHI node we are changing is the root node, the register it defines
    // will be the destination register of the original copy (of the PHI def).
    // For all other PHI's in the list, we need to create another primed
    // accumulator virtual register as the PHI will no longer define the
    // unprimed accumulator.
    if (PHI != PHIs[0])
      AccReg = MRI->createVirtualRegister(RegClass: &PPC::ACCRCRegClass);
    MachineInstrBuilder NewPHI = BuildMI(
        BB&: *PHI->getParent(), I: PHI, MIMD: PHI->getDebugLoc(), MCID: TII->get(Opcode: PPC::PHI), DestReg: AccReg);
    for (auto RegMBB : PHIOps) {
      NewPHI.add(MO: RegMBB.first).add(MO: RegMBB.second);
      if (MRI->isSSA())
        addRegToUpdate(RegMBB.first.getReg());
    }
    // The liveness of old PHI and new PHI have to be updated.
    addRegToUpdate(PHI->getOperand(0).getReg());
    addRegToUpdate(AccReg);
    ChangedPHIMap[PHI] = NewPHI.getInstr();
    LLVM_DEBUG(dbgs() << "Converting PHI: ");
    LLVM_DEBUG(PHI->dump());
    LLVM_DEBUG(dbgs() << "To: ");
    LLVM_DEBUG(NewPHI.getInstr()->dump());
  }
}
461
462// Perform peephole optimizations.
463bool PPCMIPeephole::simplifyCode() {
464 bool Simplified = false;
465 bool TrapOpt = false;
466 MachineInstr* ToErase = nullptr;
467 std::map<MachineInstr *, bool> TOCSaves;
468 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
469 NumFunctionsEnteredInMIPeephole++;
470 if (ConvertRegReg) {
471 // Fixed-point conversion of reg/reg instructions fed by load-immediate
472 // into reg/imm instructions. FIXME: This is expensive, control it with
473 // an option.
474 bool SomethingChanged = false;
475 do {
476 NumFixedPointIterations++;
477 SomethingChanged = false;
478 for (MachineBasicBlock &MBB : *MF) {
479 for (MachineInstr &MI : MBB) {
480 if (MI.isDebugInstr())
481 continue;
482
483 if (!DebugCounter::shouldExecute(CounterName: PeepholeXToICounter))
484 continue;
485
486 SmallSet<Register, 4> RRToRIRegsToUpdate;
487 if (!TII->convertToImmediateForm(MI, RegsToUpdate&: RRToRIRegsToUpdate))
488 continue;
489 for (Register R : RRToRIRegsToUpdate)
490 addRegToUpdate(R);
491 // The updated instruction may now have new register operands.
492 // Conservatively add them to recompute the flags as well.
493 for (const MachineOperand &MO : MI.operands())
494 if (MO.isReg())
495 addRegToUpdate(MO.getReg());
496 // We don't erase anything in case the def has other uses. Let DCE
497 // remove it if it can be removed.
498 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
499 LLVM_DEBUG(MI.dump());
500 NumConvertedToImmediateForm++;
501 SomethingChanged = true;
502 Simplified = true;
503 continue;
504 }
505 }
506 } while (SomethingChanged && FixedPointRegToImm);
507 }
508
509 // Since we are deleting this instruction, we need to run LiveVariables
510 // on any of its definitions that are marked as needing an update since
511 // we can't run LiveVariables on a deleted register. This only needs
512 // to be done for defs since uses will have their own defining
513 // instructions so we won't be running LiveVariables on a deleted reg.
514 auto recomputeLVForDyingInstr = [&]() {
515 if (RegsToUpdate.empty())
516 return;
517 for (MachineOperand &MO : ToErase->operands()) {
518 if (!MO.isReg() || !MO.isDef() || !RegsToUpdate.count(V: MO.getReg()))
519 continue;
520 Register RegToUpdate = MO.getReg();
521 RegsToUpdate.erase(V: RegToUpdate);
522 // If some transformation has introduced an additional definition of
523 // this register (breaking SSA), we can safely convert this def to
524 // a def of an invalid register as the instruction is going away.
525 if (!MRI->getUniqueVRegDef(Reg: RegToUpdate))
526 MO.setReg(PPC::NoRegister);
527 LV->recomputeForSingleDefVirtReg(Reg: RegToUpdate);
528 }
529 };
530
531 for (MachineBasicBlock &MBB : *MF) {
532 for (MachineInstr &MI : MBB) {
533
534 // If the previous instruction was marked for elimination,
535 // remove it now.
536 if (ToErase) {
537 LLVM_DEBUG(dbgs() << "Deleting instruction: ");
538 LLVM_DEBUG(ToErase->dump());
539 recomputeLVForDyingInstr();
540 ToErase->eraseFromParent();
541 ToErase = nullptr;
542 }
543 // If a conditional trap instruction got optimized to an
544 // unconditional trap, eliminate all the instructions after
545 // the trap.
546 if (EnableTrapOptimization && TrapOpt) {
547 ToErase = &MI;
548 continue;
549 }
550
551 // Ignore debug instructions.
552 if (MI.isDebugInstr())
553 continue;
554
555 if (!DebugCounter::shouldExecute(CounterName: PeepholePerOpCounter))
556 continue;
557
558 // Per-opcode peepholes.
559 switch (MI.getOpcode()) {
560
561 default:
562 break;
563 case PPC::COPY: {
564 Register Src = MI.getOperand(i: 1).getReg();
565 Register Dst = MI.getOperand(i: 0).getReg();
566 if (!Src.isVirtual() || !Dst.isVirtual())
567 break;
568 if (MRI->getRegClass(Reg: Src) != &PPC::UACCRCRegClass ||
569 MRI->getRegClass(Reg: Dst) != &PPC::ACCRCRegClass)
570 break;
571
572 // We are copying an unprimed accumulator to a primed accumulator.
573 // If the input to the copy is a PHI that is fed only by (i) copies in
574 // the other direction (ii) implicitly defined unprimed accumulators or
575 // (iii) other PHI nodes satisfying (i) and (ii), we can change
576 // the PHI to a PHI on primed accumulators (as long as we also change
577 // its operands). To detect and change such copies, we first get a list
578 // of all the PHI nodes starting from the root PHI node in BFS order.
579 // We then visit all these PHI nodes to check if they can be changed to
580 // primed accumulator PHI nodes and if so, we change them.
581 MachineInstr *RootPHI = MRI->getVRegDef(Reg: Src);
582 if (RootPHI->getOpcode() != PPC::PHI)
583 break;
584
585 SmallVector<MachineInstr *, 4> PHIs;
586 if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
587 break;
588
589 convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
590
591 ToErase = &MI;
592 break;
593 }
594 case PPC::LI:
595 case PPC::LI8: {
596 // If we are materializing a zero, look for any use operands for which
597 // zero means immediate zero. All such operands can be replaced with
598 // PPC::ZERO.
599 if (!MI.getOperand(i: 1).isImm() || MI.getOperand(i: 1).getImm() != 0)
600 break;
601 Register MIDestReg = MI.getOperand(i: 0).getReg();
602 bool Folded = false;
603 for (MachineInstr& UseMI : MRI->use_instructions(Reg: MIDestReg))
604 Folded |= TII->onlyFoldImmediate(UseMI, DefMI&: MI, Reg: MIDestReg);
605 if (MRI->use_nodbg_empty(RegNo: MIDestReg)) {
606 ++NumLoadImmZeroFoldedAndRemoved;
607 ToErase = &MI;
608 }
609 if (Folded)
610 addRegToUpdate(MIDestReg);
611 Simplified |= Folded;
612 break;
613 }
614 case PPC::STW:
615 case PPC::STD: {
616 MachineFrameInfo &MFI = MF->getFrameInfo();
617 if (MFI.hasVarSizedObjects() ||
618 (!MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
619 !MF->getSubtarget<PPCSubtarget>().isAIXABI()))
620 break;
621 // When encountering a TOC save instruction, call UpdateTOCSaves
622 // to add it to the TOCSaves map and mark any existing TOC saves
623 // it dominates as redundant.
624 if (TII->isTOCSaveMI(MI))
625 UpdateTOCSaves(TOCSaves, MI: &MI);
626 break;
627 }
628 case PPC::XXPERMDI: {
629 // Perform simplifications of 2x64 vector swaps and splats.
630 // A swap is identified by an immediate value of 2, and a splat
631 // is identified by an immediate value of 0 or 3.
632 int Immed = MI.getOperand(i: 3).getImm();
633
634 if (Immed == 1)
635 break;
636
637 // For each of these simplifications, we need the two source
638 // regs to match. Unfortunately, MachineCSE ignores COPY and
639 // SUBREG_TO_REG, so for example we can see
640 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
641 // We have to look through chains of COPY and SUBREG_TO_REG
642 // to find the real source values for comparison.
643 Register TrueReg1 =
644 TRI->lookThruCopyLike(SrcReg: MI.getOperand(i: 1).getReg(), MRI);
645 Register TrueReg2 =
646 TRI->lookThruCopyLike(SrcReg: MI.getOperand(i: 2).getReg(), MRI);
647
648 if (!(TrueReg1 == TrueReg2 && TrueReg1.isVirtual()))
649 break;
650
651 MachineInstr *DefMI = MRI->getVRegDef(Reg: TrueReg1);
652
653 if (!DefMI)
654 break;
655
656 unsigned DefOpc = DefMI->getOpcode();
657
658 // If this is a splat fed by a splatting load, the splat is
659 // redundant. Replace with a copy. This doesn't happen directly due
660 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
661 // a load of a double to a vector of 64-bit integers.
662 auto isConversionOfLoadAndSplat = [=]() -> bool {
663 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
664 return false;
665 Register FeedReg1 =
666 TRI->lookThruCopyLike(SrcReg: DefMI->getOperand(i: 1).getReg(), MRI);
667 if (FeedReg1.isVirtual()) {
668 MachineInstr *LoadMI = MRI->getVRegDef(Reg: FeedReg1);
669 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
670 return true;
671 }
672 return false;
673 };
674 if ((Immed == 0 || Immed == 3) &&
675 (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
676 LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
677 "to load-and-splat/copy: ");
678 LLVM_DEBUG(MI.dump());
679 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
680 DestReg: MI.getOperand(i: 0).getReg())
681 .add(MO: MI.getOperand(i: 1));
682 addRegToUpdate(MI.getOperand(1).getReg());
683 ToErase = &MI;
684 Simplified = true;
685 }
686
687 // If this is a splat or a swap fed by another splat, we
688 // can replace it with a copy.
689 if (DefOpc == PPC::XXPERMDI) {
690 Register DefReg1 = DefMI->getOperand(i: 1).getReg();
691 Register DefReg2 = DefMI->getOperand(i: 2).getReg();
692 unsigned DefImmed = DefMI->getOperand(i: 3).getImm();
693
694 // If the two inputs are not the same register, check to see if
695 // they originate from the same virtual register after only
696 // copy-like instructions.
697 if (DefReg1 != DefReg2) {
698 Register FeedReg1 = TRI->lookThruCopyLike(SrcReg: DefReg1, MRI);
699 Register FeedReg2 = TRI->lookThruCopyLike(SrcReg: DefReg2, MRI);
700
701 if (!(FeedReg1 == FeedReg2 && FeedReg1.isVirtual()))
702 break;
703 }
704
705 if (DefImmed == 0 || DefImmed == 3) {
706 LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
707 "to splat/copy: ");
708 LLVM_DEBUG(MI.dump());
709 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
710 DestReg: MI.getOperand(i: 0).getReg())
711 .add(MO: MI.getOperand(i: 1));
712 addRegToUpdate(MI.getOperand(1).getReg());
713 ToErase = &MI;
714 Simplified = true;
715 }
716
717 // If this is a splat fed by a swap, we can simplify modify
718 // the splat to splat the other value from the swap's input
719 // parameter.
720 else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
721 LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
722 LLVM_DEBUG(MI.dump());
723 addRegToUpdate(MI.getOperand(1).getReg());
724 addRegToUpdate(MI.getOperand(2).getReg());
725 MI.getOperand(i: 1).setReg(DefReg1);
726 MI.getOperand(i: 2).setReg(DefReg2);
727 MI.getOperand(i: 3).setImm(3 - Immed);
728 addRegToUpdate(DefReg1);
729 addRegToUpdate(DefReg2);
730 Simplified = true;
731 }
732
733 // If this is a swap fed by a swap, we can replace it
734 // with a copy from the first swap's input.
735 else if (Immed == 2 && DefImmed == 2) {
736 LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
737 LLVM_DEBUG(MI.dump());
738 addRegToUpdate(MI.getOperand(1).getReg());
739 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
740 DestReg: MI.getOperand(i: 0).getReg())
741 .add(MO: DefMI->getOperand(i: 1));
742 addRegToUpdate(DefMI->getOperand(0).getReg());
743 addRegToUpdate(DefMI->getOperand(1).getReg());
744 ToErase = &MI;
745 Simplified = true;
746 }
747 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
748 DefOpc == PPC::XXPERMDIs &&
749 (DefMI->getOperand(i: 2).getImm() == 0 ||
750 DefMI->getOperand(i: 2).getImm() == 3)) {
751 ToErase = &MI;
752 Simplified = true;
753 // Swap of a splat, convert to copy.
754 if (Immed == 2) {
755 LLVM_DEBUG(dbgs() << "Optimizing swap(splat) => copy(splat): ");
756 LLVM_DEBUG(MI.dump());
757 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
758 DestReg: MI.getOperand(i: 0).getReg())
759 .add(MO: MI.getOperand(i: 1));
760 addRegToUpdate(MI.getOperand(1).getReg());
761 break;
762 }
763 // Splat fed by another splat - switch the output of the first
764 // and remove the second.
765 DefMI->getOperand(i: 0).setReg(MI.getOperand(i: 0).getReg());
766 LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
767 LLVM_DEBUG(MI.dump());
768 } else if (Immed == 2 &&
769 (DefOpc == PPC::VSPLTB || DefOpc == PPC::VSPLTH ||
770 DefOpc == PPC::VSPLTW || DefOpc == PPC::XXSPLTW ||
771 DefOpc == PPC::VSPLTISB || DefOpc == PPC::VSPLTISH ||
772 DefOpc == PPC::VSPLTISW)) {
773 // Swap of various vector splats, convert to copy.
774 ToErase = &MI;
775 Simplified = true;
776 LLVM_DEBUG(dbgs() << "Optimizing swap(vsplt(is)?[b|h|w]|xxspltw) => "
777 "copy(vsplt(is)?[b|h|w]|xxspltw): ");
778 LLVM_DEBUG(MI.dump());
779 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
780 DestReg: MI.getOperand(i: 0).getReg())
781 .add(MO: MI.getOperand(i: 1));
782 addRegToUpdate(MI.getOperand(1).getReg());
783 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
784 TII->isLoadFromConstantPool(I: DefMI)) {
785 const Constant *C = TII->getConstantFromConstantPool(I: DefMI);
786 if (C && C->getType()->isVectorTy() && C->getSplatValue()) {
787 ToErase = &MI;
788 Simplified = true;
789 LLVM_DEBUG(dbgs()
790 << "Optimizing swap(splat pattern from constant-pool) "
791 "=> copy(splat pattern from constant-pool): ");
792 LLVM_DEBUG(MI.dump());
793 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
794 DestReg: MI.getOperand(i: 0).getReg())
795 .add(MO: MI.getOperand(i: 1));
796 addRegToUpdate(MI.getOperand(1).getReg());
797 }
798 }
799 break;
800 }
801 case PPC::VSPLTB:
802 case PPC::VSPLTH:
803 case PPC::XXSPLTW: {
804 unsigned MyOpcode = MI.getOpcode();
805 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
806 Register TrueReg =
807 TRI->lookThruCopyLike(SrcReg: MI.getOperand(i: OpNo).getReg(), MRI);
808 if (!TrueReg.isVirtual())
809 break;
810 MachineInstr *DefMI = MRI->getVRegDef(Reg: TrueReg);
811 if (!DefMI)
812 break;
813 unsigned DefOpcode = DefMI->getOpcode();
814 auto isConvertOfSplat = [=]() -> bool {
815 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
816 return false;
817 Register ConvReg = DefMI->getOperand(i: 1).getReg();
818 if (!ConvReg.isVirtual())
819 return false;
820 MachineInstr *Splt = MRI->getVRegDef(Reg: ConvReg);
821 return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
822 Splt->getOpcode() == PPC::XXSPLTW);
823 };
824 bool AlreadySplat = (MyOpcode == DefOpcode) ||
825 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
826 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
827 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
828 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
829 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
830 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
831 // If the instruction[s] that feed this splat have already splat
832 // the value, this splat is redundant.
833 if (AlreadySplat) {
834 LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
835 LLVM_DEBUG(MI.dump());
836 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
837 DestReg: MI.getOperand(i: 0).getReg())
838 .add(MO: MI.getOperand(i: OpNo));
839 addRegToUpdate(MI.getOperand(OpNo).getReg());
840 ToErase = &MI;
841 Simplified = true;
842 }
843 // Splat fed by a shift. Usually when we align value to splat into
844 // vector element zero.
845 if (DefOpcode == PPC::XXSLDWI) {
846 Register ShiftRes = DefMI->getOperand(i: 0).getReg();
847 Register ShiftOp1 = DefMI->getOperand(i: 1).getReg();
848 Register ShiftOp2 = DefMI->getOperand(i: 2).getReg();
849 unsigned ShiftImm = DefMI->getOperand(i: 3).getImm();
850 unsigned SplatImm =
851 MI.getOperand(i: MyOpcode == PPC::XXSPLTW ? 2 : 1).getImm();
852 if (ShiftOp1 == ShiftOp2) {
853 unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
854 if (MRI->hasOneNonDBGUse(RegNo: ShiftRes)) {
855 LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
856 LLVM_DEBUG(DefMI->dump());
857 ToErase = DefMI;
858 }
859 Simplified = true;
860 LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
861 << " to " << NewElem << " in instruction: ");
862 LLVM_DEBUG(MI.dump());
863 addRegToUpdate(MI.getOperand(OpNo).getReg());
864 addRegToUpdate(ShiftOp1);
865 MI.getOperand(i: OpNo).setReg(ShiftOp1);
866 MI.getOperand(i: 2).setImm(NewElem);
867 }
868 }
869 break;
870 }
871 case PPC::XVCVDPSP: {
872 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
873 Register TrueReg =
874 TRI->lookThruCopyLike(SrcReg: MI.getOperand(i: 1).getReg(), MRI);
875 if (!TrueReg.isVirtual())
876 break;
877 MachineInstr *DefMI = MRI->getVRegDef(Reg: TrueReg);
878
879 // This can occur when building a vector of single precision or integer
880 // values.
881 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
882 Register DefsReg1 =
883 TRI->lookThruCopyLike(SrcReg: DefMI->getOperand(i: 1).getReg(), MRI);
884 Register DefsReg2 =
885 TRI->lookThruCopyLike(SrcReg: DefMI->getOperand(i: 2).getReg(), MRI);
886 if (!DefsReg1.isVirtual() || !DefsReg2.isVirtual())
887 break;
888 MachineInstr *P1 = MRI->getVRegDef(Reg: DefsReg1);
889 MachineInstr *P2 = MRI->getVRegDef(Reg: DefsReg2);
890
891 if (!P1 || !P2)
892 break;
893
894 // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
895 // and set any uses of that FRSP/XSRSP (in this MI) to the source of
896 // the FRSP/XSRSP.
897 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
898 unsigned Opc = RoundInstr->getOpcode();
899 if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
900 MRI->hasOneNonDBGUse(RegNo: RoundInstr->getOperand(i: 0).getReg())) {
901 Simplified = true;
902 Register ConvReg1 = RoundInstr->getOperand(i: 1).getReg();
903 Register FRSPDefines = RoundInstr->getOperand(i: 0).getReg();
904 MachineInstr &Use = *(MRI->use_instr_nodbg_begin(RegNo: FRSPDefines));
905 for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
906 if (Use.getOperand(i).isReg() &&
907 Use.getOperand(i).getReg() == FRSPDefines)
908 Use.getOperand(i).setReg(ConvReg1);
909 LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
910 LLVM_DEBUG(RoundInstr->dump());
911 LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
912 LLVM_DEBUG(MI.dump());
913 LLVM_DEBUG(dbgs() << "Through instruction:\n");
914 LLVM_DEBUG(DefMI->dump());
915 addRegToUpdate(ConvReg1);
916 addRegToUpdate(FRSPDefines);
917 ToErase = RoundInstr;
918 }
919 };
920
921 // If the input to XVCVDPSP is a vector that was built (even
922 // partially) out of FRSP's, the FRSP(s) can safely be removed
923 // since this instruction performs the same operation.
924 if (P1 != P2) {
925 removeFRSPIfPossible(P1);
926 removeFRSPIfPossible(P2);
927 break;
928 }
929 removeFRSPIfPossible(P1);
930 }
931 break;
932 }
933 case PPC::EXTSH:
934 case PPC::EXTSH8:
935 case PPC::EXTSH8_32_64: {
936 if (!EnableSExtElimination) break;
937 Register NarrowReg = MI.getOperand(i: 1).getReg();
938 if (!NarrowReg.isVirtual())
939 break;
940
941 MachineInstr *SrcMI = MRI->getVRegDef(Reg: NarrowReg);
942 unsigned SrcOpcode = SrcMI->getOpcode();
943 // If we've used a zero-extending load that we will sign-extend,
944 // just do a sign-extending load.
945 if (SrcOpcode == PPC::LHZ || SrcOpcode == PPC::LHZX) {
946 if (!MRI->hasOneNonDBGUse(RegNo: SrcMI->getOperand(i: 0).getReg()))
947 break;
948 // Determine the new opcode. We need to make sure that if the original
949 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
950 // Likewise if the source is X-Form the new opcode should also be
951 // X-Form.
952 unsigned Opc = PPC::LHA;
953 bool SourceIsXForm = SrcOpcode == PPC::LHZX;
954 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSH8 ||
955 MI.getOpcode() == PPC::EXTSH8_32_64;
956
957 if (SourceIsXForm && MIIs64Bit)
958 Opc = PPC::LHAX8;
959 else if (SourceIsXForm && !MIIs64Bit)
960 Opc = PPC::LHAX;
961 else if (MIIs64Bit)
962 Opc = PPC::LHA8;
963
964 addRegToUpdate(NarrowReg);
965 addRegToUpdate(MI.getOperand(0).getReg());
966
967 // We are removing a definition of NarrowReg which will cause
968 // problems in AliveBlocks. Add an implicit def that will be
969 // removed so that AliveBlocks are updated correctly.
970 addDummyDef(MBB, At: &MI, Reg: NarrowReg);
971 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
972 LLVM_DEBUG(SrcMI->dump());
973 LLVM_DEBUG(dbgs() << "and sign-extension\n");
974 LLVM_DEBUG(MI.dump());
975 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
976 SrcMI->setDesc(TII->get(Opcode: Opc));
977 SrcMI->getOperand(i: 0).setReg(MI.getOperand(i: 0).getReg());
978 ToErase = &MI;
979 Simplified = true;
980 NumEliminatedSExt++;
981 }
982 break;
983 }
984 case PPC::EXTSW:
985 case PPC::EXTSW_32:
986 case PPC::EXTSW_32_64: {
987 if (!EnableSExtElimination) break;
988 Register NarrowReg = MI.getOperand(i: 1).getReg();
989 if (!NarrowReg.isVirtual())
990 break;
991
992 MachineInstr *SrcMI = MRI->getVRegDef(Reg: NarrowReg);
993 unsigned SrcOpcode = SrcMI->getOpcode();
994 // If we've used a zero-extending load that we will sign-extend,
995 // just do a sign-extending load.
996 if (SrcOpcode == PPC::LWZ || SrcOpcode == PPC::LWZX) {
997 if (!MRI->hasOneNonDBGUse(RegNo: SrcMI->getOperand(i: 0).getReg()))
998 break;
999
1000 // The transformation from a zero-extending load to a sign-extending
1001 // load is only legal when the displacement is a multiple of 4.
1002 // If the displacement is not at least 4 byte aligned, don't perform
1003 // the transformation.
1004 bool IsWordAligned = false;
1005 if (SrcMI->getOperand(i: 1).isGlobal()) {
1006 const GlobalObject *GO =
1007 dyn_cast<GlobalObject>(Val: SrcMI->getOperand(i: 1).getGlobal());
1008 if (GO && GO->getAlign() && *GO->getAlign() >= 4 &&
1009 (SrcMI->getOperand(i: 1).getOffset() % 4 == 0))
1010 IsWordAligned = true;
1011 } else if (SrcMI->getOperand(i: 1).isImm()) {
1012 int64_t Value = SrcMI->getOperand(i: 1).getImm();
1013 if (Value % 4 == 0)
1014 IsWordAligned = true;
1015 }
1016
1017 // Determine the new opcode. We need to make sure that if the original
1018 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
1019 // Likewise if the source is X-Form the new opcode should also be
1020 // X-Form.
1021 unsigned Opc = PPC::LWA_32;
1022 bool SourceIsXForm = SrcOpcode == PPC::LWZX;
1023 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSW ||
1024 MI.getOpcode() == PPC::EXTSW_32_64;
1025
1026 if (SourceIsXForm && MIIs64Bit)
1027 Opc = PPC::LWAX;
1028 else if (SourceIsXForm && !MIIs64Bit)
1029 Opc = PPC::LWAX_32;
1030 else if (MIIs64Bit)
1031 Opc = PPC::LWA;
1032
1033 if (!IsWordAligned && (Opc == PPC::LWA || Opc == PPC::LWA_32))
1034 break;
1035
1036 addRegToUpdate(NarrowReg);
1037 addRegToUpdate(MI.getOperand(0).getReg());
1038
1039 // We are removing a definition of NarrowReg which will cause
1040 // problems in AliveBlocks. Add an implicit def that will be
1041 // removed so that AliveBlocks are updated correctly.
1042 addDummyDef(MBB, At: &MI, Reg: NarrowReg);
1043 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
1044 LLVM_DEBUG(SrcMI->dump());
1045 LLVM_DEBUG(dbgs() << "and sign-extension\n");
1046 LLVM_DEBUG(MI.dump());
1047 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
1048 SrcMI->setDesc(TII->get(Opcode: Opc));
1049 SrcMI->getOperand(i: 0).setReg(MI.getOperand(i: 0).getReg());
1050 ToErase = &MI;
1051 Simplified = true;
1052 NumEliminatedSExt++;
1053 } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
1054 TII->isSignExtended(Reg: NarrowReg, MRI)) {
1055 // We can eliminate EXTSW if the input is known to be already
1056 // sign-extended.
1057 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
1058 Register TmpReg =
1059 MF->getRegInfo().createVirtualRegister(RegClass: &PPC::G8RCRegClass);
1060 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::IMPLICIT_DEF),
1061 DestReg: TmpReg);
1062 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::INSERT_SUBREG),
1063 DestReg: MI.getOperand(i: 0).getReg())
1064 .addReg(RegNo: TmpReg)
1065 .addReg(RegNo: NarrowReg)
1066 .addImm(Val: PPC::sub_32);
1067 ToErase = &MI;
1068 Simplified = true;
1069 NumEliminatedSExt++;
1070 }
1071 break;
1072 }
1073 case PPC::RLDICL: {
1074 // We can eliminate RLDICL (e.g. for zero-extension)
1075 // if all bits to clear are already zero in the input.
1076 // This code assume following code sequence for zero-extension.
1077 // %6 = COPY %5:sub_32; (optional)
1078 // %8 = IMPLICIT_DEF;
1079 // %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
1080 if (!EnableZExtElimination) break;
1081
1082 if (MI.getOperand(i: 2).getImm() != 0)
1083 break;
1084
1085 Register SrcReg = MI.getOperand(i: 1).getReg();
1086 if (!SrcReg.isVirtual())
1087 break;
1088
1089 MachineInstr *SrcMI = MRI->getVRegDef(Reg: SrcReg);
1090 if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
1091 SrcMI->getOperand(i: 0).isReg() && SrcMI->getOperand(i: 1).isReg()))
1092 break;
1093
1094 MachineInstr *ImpDefMI, *SubRegMI;
1095 ImpDefMI = MRI->getVRegDef(Reg: SrcMI->getOperand(i: 1).getReg());
1096 SubRegMI = MRI->getVRegDef(Reg: SrcMI->getOperand(i: 2).getReg());
1097 if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
1098
1099 SrcMI = SubRegMI;
1100 if (SubRegMI->getOpcode() == PPC::COPY) {
1101 Register CopyReg = SubRegMI->getOperand(i: 1).getReg();
1102 if (CopyReg.isVirtual())
1103 SrcMI = MRI->getVRegDef(Reg: CopyReg);
1104 }
1105 if (!SrcMI->getOperand(i: 0).isReg())
1106 break;
1107
1108 unsigned KnownZeroCount =
1109 getKnownLeadingZeroCount(Reg: SrcMI->getOperand(i: 0).getReg(), TII, MRI);
1110 if (MI.getOperand(i: 3).getImm() <= KnownZeroCount) {
1111 LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
1112 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
1113 DestReg: MI.getOperand(i: 0).getReg())
1114 .addReg(RegNo: SrcReg);
1115 addRegToUpdate(SrcReg);
1116 ToErase = &MI;
1117 Simplified = true;
1118 NumEliminatedZExt++;
1119 }
1120 break;
1121 }
1122
1123 // TODO: Any instruction that has an immediate form fed only by a PHI
1124 // whose operands are all load immediate can be folded away. We currently
1125 // do this for ADD instructions, but should expand it to arithmetic and
1126 // binary instructions with immediate forms in the future.
1127 case PPC::ADD4:
1128 case PPC::ADD8: {
1129 auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
1130 assert(PhiOp && "Invalid Operand!");
1131 MachineInstr *DefPhiMI = getVRegDefOrNull(Op: PhiOp, MRI);
1132
1133 return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
1134 MRI->hasOneNonDBGUse(RegNo: DefPhiMI->getOperand(i: 0).getReg());
1135 };
1136
1137 auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
1138 MachineOperand *PhiOp) {
1139 assert(PhiOp && "Invalid Operand!");
1140 assert(DominatorOp && "Invalid Operand!");
1141 MachineInstr *DefPhiMI = getVRegDefOrNull(Op: PhiOp, MRI);
1142 MachineInstr *DefDomMI = getVRegDefOrNull(Op: DominatorOp, MRI);
1143
1144 // Note: the vregs only show up at odd indices position of PHI Node,
1145 // the even indices position save the BB info.
1146 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1147 MachineInstr *LiMI =
1148 getVRegDefOrNull(Op: &DefPhiMI->getOperand(i), MRI);
1149 if (!LiMI ||
1150 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
1151 || !MRI->hasOneNonDBGUse(RegNo: LiMI->getOperand(i: 0).getReg()) ||
1152 !MDT->dominates(A: DefDomMI, B: LiMI))
1153 return false;
1154 }
1155
1156 return true;
1157 };
1158
1159 MachineOperand Op1 = MI.getOperand(i: 1);
1160 MachineOperand Op2 = MI.getOperand(i: 2);
1161 if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
1162 std::swap(a&: Op1, b&: Op2);
1163 else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
1164 break; // We don't have an ADD fed by LI's that can be transformed
1165
1166 // Now we know that Op1 is the PHI node and Op2 is the dominator
1167 Register DominatorReg = Op2.getReg();
1168
1169 const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
1170 ? &PPC::G8RC_and_G8RC_NOX0RegClass
1171 : &PPC::GPRC_and_GPRC_NOR0RegClass;
1172 MRI->setRegClass(Reg: DominatorReg, RC: TRC);
1173
1174 // replace LIs with ADDIs
1175 MachineInstr *DefPhiMI = getVRegDefOrNull(Op: &Op1, MRI);
1176 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1177 MachineInstr *LiMI = getVRegDefOrNull(Op: &DefPhiMI->getOperand(i), MRI);
1178 LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
1179 LLVM_DEBUG(LiMI->dump());
1180
1181 // There could be repeated registers in the PHI, e.g: %1 =
1182 // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
1183 // already replaced the def instruction, skip.
1184 if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
1185 continue;
1186
1187 assert((LiMI->getOpcode() == PPC::LI ||
1188 LiMI->getOpcode() == PPC::LI8) &&
1189 "Invalid Opcode!");
1190 auto LiImm = LiMI->getOperand(i: 1).getImm(); // save the imm of LI
1191 LiMI->removeOperand(OpNo: 1); // remove the imm of LI
1192 LiMI->setDesc(TII->get(Opcode: LiMI->getOpcode() == PPC::LI ? PPC::ADDI
1193 : PPC::ADDI8));
1194 MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
1195 .addReg(RegNo: DominatorReg)
1196 .addImm(Val: LiImm); // restore the imm of LI
1197 LLVM_DEBUG(LiMI->dump());
1198 }
1199
1200 // Replace ADD with COPY
1201 LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
1202 LLVM_DEBUG(MI.dump());
1203 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
1204 DestReg: MI.getOperand(i: 0).getReg())
1205 .add(MO: Op1);
1206 addRegToUpdate(Op1.getReg());
1207 addRegToUpdate(Op2.getReg());
1208 ToErase = &MI;
1209 Simplified = true;
1210 NumOptADDLIs++;
1211 break;
1212 }
1213 case PPC::RLDICR: {
1214 Simplified |= emitRLDICWhenLoweringJumpTables(MI, ToErase) ||
1215 combineSEXTAndSHL(MI, ToErase);
1216 break;
1217 }
1218 case PPC::ANDI_rec:
1219 case PPC::ANDI8_rec:
1220 case PPC::ANDIS_rec:
1221 case PPC::ANDIS8_rec: {
1222 Register TrueReg =
1223 TRI->lookThruCopyLike(SrcReg: MI.getOperand(i: 1).getReg(), MRI);
1224 if (!TrueReg.isVirtual() || !MRI->hasOneNonDBGUse(RegNo: TrueReg))
1225 break;
1226
1227 MachineInstr *SrcMI = MRI->getVRegDef(Reg: TrueReg);
1228 if (!SrcMI)
1229 break;
1230
1231 unsigned SrcOpCode = SrcMI->getOpcode();
1232 if (SrcOpCode != PPC::RLDICL && SrcOpCode != PPC::RLDICR)
1233 break;
1234
1235 Register SrcReg, DstReg;
1236 SrcReg = SrcMI->getOperand(i: 1).getReg();
1237 DstReg = MI.getOperand(i: 1).getReg();
1238 const TargetRegisterClass *SrcRC = MRI->getRegClassOrNull(Reg: SrcReg);
1239 const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(Reg: DstReg);
1240 if (DstRC != SrcRC)
1241 break;
1242
1243 uint64_t AndImm = MI.getOperand(i: 2).getImm();
1244 if (MI.getOpcode() == PPC::ANDIS_rec ||
1245 MI.getOpcode() == PPC::ANDIS8_rec)
1246 AndImm <<= 16;
1247 uint64_t LZeroAndImm = llvm::countl_zero<uint64_t>(Val: AndImm);
1248 uint64_t RZeroAndImm = llvm::countr_zero<uint64_t>(Val: AndImm);
1249 uint64_t ImmSrc = SrcMI->getOperand(i: 3).getImm();
1250
1251 // We can transfer `RLDICL/RLDICR + ANDI_rec/ANDIS_rec` to `ANDI_rec 0`
1252 // if all bits to AND are already zero in the input.
1253 bool PatternResultZero =
1254 (SrcOpCode == PPC::RLDICL && (RZeroAndImm + ImmSrc > 63)) ||
1255 (SrcOpCode == PPC::RLDICR && LZeroAndImm > ImmSrc);
1256
1257 // We can eliminate RLDICL/RLDICR if it's used to clear bits and all
1258 // bits cleared will be ANDed with 0 by ANDI_rec/ANDIS_rec.
1259 bool PatternRemoveRotate =
1260 SrcMI->getOperand(i: 2).getImm() == 0 &&
1261 ((SrcOpCode == PPC::RLDICL && LZeroAndImm >= ImmSrc) ||
1262 (SrcOpCode == PPC::RLDICR && (RZeroAndImm + ImmSrc > 63)));
1263
1264 if (!PatternResultZero && !PatternRemoveRotate)
1265 break;
1266
1267 LLVM_DEBUG(dbgs() << "Combining pair: ");
1268 LLVM_DEBUG(SrcMI->dump());
1269 LLVM_DEBUG(MI.dump());
1270 if (PatternResultZero)
1271 MI.getOperand(i: 2).setImm(0);
1272 MI.getOperand(i: 1).setReg(SrcMI->getOperand(i: 1).getReg());
1273 LLVM_DEBUG(dbgs() << "To: ");
1274 LLVM_DEBUG(MI.dump());
1275 addRegToUpdate(MI.getOperand(1).getReg());
1276 addRegToUpdate(SrcMI->getOperand(0).getReg());
1277 Simplified = true;
1278 break;
1279 }
1280 case PPC::RLWINM:
1281 case PPC::RLWINM_rec:
1282 case PPC::RLWINM8:
1283 case PPC::RLWINM8_rec: {
1284 // We might replace operand 1 of the instruction which will
1285 // require we recompute kill flags for it.
1286 Register OrigOp1Reg = MI.getOperand(i: 1).isReg()
1287 ? MI.getOperand(i: 1).getReg()
1288 : PPC::NoRegister;
1289 Simplified = TII->combineRLWINM(MI, ToErase: &ToErase);
1290 if (Simplified) {
1291 addRegToUpdate(OrigOp1Reg);
1292 if (MI.getOperand(i: 1).isReg())
1293 addRegToUpdate(MI.getOperand(1).getReg());
1294 if (ToErase && ToErase->getOperand(i: 1).isReg())
1295 for (auto UseReg : ToErase->explicit_uses())
1296 if (UseReg.isReg())
1297 addRegToUpdate(UseReg.getReg());
1298 ++NumRotatesCollapsed;
1299 }
1300 break;
1301 }
1302 // We will replace TD/TW/TDI/TWI with an unconditional trap if it will
1303 // always trap, we will delete the node if it will never trap.
1304 case PPC::TDI:
1305 case PPC::TWI:
1306 case PPC::TD:
1307 case PPC::TW: {
1308 if (!EnableTrapOptimization) break;
1309 MachineInstr *LiMI1 = getVRegDefOrNull(Op: &MI.getOperand(i: 1), MRI);
1310 MachineInstr *LiMI2 = getVRegDefOrNull(Op: &MI.getOperand(i: 2), MRI);
1311 bool IsOperand2Immediate = MI.getOperand(i: 2).isImm();
1312 // We can only do the optimization if we can get immediates
1313 // from both operands
1314 if (!(LiMI1 && (LiMI1->getOpcode() == PPC::LI ||
1315 LiMI1->getOpcode() == PPC::LI8)))
1316 break;
1317 if (!IsOperand2Immediate &&
1318 !(LiMI2 && (LiMI2->getOpcode() == PPC::LI ||
1319 LiMI2->getOpcode() == PPC::LI8)))
1320 break;
1321
1322 auto ImmOperand0 = MI.getOperand(i: 0).getImm();
1323 auto ImmOperand1 = LiMI1->getOperand(i: 1).getImm();
1324 auto ImmOperand2 = IsOperand2Immediate ? MI.getOperand(i: 2).getImm()
1325 : LiMI2->getOperand(i: 1).getImm();
1326
1327 // We will replace the MI with an unconditional trap if it will always
1328 // trap.
1329 if ((ImmOperand0 == 31) ||
1330 ((ImmOperand0 & 0x10) &&
1331 ((int64_t)ImmOperand1 < (int64_t)ImmOperand2)) ||
1332 ((ImmOperand0 & 0x8) &&
1333 ((int64_t)ImmOperand1 > (int64_t)ImmOperand2)) ||
1334 ((ImmOperand0 & 0x2) &&
1335 ((uint64_t)ImmOperand1 < (uint64_t)ImmOperand2)) ||
1336 ((ImmOperand0 & 0x1) &&
1337 ((uint64_t)ImmOperand1 > (uint64_t)ImmOperand2)) ||
1338 ((ImmOperand0 & 0x4) && (ImmOperand1 == ImmOperand2))) {
1339 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::TRAP));
1340 TrapOpt = true;
1341 }
1342 // We will delete the MI if it will never trap.
1343 ToErase = &MI;
1344 Simplified = true;
1345 break;
1346 }
1347 }
1348 }
1349
1350 // If the last instruction was marked for elimination,
1351 // remove it now.
1352 if (ToErase) {
1353 recomputeLVForDyingInstr();
1354 ToErase->eraseFromParent();
1355 ToErase = nullptr;
1356 }
1357 // Reset TrapOpt to false at the end of the basic block.
1358 if (EnableTrapOptimization)
1359 TrapOpt = false;
1360 }
1361
1362 // Eliminate all the TOC save instructions which are redundant.
1363 Simplified |= eliminateRedundantTOCSaves(TOCSaves);
1364 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
1365 if (FI->mustSaveTOC())
1366 NumTOCSavesInPrologue++;
1367
1368 // We try to eliminate redundant compare instruction.
1369 Simplified |= eliminateRedundantCompare();
1370
1371 // If we have made any modifications and added any registers to the set of
1372 // registers for which we need to update the kill flags, do so by recomputing
1373 // LiveVariables for those registers.
1374 for (Register Reg : RegsToUpdate) {
1375 if (!MRI->reg_empty(RegNo: Reg))
1376 LV->recomputeForSingleDefVirtReg(Reg);
1377 }
1378 return Simplified;
1379}
1380
1381// helper functions for eliminateRedundantCompare
1382static bool isEqOrNe(MachineInstr *BI) {
1383 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(i: 0).getImm();
1384 unsigned PredCond = PPC::getPredicateCondition(Opcode: Pred);
1385 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
1386}
1387
1388static bool isSupportedCmpOp(unsigned opCode) {
1389 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1390 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
1391 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
1392 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
1393}
1394
1395static bool is64bitCmpOp(unsigned opCode) {
1396 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1397 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
1398}
1399
1400static bool isSignedCmpOp(unsigned opCode) {
1401 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
1402 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
1403}
1404
1405static unsigned getSignedCmpOpCode(unsigned opCode) {
1406 if (opCode == PPC::CMPLD) return PPC::CMPD;
1407 if (opCode == PPC::CMPLW) return PPC::CMPW;
1408 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
1409 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
1410 return opCode;
1411}
1412
1413// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
1414// (LT x) to (LE x-1)
1415static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
1416 uint64_t Imm = CMPI->getOperand(i: 2).getImm();
1417 bool SignedCmp = isSignedCmpOp(opCode: CMPI->getOpcode());
1418 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
1419 return 0;
1420
1421 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(i: 0).getImm();
1422 unsigned PredCond = PPC::getPredicateCondition(Opcode: Pred);
1423 unsigned PredHint = PPC::getPredicateHint(Opcode: Pred);
1424 if (PredCond == PPC::PRED_GE)
1425 return PPC::getPredicate(Condition: PPC::PRED_GT, Hint: PredHint);
1426 if (PredCond == PPC::PRED_LT)
1427 return PPC::getPredicate(Condition: PPC::PRED_LE, Hint: PredHint);
1428
1429 return 0;
1430}
1431
1432// We can increment immediate x in (GT x) by changing it to (GE x+1) or
1433// (LE x) to (LT x+1)
1434static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
1435 uint64_t Imm = CMPI->getOperand(i: 2).getImm();
1436 bool SignedCmp = isSignedCmpOp(opCode: CMPI->getOpcode());
1437 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
1438 return 0;
1439
1440 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(i: 0).getImm();
1441 unsigned PredCond = PPC::getPredicateCondition(Opcode: Pred);
1442 unsigned PredHint = PPC::getPredicateHint(Opcode: Pred);
1443 if (PredCond == PPC::PRED_GT)
1444 return PPC::getPredicate(Condition: PPC::PRED_GE, Hint: PredHint);
1445 if (PredCond == PPC::PRED_LE)
1446 return PPC::getPredicate(Condition: PPC::PRED_LT, Hint: PredHint);
1447
1448 return 0;
1449}
1450
1451// This takes a Phi node and returns a register value for the specified BB.
1452static unsigned getIncomingRegForBlock(MachineInstr *Phi,
1453 MachineBasicBlock *MBB) {
1454 for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
1455 MachineOperand &MO = Phi->getOperand(i: I);
1456 if (MO.getMBB() == MBB)
1457 return Phi->getOperand(i: I-1).getReg();
1458 }
1459 llvm_unreachable("invalid src basic block for this Phi node\n");
1460 return 0;
1461}
1462
1463// This function tracks the source of the register through register copy.
1464// If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
1465// assuming that the control comes from BB1 into BB2.
1466static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
1467 MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
1468 unsigned SrcReg = Reg;
1469 while (true) {
1470 unsigned NextReg = SrcReg;
1471 MachineInstr *Inst = MRI->getVRegDef(Reg: SrcReg);
1472 if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
1473 NextReg = getIncomingRegForBlock(Phi: Inst, MBB: BB1);
1474 // We track through PHI only once to avoid infinite loop.
1475 BB1 = nullptr;
1476 }
1477 else if (Inst->isFullCopy())
1478 NextReg = Inst->getOperand(i: 1).getReg();
1479 if (NextReg == SrcReg || !Register::isVirtualRegister(Reg: NextReg))
1480 break;
1481 SrcReg = NextReg;
1482 }
1483 return SrcReg;
1484}
1485
// Decide whether the compare feeding the conditional branch in MBB can be
// merged with a compare in a predecessor block. On success, PredMBB is set
// to the predecessor whose compare we merge with; MBBtoMoveCmp is set to
// the block that must receive a copy of the compare for the partially
// redundant case, or nullptr for the fully redundant case. Returns false
// when the CFG shape or the instructions do not match the pattern.
static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
                                          MachineBasicBlock *&PredMBB,
                                          MachineBasicBlock *&MBBtoMoveCmp,
                                          MachineRegisterInfo *MRI) {

  // A block qualifies if it ends in a single-use compare + BCC pair that
  // uses only virtual registers.
  auto isEligibleBB = [&](MachineBasicBlock &BB) {
    auto BII = BB.getFirstInstrTerminator();
    // We optimize BBs ending with a conditional branch.
    // We check only for BCC here, not BCCLR, because BCCLR
    // will be formed only later in the pipeline.
    if (BB.succ_size() == 2 &&
        BII != BB.instr_end() &&
        (*BII).getOpcode() == PPC::BCC &&
        (*BII).getOperand(i: 1).isReg()) {
      // We optimize only if the condition code is used only by one BCC.
      Register CndReg = (*BII).getOperand(i: 1).getReg();
      if (!CndReg.isVirtual() || !MRI->hasOneNonDBGUse(RegNo: CndReg))
        return false;

      MachineInstr *CMPI = MRI->getVRegDef(Reg: CndReg);
      // We assume compare and branch are in the same BB for ease of analysis.
      if (CMPI->getParent() != &BB)
        return false;

      // We skip this BB if a physical register is used in comparison.
      for (MachineOperand &MO : CMPI->operands())
        if (MO.isReg() && !MO.getReg().isVirtual())
          return false;

      return true;
    }
    return false;
  };

  // If this BB has more than one successor, we can create a new BB and
  // move the compare instruction in the new BB.
  // So far, we do not move compare instruction to a BB having multiple
  // successors to avoid potentially increasing code size.
  auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
    return BB.succ_size() == 1;
  };

  if (!isEligibleBB(MBB))
    return false;

  unsigned NumPredBBs = MBB.pred_size();
  if (NumPredBBs == 1) {
    // Fully redundant case: the lone predecessor also ends in a qualifying
    // compare + branch, so MBB's compare can be merged into it directly.
    MachineBasicBlock *TmpMBB = *MBB.pred_begin();
    if (isEligibleBB(*TmpMBB)) {
      PredMBB = TmpMBB;
      MBBtoMoveCmp = nullptr;
      return true;
    }
  }
  else if (NumPredBBs == 2) {
    // We check for partially redundant case.
    // So far, we support cases with only two predecessors
    // to avoid increasing the number of instructions.
    MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
    MachineBasicBlock *Pred1MBB = *PI;
    MachineBasicBlock *Pred2MBB = *(PI+1);

    if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
      // We assume Pred1MBB is the BB containing the compare to be merged and
      // Pred2MBB is the BB to which we will append a compare instruction.
      // Proceed as is if Pred1MBB is different from MBB.
    }
    else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
      // We need to swap Pred1MBB and Pred2MBB to canonicalize.
      std::swap(a&: Pred1MBB, b&: Pred2MBB);
    }
    else return false;

    // Reject a self-loop edge: merging into MBB itself makes no sense.
    if (Pred1MBB == &MBB)
      return false;

    // Here, Pred2MBB is the BB to which we need to append a compare inst.
    // We cannot move the compare instruction if operands are not available
    // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
    MachineInstr *BI = &*MBB.getFirstInstrTerminator();
    MachineInstr *CMPI = MRI->getVRegDef(Reg: BI->getOperand(i: 1).getReg());
    for (int I = 1; I <= 2; I++)
      if (CMPI->getOperand(i: I).isReg()) {
        MachineInstr *Inst = MRI->getVRegDef(Reg: CMPI->getOperand(i: I).getReg());
        if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
          return false;
      }

    PredMBB = Pred1MBB;
    MBBtoMoveCmp = Pred2MBB;
    return true;
  }

  return false;
}
1581
1582// This function will iterate over the input map containing a pair of TOC save
1583// instruction and a flag. The flag will be set to false if the TOC save is
1584// proven redundant. This function will erase from the basic block all the TOC
1585// saves marked as redundant.
1586bool PPCMIPeephole::eliminateRedundantTOCSaves(
1587 std::map<MachineInstr *, bool> &TOCSaves) {
1588 bool Simplified = false;
1589 int NumKept = 0;
1590 for (auto TOCSave : TOCSaves) {
1591 if (!TOCSave.second) {
1592 TOCSave.first->eraseFromParent();
1593 RemoveTOCSave++;
1594 Simplified = true;
1595 } else {
1596 NumKept++;
1597 }
1598 }
1599
1600 if (NumKept > 1)
1601 MultiTOCSaves++;
1602
1603 return Simplified;
1604}
1605
1606// If multiple conditional branches are executed based on the (essentially)
1607// same comparison, we merge compare instructions into one and make multiple
1608// conditional branches on this comparison.
1609// For example,
1610// if (a == 0) { ... }
1611// else if (a < 0) { ... }
1612// can be executed by one compare and two conditional branches instead of
1613// two pairs of a compare and a conditional branch.
1614//
1615// This method merges two compare instructions in two MBBs and modifies the
1616// compare and conditional branch instructions if needed.
1617// For the above example, the input for this pass looks like:
1618// cmplwi r3, 0
1619// beq 0, .LBB0_3
1620// cmpwi r3, -1
1621// bgt 0, .LBB0_4
1622// So, before merging two compares, we need to modify these instructions as
1623// cmpwi r3, 0 ; cmplwi and cmpwi yield same result for beq
1624// beq 0, .LBB0_3
1625// cmpwi r3, 0 ; greather than -1 means greater or equal to 0
1626// bge 0, .LBB0_4
1627
1628bool PPCMIPeephole::eliminateRedundantCompare() {
1629 bool Simplified = false;
1630
1631 for (MachineBasicBlock &MBB2 : *MF) {
1632 MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;
1633
1634 // For fully redundant case, we select two basic blocks MBB1 and MBB2
1635 // as an optimization target if
1636 // - both MBBs end with a conditional branch,
1637 // - MBB1 is the only predecessor of MBB2, and
1638 // - compare does not take a physical register as a operand in both MBBs.
1639 // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
1640 //
1641 // As partially redundant case, we additionally handle if MBB2 has one
1642 // additional predecessor, which has only one successor (MBB2).
1643 // In this case, we move the compare instruction originally in MBB2 into
1644 // MBBtoMoveCmp. This partially redundant case is typically appear by
1645 // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
1646 //
1647 // Overview of CFG of related basic blocks
1648 // Fully redundant case Partially redundant case
1649 // -------- ---------------- --------
1650 // | MBB1 | (w/ 2 succ) | MBBtoMoveCmp | | MBB1 | (w/ 2 succ)
1651 // -------- ---------------- --------
1652 // | \ (w/ 1 succ) \ | \
1653 // | \ \ | \
1654 // | \ |
1655 // -------- --------
1656 // | MBB2 | (w/ 1 pred | MBB2 | (w/ 2 pred
1657 // -------- and 2 succ) -------- and 2 succ)
1658 // | \ | \
1659 // | \ | \
1660 //
1661 if (!eligibleForCompareElimination(MBB&: MBB2, PredMBB&: MBB1, MBBtoMoveCmp, MRI))
1662 continue;
1663
1664 MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
1665 MachineInstr *CMPI1 = MRI->getVRegDef(Reg: BI1->getOperand(i: 1).getReg());
1666
1667 MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
1668 MachineInstr *CMPI2 = MRI->getVRegDef(Reg: BI2->getOperand(i: 1).getReg());
1669 bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);
1670
1671 // We cannot optimize an unsupported compare opcode or
1672 // a mix of 32-bit and 64-bit comparisons
1673 if (!isSupportedCmpOp(opCode: CMPI1->getOpcode()) ||
1674 !isSupportedCmpOp(opCode: CMPI2->getOpcode()) ||
1675 is64bitCmpOp(opCode: CMPI1->getOpcode()) != is64bitCmpOp(opCode: CMPI2->getOpcode()))
1676 continue;
1677
1678 unsigned NewOpCode = 0;
1679 unsigned NewPredicate1 = 0, NewPredicate2 = 0;
1680 int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
1681 bool SwapOperands = false;
1682
1683 if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
1684 // Typically, unsigned comparison is used for equality check, but
1685 // we replace it with a signed comparison if the comparison
1686 // to be merged is a signed comparison.
1687 // In other cases of opcode mismatch, we cannot optimize this.
1688
1689 // We cannot change opcode when comparing against an immediate
1690 // if the most significant bit of the immediate is one
1691 // due to the difference in sign extension.
1692 auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
1693 if (!I->getOperand(i: 2).isImm())
1694 return false;
1695 int16_t Imm = (int16_t)I->getOperand(i: 2).getImm();
1696 return Imm < 0;
1697 };
1698
1699 if (isEqOrNe(BI: BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
1700 CMPI1->getOpcode() == getSignedCmpOpCode(opCode: CMPI2->getOpcode()))
1701 NewOpCode = CMPI1->getOpcode();
1702 else if (isEqOrNe(BI: BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
1703 getSignedCmpOpCode(opCode: CMPI1->getOpcode()) == CMPI2->getOpcode())
1704 NewOpCode = CMPI2->getOpcode();
1705 else continue;
1706 }
1707
1708 if (CMPI1->getOperand(i: 2).isReg() && CMPI2->getOperand(i: 2).isReg()) {
1709 // In case of comparisons between two registers, these two registers
1710 // must be same to merge two comparisons.
1711 unsigned Cmp1Operand1 = getSrcVReg(Reg: CMPI1->getOperand(i: 1).getReg(),
1712 BB1: nullptr, BB2: nullptr, MRI);
1713 unsigned Cmp1Operand2 = getSrcVReg(Reg: CMPI1->getOperand(i: 2).getReg(),
1714 BB1: nullptr, BB2: nullptr, MRI);
1715 unsigned Cmp2Operand1 = getSrcVReg(Reg: CMPI2->getOperand(i: 1).getReg(),
1716 BB1: MBB1, BB2: &MBB2, MRI);
1717 unsigned Cmp2Operand2 = getSrcVReg(Reg: CMPI2->getOperand(i: 2).getReg(),
1718 BB1: MBB1, BB2: &MBB2, MRI);
1719
1720 if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
1721 // Same pair of registers in the same order; ready to merge as is.
1722 }
1723 else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
1724 // Same pair of registers in different order.
1725 // We reverse the predicate to merge compare instructions.
1726 PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(i: 0).getImm();
1727 NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Opcode: Pred);
1728 // In case of partial redundancy, we need to swap operands
1729 // in another compare instruction.
1730 SwapOperands = true;
1731 }
1732 else continue;
1733 }
1734 else if (CMPI1->getOperand(i: 2).isImm() && CMPI2->getOperand(i: 2).isImm()) {
1735 // In case of comparisons between a register and an immediate,
1736 // the operand register must be same for two compare instructions.
1737 unsigned Cmp1Operand1 = getSrcVReg(Reg: CMPI1->getOperand(i: 1).getReg(),
1738 BB1: nullptr, BB2: nullptr, MRI);
1739 unsigned Cmp2Operand1 = getSrcVReg(Reg: CMPI2->getOperand(i: 1).getReg(),
1740 BB1: MBB1, BB2: &MBB2, MRI);
1741 if (Cmp1Operand1 != Cmp2Operand1)
1742 continue;
1743
1744 NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(i: 2).getImm();
1745 NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(i: 2).getImm();
1746
1747 // If immediate are not same, we try to adjust by changing predicate;
1748 // e.g. GT imm means GE (imm+1).
1749 if (Imm1 != Imm2 && (!isEqOrNe(BI: BI2) || !isEqOrNe(BI: BI1))) {
1750 int Diff = Imm1 - Imm2;
1751 if (Diff < -2 || Diff > 2)
1752 continue;
1753
1754 unsigned PredToInc1 = getPredicateToIncImm(BI: BI1, CMPI: CMPI1);
1755 unsigned PredToDec1 = getPredicateToDecImm(BI: BI1, CMPI: CMPI1);
1756 unsigned PredToInc2 = getPredicateToIncImm(BI: BI2, CMPI: CMPI2);
1757 unsigned PredToDec2 = getPredicateToDecImm(BI: BI2, CMPI: CMPI2);
1758 if (Diff == 2) {
1759 if (PredToInc2 && PredToDec1) {
1760 NewPredicate2 = PredToInc2;
1761 NewPredicate1 = PredToDec1;
1762 NewImm2++;
1763 NewImm1--;
1764 }
1765 }
1766 else if (Diff == 1) {
1767 if (PredToInc2) {
1768 NewImm2++;
1769 NewPredicate2 = PredToInc2;
1770 }
1771 else if (PredToDec1) {
1772 NewImm1--;
1773 NewPredicate1 = PredToDec1;
1774 }
1775 }
1776 else if (Diff == -1) {
1777 if (PredToDec2) {
1778 NewImm2--;
1779 NewPredicate2 = PredToDec2;
1780 }
1781 else if (PredToInc1) {
1782 NewImm1++;
1783 NewPredicate1 = PredToInc1;
1784 }
1785 }
1786 else if (Diff == -2) {
1787 if (PredToDec2 && PredToInc1) {
1788 NewPredicate2 = PredToDec2;
1789 NewPredicate1 = PredToInc1;
1790 NewImm2--;
1791 NewImm1++;
1792 }
1793 }
1794 }
1795
1796 // We cannot merge two compares if the immediates are not same.
1797 if (NewImm2 != NewImm1)
1798 continue;
1799 }
1800
1801 LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
1802 LLVM_DEBUG(CMPI1->dump());
1803 LLVM_DEBUG(BI1->dump());
1804 LLVM_DEBUG(CMPI2->dump());
1805 LLVM_DEBUG(BI2->dump());
1806 for (const MachineOperand &MO : CMPI1->operands())
1807 if (MO.isReg())
1808 addRegToUpdate(MO.getReg());
1809 for (const MachineOperand &MO : CMPI2->operands())
1810 if (MO.isReg())
1811 addRegToUpdate(MO.getReg());
1812
1813 // We adjust opcode, predicates and immediate as we determined above.
1814 if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
1815 CMPI1->setDesc(TII->get(Opcode: NewOpCode));
1816 }
1817 if (NewPredicate1) {
1818 BI1->getOperand(i: 0).setImm(NewPredicate1);
1819 }
1820 if (NewPredicate2) {
1821 BI2->getOperand(i: 0).setImm(NewPredicate2);
1822 }
1823 if (NewImm1 != Imm1) {
1824 CMPI1->getOperand(i: 2).setImm(NewImm1);
1825 }
1826
1827 if (IsPartiallyRedundant) {
1828 // We touch up the compare instruction in MBB2 and move it to
1829 // a previous BB to handle partially redundant case.
1830 if (SwapOperands) {
1831 Register Op1 = CMPI2->getOperand(i: 1).getReg();
1832 Register Op2 = CMPI2->getOperand(i: 2).getReg();
1833 CMPI2->getOperand(i: 1).setReg(Op2);
1834 CMPI2->getOperand(i: 2).setReg(Op1);
1835 }
1836 if (NewImm2 != Imm2)
1837 CMPI2->getOperand(i: 2).setImm(NewImm2);
1838
1839 for (int I = 1; I <= 2; I++) {
1840 if (CMPI2->getOperand(i: I).isReg()) {
1841 MachineInstr *Inst = MRI->getVRegDef(Reg: CMPI2->getOperand(i: I).getReg());
1842 if (Inst->getParent() != &MBB2)
1843 continue;
1844
1845 assert(Inst->getOpcode() == PPC::PHI &&
1846 "We cannot support if an operand comes from this BB.");
1847 unsigned SrcReg = getIncomingRegForBlock(Phi: Inst, MBB: MBBtoMoveCmp);
1848 CMPI2->getOperand(i: I).setReg(SrcReg);
1849 addRegToUpdate(SrcReg);
1850 }
1851 }
1852 auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
1853 MBBtoMoveCmp->splice(Where: I, Other: &MBB2, From: MachineBasicBlock::iterator(CMPI2));
1854
1855 DebugLoc DL = CMPI2->getDebugLoc();
1856 Register NewVReg = MRI->createVirtualRegister(RegClass: &PPC::CRRCRegClass);
1857 BuildMI(BB&: MBB2, I: MBB2.begin(), MIMD: DL,
1858 MCID: TII->get(Opcode: PPC::PHI), DestReg: NewVReg)
1859 .addReg(RegNo: BI1->getOperand(i: 1).getReg()).addMBB(MBB: MBB1)
1860 .addReg(RegNo: BI2->getOperand(i: 1).getReg()).addMBB(MBB: MBBtoMoveCmp);
1861 BI2->getOperand(i: 1).setReg(NewVReg);
1862 addRegToUpdate(NewVReg);
1863 }
1864 else {
1865 // We finally eliminate compare instruction in MBB2.
1866 // We do not need to treat CMPI2 specially here in terms of re-computing
1867 // live variables even though it is being deleted because:
1868 // - It defines a register that has a single use (already checked in
1869 // eligibleForCompareElimination())
1870 // - The only user (BI2) is no longer using it so the register is dead (no
1871 // def, no uses)
1872 // - We do not attempt to recompute live variables for dead registers
1873 BI2->getOperand(i: 1).setReg(BI1->getOperand(i: 1).getReg());
1874 CMPI2->eraseFromParent();
1875 }
1876
1877 LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
1878 LLVM_DEBUG(CMPI1->dump());
1879 LLVM_DEBUG(BI1->dump());
1880 LLVM_DEBUG(BI2->dump());
1881 if (IsPartiallyRedundant) {
1882 LLVM_DEBUG(dbgs() << "The following compare is moved into "
1883 << printMBBReference(*MBBtoMoveCmp)
1884 << " to handle partial redundancy.\n");
1885 LLVM_DEBUG(CMPI2->dump());
1886 }
1887 Simplified = true;
1888 }
1889
1890 return Simplified;
1891}
1892
1893// We miss the opportunity to emit an RLDIC when lowering jump tables
1894// since ISEL sees only a single basic block. When selecting, the clear
1895// and shift left will be in different blocks.
1896bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
1897 MachineInstr *&ToErase) {
1898 if (MI.getOpcode() != PPC::RLDICR)
1899 return false;
1900
1901 Register SrcReg = MI.getOperand(i: 1).getReg();
1902 if (!SrcReg.isVirtual())
1903 return false;
1904
1905 MachineInstr *SrcMI = MRI->getVRegDef(Reg: SrcReg);
1906 if (SrcMI->getOpcode() != PPC::RLDICL)
1907 return false;
1908
1909 MachineOperand MOpSHSrc = SrcMI->getOperand(i: 2);
1910 MachineOperand MOpMBSrc = SrcMI->getOperand(i: 3);
1911 MachineOperand MOpSHMI = MI.getOperand(i: 2);
1912 MachineOperand MOpMEMI = MI.getOperand(i: 3);
1913 if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
1914 MOpMEMI.isImm()))
1915 return false;
1916
1917 uint64_t SHSrc = MOpSHSrc.getImm();
1918 uint64_t MBSrc = MOpMBSrc.getImm();
1919 uint64_t SHMI = MOpSHMI.getImm();
1920 uint64_t MEMI = MOpMEMI.getImm();
1921 uint64_t NewSH = SHSrc + SHMI;
1922 uint64_t NewMB = MBSrc - SHMI;
1923 if (NewMB > 63 || NewSH > 63)
1924 return false;
1925
1926 // The bits cleared with RLDICL are [0, MBSrc).
1927 // The bits cleared with RLDICR are (MEMI, 63].
1928 // After the sequence, the bits cleared are:
1929 // [0, MBSrc-SHMI) and (MEMI, 63).
1930 //
1931 // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
1932 if ((63 - NewSH) != MEMI)
1933 return false;
1934
1935 LLVM_DEBUG(dbgs() << "Converting pair: ");
1936 LLVM_DEBUG(SrcMI->dump());
1937 LLVM_DEBUG(MI.dump());
1938
1939 MI.setDesc(TII->get(Opcode: PPC::RLDIC));
1940 MI.getOperand(i: 1).setReg(SrcMI->getOperand(i: 1).getReg());
1941 MI.getOperand(i: 2).setImm(NewSH);
1942 MI.getOperand(i: 3).setImm(NewMB);
1943 addRegToUpdate(MI.getOperand(1).getReg());
1944 addRegToUpdate(SrcMI->getOperand(0).getReg());
1945
1946 LLVM_DEBUG(dbgs() << "To: ");
1947 LLVM_DEBUG(MI.dump());
1948 NumRotatesCollapsed++;
1949 // If SrcReg has no non-debug use it's safe to delete its def SrcMI.
1950 if (MRI->use_nodbg_empty(RegNo: SrcReg)) {
1951 assert(!SrcMI->hasImplicitDef() &&
1952 "Not expecting an implicit def with this instr.");
1953 ToErase = SrcMI;
1954 }
1955 return true;
1956}
1957
1958// For case in LLVM IR
1959// entry:
1960// %iconv = sext i32 %index to i64
1961// br i1 undef label %true, label %false
1962// true:
1963// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
1964// ...
1965// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
1966// different BBs when conducting instruction selection. We can do a peephole
1967// optimization to combine these two instructions into extswsli after
1968// instruction selection.
1969bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
1970 MachineInstr *&ToErase) {
1971 if (MI.getOpcode() != PPC::RLDICR)
1972 return false;
1973
1974 if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
1975 return false;
1976
1977 assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
1978
1979 MachineOperand MOpSHMI = MI.getOperand(i: 2);
1980 MachineOperand MOpMEMI = MI.getOperand(i: 3);
1981 if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
1982 return false;
1983
1984 uint64_t SHMI = MOpSHMI.getImm();
1985 uint64_t MEMI = MOpMEMI.getImm();
1986 if (SHMI + MEMI != 63)
1987 return false;
1988
1989 Register SrcReg = MI.getOperand(i: 1).getReg();
1990 if (!SrcReg.isVirtual())
1991 return false;
1992
1993 MachineInstr *SrcMI = MRI->getVRegDef(Reg: SrcReg);
1994 if (SrcMI->getOpcode() != PPC::EXTSW &&
1995 SrcMI->getOpcode() != PPC::EXTSW_32_64)
1996 return false;
1997
1998 // If the register defined by extsw has more than one use, combination is not
1999 // needed.
2000 if (!MRI->hasOneNonDBGUse(RegNo: SrcReg))
2001 return false;
2002
2003 assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
2004 assert(SrcMI->getOperand(1).isReg() &&
2005 "EXTSW's second operand should be a register");
2006 if (!SrcMI->getOperand(i: 1).getReg().isVirtual())
2007 return false;
2008
2009 LLVM_DEBUG(dbgs() << "Combining pair: ");
2010 LLVM_DEBUG(SrcMI->dump());
2011 LLVM_DEBUG(MI.dump());
2012
2013 MachineInstr *NewInstr =
2014 BuildMI(BB&: *MI.getParent(), I: &MI, MIMD: MI.getDebugLoc(),
2015 MCID: SrcMI->getOpcode() == PPC::EXTSW ? TII->get(Opcode: PPC::EXTSWSLI)
2016 : TII->get(Opcode: PPC::EXTSWSLI_32_64),
2017 DestReg: MI.getOperand(i: 0).getReg())
2018 .add(MO: SrcMI->getOperand(i: 1))
2019 .add(MO: MOpSHMI);
2020 (void)NewInstr;
2021
2022 LLVM_DEBUG(dbgs() << "TO: ");
2023 LLVM_DEBUG(NewInstr->dump());
2024 ++NumEXTSWAndSLDICombined;
2025 ToErase = &MI;
2026 // SrcMI, which is extsw, is of no use now, but we don't erase it here so we
2027 // can recompute its kill flags. We run DCE immediately after this pass
2028 // to clean up dead instructions such as this.
2029 addRegToUpdate(NewInstr->getOperand(1).getReg());
2030 addRegToUpdate(SrcMI->getOperand(0).getReg());
2031 return true;
2032}
2033
} // end anonymous namespace
2035
// Register the pass with the legacy pass manager and declare the analyses it
// uses; the string is the name shown by -debug-pass and similar diagnostics.
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                      "PowerPC MI Peephole Optimization", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveVariablesWrapperPass)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                    "PowerPC MI Peephole Optimization", false, false)

// Unique address used as the legacy pass identifier.
char PPCMIPeephole::ID = 0;
2046FunctionPass*
2047llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
2048