1//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass performs peephole optimizations to clean up ugly code
10// sequences at the MachineInstruction layer. It runs at the end of
11// the SSA phases, following VSX swap removal. A pass of dead code
12// elimination follows this one for quick clean-up of any dead
13// instructions introduced here. Although we could do this as callbacks
14// from the generic peephole pass, this would have a couple of bad
15// effects: it might remove optimization opportunities for VSX swap
16// removal, and it would miss cleanups made possible following VSX
17// swap removal.
18//
19// NOTE: We run the verifier after this pass in Asserts/Debug builds so it
20// is important to keep the code valid after transformations.
21// Common causes of errors stem from violating the contract specified
22// by kill flags. Whenever a transformation changes the live range of
23// a register, that register should be added to the work list using
24// addRegToUpdate(RegsToUpdate, <Reg>). Furthermore, if a transformation
25// is changing the definition of a register (i.e. removing the single
26// definition of the original vreg), it needs to provide a dummy
27// definition of that register using addDummyDef(<MBB>, <Reg>).
28//===---------------------------------------------------------------------===//
29
30#include "MCTargetDesc/PPCMCTargetDesc.h"
31#include "MCTargetDesc/PPCPredicates.h"
32#include "PPC.h"
33#include "PPCInstrInfo.h"
34#include "PPCMachineFunctionInfo.h"
35#include "PPCTargetMachine.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/CodeGen/LiveVariables.h"
38#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
39#include "llvm/CodeGen/MachineDominators.h"
40#include "llvm/CodeGen/MachineFrameInfo.h"
41#include "llvm/CodeGen/MachineFunctionPass.h"
42#include "llvm/CodeGen/MachineInstrBuilder.h"
43#include "llvm/CodeGen/MachinePostDominators.h"
44#include "llvm/CodeGen/MachineRegisterInfo.h"
45#include "llvm/IR/GlobalVariable.h"
46#include "llvm/InitializePasses.h"
47#include "llvm/Support/Debug.h"
48#include "llvm/Support/DebugCounter.h"
49
50using namespace llvm;
51
52#define DEBUG_TYPE "ppc-mi-peepholes"
53
// Statistics reported under -stats; they summarize how often each
// individual peephole fired across the compilation.
STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
STATISTIC(MultiTOCSaves,
          "Number of functions with multiple TOC saves that must be kept");
STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
STATISTIC(NumConvertedToImmediateForm,
          "Number of instructions converted to their immediate form");
STATISTIC(NumFunctionsEnteredInMIPeephole,
          "Number of functions entered in PPC MI Peepholes");
STATISTIC(NumFixedPointIterations,
          "Number of fixed-point iterations converting reg-reg instructions "
          "to reg-imm ones");
STATISTIC(NumRotatesCollapsed,
          "Number of pairs of rotate left, clear left/right collapsed");
STATISTIC(NumEXTSWAndSLDICombined,
          "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
STATISTIC(NumLoadImmZeroFoldedAndRemoved,
          "Number of LI(8) reg, 0 that are folded to r0 and removed");

// Hidden command-line knobs for selectively disabling individual peepholes;
// mainly useful for debugging and triaging miscompiles.
static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(Val: true),
                   cl::desc("Iterate to a fixed point when attempting to "
                            "convert reg-reg instructions to reg-imm"));

static cl::opt<bool>
ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(Val: true),
              cl::desc("Convert eligible reg+reg instructions to reg+imm"));

static cl::opt<bool>
    EnableSExtElimination("ppc-eliminate-signext",
                          cl::desc("enable elimination of sign-extensions"),
                          cl::init(Val: true), cl::Hidden);

static cl::opt<bool>
    EnableZExtElimination("ppc-eliminate-zeroext",
                          cl::desc("enable elimination of zero-extensions"),
                          cl::init(Val: true), cl::Hidden);

// Off by default: converting a conditional trap to an unconditional one
// deletes all instructions that follow it (see simplifyCode's TrapOpt path).
static cl::opt<bool>
    EnableTrapOptimization("ppc-opt-conditional-trap",
                           cl::desc("enable optimization of conditional traps"),
                           cl::init(Val: false), cl::Hidden);

// Debug counters allow bisecting which individual transformations run
// (driven by -debug-counter on the command line).
DEBUG_COUNTER(
    PeepholeXToICounter, "ppc-xtoi-peephole",
    "Controls whether PPC reg+reg to reg+imm peephole is performed on a MI");

DEBUG_COUNTER(PeepholePerOpCounter, "ppc-per-op-peephole",
              "Controls whether PPC per opcode peephole is performed on a MI");
105
106namespace {
107
// Legacy-PM machine-function pass that performs the PowerPC MI-level
// peepholes described in the file header. It preserves (and incrementally
// updates) LiveVariables and the (post-)dominator trees it consumes.
struct PPCMIPeephole : public MachineFunctionPass {

  static char ID;
  const PPCInstrInfo *TII; // Target instruction info, set in initialize().
  MachineFunction *MF;     // Function currently being processed.
  MachineRegisterInfo *MRI;
  LiveVariables *LV;       // Updated in place as transformations run.

  PPCMIPeephole() : MachineFunctionPass(ID) {}

private:
  MachineDominatorTree *MDT;
  MachinePostDominatorTree *MPDT;
  MachineBlockFrequencyInfo *MBFI;
  // Frequency of the function's entry block, used by UpdateTOCSaves to
  // decide whether the TOC save belongs in the prologue.
  BlockFrequency EntryFreq;
  // Virtual registers whose kill flags must be recomputed because a
  // transformation changed their live range. See addRegToUpdateWithLine.
  SmallSet<Register, 16> RegsToUpdate;

  // Initialize class variables.
  void initialize(MachineFunction &MFParm);

  // Perform peepholes.
  bool simplifyCode();

  // Perform peepholes.
  bool eliminateRedundantCompare();
  bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
  bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
  bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
                                       MachineInstr *&ToErase);
  void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
                      MachineInstr *MI);

  // A number of transformations will eliminate the definition of a register
  // as all of its uses will be removed. However, this leaves a register
  // without a definition for LiveVariables. Such transformations should
  // use this function to provide a dummy definition of the register that
  // will simply be removed by DCE.
  void addDummyDef(MachineBasicBlock &MBB, MachineInstr *At, Register Reg) {
    BuildMI(BB&: MBB, I: At, MIMD: At->getDebugLoc(), MCID: TII->get(Opcode: PPC::IMPLICIT_DEF), DestReg: Reg);
  }
  // Queue Reg for kill-flag recomputation; Line (caller's __LINE__ via the
  // addRegToUpdate macro) is only used in debug output.
  void addRegToUpdateWithLine(Register Reg, int Line);
  void convertUnprimedAccPHIs(const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
                              SmallVectorImpl<MachineInstr *> &PHIs,
                              Register Dst);

public:

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveVariablesWrapperPass>();
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    AU.addRequired<MachinePostDominatorTreeWrapperPass>();
    AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
    AU.addPreserved<LiveVariablesWrapperPass>();
    AU.addPreserved<MachineDominatorTreeWrapperPass>();
    AU.addPreserved<MachinePostDominatorTreeWrapperPass>();
    AU.addPreserved<MachineBlockFrequencyInfoWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Main entry point for this pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    initialize(MFParm&: MF);
    // At this point, TOC pointer should not be used in a function that uses
    // PC-Relative addressing.
    assert((MF.getRegInfo().use_empty(PPC::X2) ||
            !MF.getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) &&
           "TOC pointer used in a function using PC-Relative addressing!");
    if (skipFunction(F: MF.getFunction()))
      return false;
    bool Changed = simplifyCode();
#ifndef NDEBUG
    // Verify eagerly in asserts builds so a broken transformation is caught
    // here rather than in a later pass.
    if (Changed)
      MF.verify(this, "Error in PowerPC MI Peephole optimization, compile with "
                      "-mllvm -disable-ppc-peephole");
#endif
    return Changed;
  }
};
186
187#define addRegToUpdate(R) addRegToUpdateWithLine(R, __LINE__)
188void PPCMIPeephole::addRegToUpdateWithLine(Register Reg, int Line) {
189 if (!Reg.isVirtual())
190 return;
191 if (RegsToUpdate.insert(V: Reg).second)
192 LLVM_DEBUG(dbgs() << "Adding register: " << printReg(Reg) << " on line "
193 << Line << " for re-computation of kill flags\n");
194}
195
196// Initialize class variables.
197void PPCMIPeephole::initialize(MachineFunction &MFParm) {
198 MF = &MFParm;
199 MRI = &MF->getRegInfo();
200 MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
201 MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
202 MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
203 LV = &getAnalysis<LiveVariablesWrapperPass>().getLV();
204 EntryFreq = MBFI->getEntryFreq();
205 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
206 RegsToUpdate.clear();
207 LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
208 LLVM_DEBUG(MF->dump());
209}
210
211static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
212 MachineRegisterInfo *MRI) {
213 assert(Op && "Invalid Operand!");
214 if (!Op->isReg())
215 return nullptr;
216
217 Register Reg = Op->getReg();
218 if (!Reg.isVirtual())
219 return nullptr;
220
221 return MRI->getVRegDef(Reg);
222}
223
224// This function returns number of known zero bits in output of MI
225// starting from the most significant bit.
226static unsigned getKnownLeadingZeroCount(const unsigned Reg,
227 const PPCInstrInfo *TII,
228 const MachineRegisterInfo *MRI) {
229 MachineInstr *MI = MRI->getVRegDef(Reg);
230 unsigned Opcode = MI->getOpcode();
231 if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
232 Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
233 return MI->getOperand(i: 3).getImm();
234
235 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
236 MI->getOperand(i: 3).getImm() <= 63 - MI->getOperand(i: 2).getImm())
237 return MI->getOperand(i: 3).getImm();
238
239 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
240 Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
241 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
242 MI->getOperand(i: 3).getImm() <= MI->getOperand(i: 4).getImm())
243 return 32 + MI->getOperand(i: 3).getImm();
244
245 if (Opcode == PPC::ANDI_rec) {
246 uint16_t Imm = MI->getOperand(i: 2).getImm();
247 return 48 + llvm::countl_zero(Val: Imm);
248 }
249
250 if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
251 Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
252 Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
253 // The result ranges from 0 to 32.
254 return 58;
255
256 if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
257 Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
258 // The result ranges from 0 to 64.
259 return 57;
260
261 if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
262 Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
263 Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
264 Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
265 return 48;
266
267 if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
268 Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
269 Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
270 Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
271 return 56;
272
273 if (Opcode == PPC::AND || Opcode == PPC::AND8 || Opcode == PPC::AND_rec ||
274 Opcode == PPC::AND8_rec)
275 return std::max(
276 a: getKnownLeadingZeroCount(Reg: MI->getOperand(i: 1).getReg(), TII, MRI),
277 b: getKnownLeadingZeroCount(Reg: MI->getOperand(i: 2).getReg(), TII, MRI));
278
279 if (Opcode == PPC::OR || Opcode == PPC::OR8 || Opcode == PPC::XOR ||
280 Opcode == PPC::XOR8 || Opcode == PPC::OR_rec ||
281 Opcode == PPC::OR8_rec || Opcode == PPC::XOR_rec ||
282 Opcode == PPC::XOR8_rec)
283 return std::min(
284 a: getKnownLeadingZeroCount(Reg: MI->getOperand(i: 1).getReg(), TII, MRI),
285 b: getKnownLeadingZeroCount(Reg: MI->getOperand(i: 2).getReg(), TII, MRI));
286
287 if (TII->isZeroExtended(Reg, MRI))
288 return 32;
289
290 return 0;
291}
292
// This function maintains a map for the pairs <TOC Save Instr, Keep>.
// Each time a new TOC save is encountered, it checks if any of the existing
// ones are dominated by the new one. If so, it marks the existing one as
// redundant by setting its entry in the map as false. It then adds the new
// instruction to the map with either true or false depending on if any
// existing instructions dominated the new one.
void PPCMIPeephole::UpdateTOCSaves(
    std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
  assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
  // FIXME: Saving TOC in prologue hasn't been implemented well in AIX ABI part,
  // here only support it under ELFv2.
  if (MF->getSubtarget<PPCSubtarget>().isELFv2ABI()) {
    PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();

    MachineBasicBlock *Entry = &MF->front();
    MachineBlockFrequency CurrBlockFreq = MBFI->getBlockFreq(MBB: MI->getParent());

    // If the block in which the TOC save resides is in a block that
    // post-dominates Entry, or a block that is hotter than entry (keep in mind
    // that early MachineLICM has already run so the TOC save won't be hoisted)
    // we can just do the save in the prologue.
    if (CurrBlockFreq > EntryFreq || MPDT->dominates(A: MI->getParent(), B: Entry))
      FI->setMustSaveTOC(true);

    // If we are saving the TOC in the prologue, all the TOC saves can be
    // removed from the code.
    if (FI->mustSaveTOC()) {
      for (auto &TOCSave : TOCSaves)
        TOCSave.second = false;
      // Add new instruction to map.
      TOCSaves[MI] = false;
      return;
    }
  }

  // No prologue save: decide by dominance which in-body saves survive.
  bool Keep = true;
  for (auto &I : TOCSaves) {
    MachineInstr *CurrInst = I.first;
    // If new instruction dominates an existing one, mark existing one as
    // redundant.
    if (I.second && MDT->dominates(A: MI, B: CurrInst))
      I.second = false;
    // Check if the new instruction is redundant (dominated by an existing
    // save); if so it never needs to be kept.
    if (MDT->dominates(A: CurrInst, B: MI)) {
      Keep = false;
      break;
    }
  }
  // Add new instruction to map.
  TOCSaves[MI] = Keep;
}
344
345// This function returns a list of all PHI nodes in the tree starting from
346// the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
347// The list initially only contains the root PHI. When we visit a PHI node, we
348// add it to the list. We continue to look for other PHI node operands while
349// there are nodes to visit in the list. The function returns false if the
350// optimization cannot be applied on this tree.
351static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
352 MachineInstr *RootPHI,
353 SmallVectorImpl<MachineInstr *> &PHIs) {
354 PHIs.push_back(Elt: RootPHI);
355 unsigned VisitedIndex = 0;
356 while (VisitedIndex < PHIs.size()) {
357 MachineInstr *VisitedPHI = PHIs[VisitedIndex];
358 for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
359 PHIOp != NumOps; PHIOp += 2) {
360 Register RegOp = VisitedPHI->getOperand(i: PHIOp).getReg();
361 if (!RegOp.isVirtual())
362 return false;
363 MachineInstr *Instr = MRI->getVRegDef(Reg: RegOp);
364 // While collecting the PHI nodes, we check if they can be converted (i.e.
365 // all the operands are either copies, implicit defs or PHI nodes).
366 unsigned Opcode = Instr->getOpcode();
367 if (Opcode == PPC::COPY) {
368 Register Reg = Instr->getOperand(i: 1).getReg();
369 if (!Reg.isVirtual() || MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
370 return false;
371 } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
372 return false;
373 // If we detect a cycle in the PHI nodes, we exit. It would be
374 // possible to change cycles as well, but that would add a lot
375 // of complexity for a case that is unlikely to occur with MMA
376 // code.
377 if (Opcode != PPC::PHI)
378 continue;
379 if (llvm::is_contained(Range&: PHIs, Element: Instr))
380 return false;
381 PHIs.push_back(Elt: Instr);
382 }
383 VisitedIndex++;
384 }
385 return true;
386}
387
// This function changes the unprimed accumulator PHI nodes in the PHIs list to
// primed accumulator PHI nodes. The list is traversed in reverse order to
// change all the PHI operands of a PHI node before changing the node itself.
// We keep a map to associate each changed PHI node to its non-changed form.
// Precondition: PHIs was produced by collectUnprimedAccPHIs, so every operand
// is a qualifying COPY, IMPLICIT_DEF or PHI. Dst is the destination register
// of the original unprimed->primed COPY; the root PHI's replacement defines it.
void PPCMIPeephole::convertUnprimedAccPHIs(
    const PPCInstrInfo *TII, MachineRegisterInfo *MRI,
    SmallVectorImpl<MachineInstr *> &PHIs, Register Dst) {
  DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap;
  // Reverse order guarantees a PHI's PHI-operands are converted before it.
  for (MachineInstr *PHI : llvm::reverse(C&: PHIs)) {
    // (value, block) operand pairs for the replacement PHI.
    SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps;
    // We check if the current PHI node can be changed by looking at its
    // operands. If all the operands are either copies from primed
    // accumulators, implicit definitions or other unprimed accumulator
    // PHI nodes, we change it.
    for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
         PHIOp += 2) {
      Register RegOp = PHI->getOperand(i: PHIOp).getReg();
      MachineInstr *PHIInput = MRI->getVRegDef(Reg: RegOp);
      unsigned Opcode = PHIInput->getOpcode();
      assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
              Opcode == PPC::PHI) &&
             "Unexpected instruction");
      if (Opcode == PPC::COPY) {
        assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
                   &PPC::ACCRCRegClass &&
               "Unexpected register class");
        // Use the copy's primed source directly, bypassing the copy.
        PHIOps.push_back(Elt: {PHIInput->getOperand(i: 1), PHI->getOperand(i: PHIOp + 1)});
      } else if (Opcode == PPC::IMPLICIT_DEF) {
        // Materialize a fresh implicit def of a primed accumulator.
        Register AccReg = MRI->createVirtualRegister(RegClass: &PPC::ACCRCRegClass);
        BuildMI(BB&: *PHIInput->getParent(), I: PHIInput, MIMD: PHIInput->getDebugLoc(),
                MCID: TII->get(Opcode: PPC::IMPLICIT_DEF), DestReg: AccReg);
        PHIOps.push_back(Elt: {MachineOperand::CreateReg(Reg: AccReg, isDef: false),
                           PHI->getOperand(i: PHIOp + 1)});
      } else if (Opcode == PPC::PHI) {
        // We found a PHI operand. At this point we know this operand
        // has already been changed so we get its associated changed form
        // from the map.
        assert(ChangedPHIMap.count(PHIInput) == 1 &&
               "This PHI node should have already been changed.");
        MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(Val: PHIInput);
        PHIOps.push_back(Elt: {MachineOperand::CreateReg(
                               Reg: PrimedAccPHI->getOperand(i: 0).getReg(), isDef: false),
                           PHI->getOperand(i: PHIOp + 1)});
      }
    }
    Register AccReg = Dst;
    // If the PHI node we are changing is the root node, the register it defines
    // will be the destination register of the original copy (of the PHI def).
    // For all other PHI's in the list, we need to create another primed
    // accumulator virtual register as the PHI will no longer define the
    // unprimed accumulator.
    if (PHI != PHIs[0])
      AccReg = MRI->createVirtualRegister(RegClass: &PPC::ACCRCRegClass);
    MachineInstrBuilder NewPHI = BuildMI(
        BB&: *PHI->getParent(), I: PHI, MIMD: PHI->getDebugLoc(), MCID: TII->get(Opcode: PPC::PHI), DestReg: AccReg);
    for (auto RegMBB : PHIOps) {
      NewPHI.add(MO: RegMBB.first).add(MO: RegMBB.second);
      if (MRI->isSSA())
        addRegToUpdate(RegMBB.first.getReg());
    }
    // The liveness of old PHI and new PHI have to be updated.
    addRegToUpdate(PHI->getOperand(0).getReg());
    addRegToUpdate(AccReg);
    ChangedPHIMap[PHI] = NewPHI.getInstr();
    LLVM_DEBUG(dbgs() << "Converting PHI: ");
    LLVM_DEBUG(PHI->dump());
    LLVM_DEBUG(dbgs() << "To: ");
    LLVM_DEBUG(NewPHI.getInstr()->dump());
  }
}
458
459// Perform peephole optimizations.
460bool PPCMIPeephole::simplifyCode() {
461 bool Simplified = false;
462 bool TrapOpt = false;
463 MachineInstr* ToErase = nullptr;
464 std::map<MachineInstr *, bool> TOCSaves;
465 const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
466 NumFunctionsEnteredInMIPeephole++;
467 if (ConvertRegReg) {
468 // Fixed-point conversion of reg/reg instructions fed by load-immediate
469 // into reg/imm instructions. FIXME: This is expensive, control it with
470 // an option.
471 bool SomethingChanged = false;
472 do {
473 NumFixedPointIterations++;
474 SomethingChanged = false;
475 for (MachineBasicBlock &MBB : *MF) {
476 for (MachineInstr &MI : MBB) {
477 if (MI.isDebugInstr())
478 continue;
479
480 if (!DebugCounter::shouldExecute(Counter&: PeepholeXToICounter))
481 continue;
482
483 SmallSet<Register, 4> RRToRIRegsToUpdate;
484 if (!TII->convertToImmediateForm(MI, RegsToUpdate&: RRToRIRegsToUpdate))
485 continue;
486 for (Register R : RRToRIRegsToUpdate)
487 addRegToUpdate(R);
488 // The updated instruction may now have new register operands.
489 // Conservatively add them to recompute the flags as well.
490 for (const MachineOperand &MO : MI.operands())
491 if (MO.isReg())
492 addRegToUpdate(MO.getReg());
493 // We don't erase anything in case the def has other uses. Let DCE
494 // remove it if it can be removed.
495 LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
496 LLVM_DEBUG(MI.dump());
497 NumConvertedToImmediateForm++;
498 SomethingChanged = true;
499 Simplified = true;
500 }
501 }
502 } while (SomethingChanged && FixedPointRegToImm);
503 }
504
505 // Since we are deleting this instruction, we need to run LiveVariables
506 // on any of its definitions that are marked as needing an update since
507 // we can't run LiveVariables on a deleted register. This only needs
508 // to be done for defs since uses will have their own defining
509 // instructions so we won't be running LiveVariables on a deleted reg.
510 auto recomputeLVForDyingInstr = [&]() {
511 if (RegsToUpdate.empty())
512 return;
513 for (MachineOperand &MO : ToErase->operands()) {
514 if (!MO.isReg() || !MO.isDef() || !RegsToUpdate.count(V: MO.getReg()))
515 continue;
516 Register RegToUpdate = MO.getReg();
517 RegsToUpdate.erase(V: RegToUpdate);
518 // If some transformation has introduced an additional definition of
519 // this register (breaking SSA), we can safely convert this def to
520 // a def of an invalid register as the instruction is going away.
521 if (!MRI->getUniqueVRegDef(Reg: RegToUpdate))
522 MO.setReg(PPC::NoRegister);
523 LV->recomputeForSingleDefVirtReg(Reg: RegToUpdate);
524 }
525 };
526
527 for (MachineBasicBlock &MBB : *MF) {
528 for (MachineInstr &MI : MBB) {
529
530 // If the previous instruction was marked for elimination,
531 // remove it now.
532 if (ToErase) {
533 LLVM_DEBUG(dbgs() << "Deleting instruction: ");
534 LLVM_DEBUG(ToErase->dump());
535 recomputeLVForDyingInstr();
536 ToErase->eraseFromParent();
537 ToErase = nullptr;
538 }
539 // If a conditional trap instruction got optimized to an
540 // unconditional trap, eliminate all the instructions after
541 // the trap.
542 if (EnableTrapOptimization && TrapOpt) {
543 ToErase = &MI;
544 continue;
545 }
546
547 // Ignore debug instructions.
548 if (MI.isDebugInstr())
549 continue;
550
551 if (!DebugCounter::shouldExecute(Counter&: PeepholePerOpCounter))
552 continue;
553
554 // Per-opcode peepholes.
555 switch (MI.getOpcode()) {
556
557 default:
558 break;
559 case PPC::COPY: {
560 Register Src = MI.getOperand(i: 1).getReg();
561 Register Dst = MI.getOperand(i: 0).getReg();
562 if (!Src.isVirtual() || !Dst.isVirtual())
563 break;
564 if (MRI->getRegClass(Reg: Src) != &PPC::UACCRCRegClass ||
565 MRI->getRegClass(Reg: Dst) != &PPC::ACCRCRegClass)
566 break;
567
568 // We are copying an unprimed accumulator to a primed accumulator.
569 // If the input to the copy is a PHI that is fed only by (i) copies in
570 // the other direction (ii) implicitly defined unprimed accumulators or
571 // (iii) other PHI nodes satisfying (i) and (ii), we can change
572 // the PHI to a PHI on primed accumulators (as long as we also change
573 // its operands). To detect and change such copies, we first get a list
574 // of all the PHI nodes starting from the root PHI node in BFS order.
575 // We then visit all these PHI nodes to check if they can be changed to
576 // primed accumulator PHI nodes and if so, we change them.
577 MachineInstr *RootPHI = MRI->getVRegDef(Reg: Src);
578 if (RootPHI->getOpcode() != PPC::PHI)
579 break;
580
581 SmallVector<MachineInstr *, 4> PHIs;
582 if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
583 break;
584
585 convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
586
587 ToErase = &MI;
588 break;
589 }
590 case PPC::LI:
591 case PPC::LI8: {
592 // If we are materializing a zero, look for any use operands for which
593 // zero means immediate zero. All such operands can be replaced with
594 // PPC::ZERO.
595 if (!MI.getOperand(i: 1).isImm() || MI.getOperand(i: 1).getImm() != 0)
596 break;
597 Register MIDestReg = MI.getOperand(i: 0).getReg();
598 bool Folded = false;
599 for (MachineInstr& UseMI : MRI->use_instructions(Reg: MIDestReg))
600 Folded |= TII->onlyFoldImmediate(UseMI, DefMI&: MI, Reg: MIDestReg);
601 if (MRI->use_nodbg_empty(RegNo: MIDestReg)) {
602 ++NumLoadImmZeroFoldedAndRemoved;
603 ToErase = &MI;
604 }
605 if (Folded)
606 addRegToUpdate(MIDestReg);
607 Simplified |= Folded;
608 break;
609 }
610 case PPC::STW:
611 case PPC::STD: {
612 MachineFrameInfo &MFI = MF->getFrameInfo();
613 if (MFI.hasVarSizedObjects() ||
614 (!MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
615 !MF->getSubtarget<PPCSubtarget>().isAIXABI()))
616 break;
617 // When encountering a TOC save instruction, call UpdateTOCSaves
618 // to add it to the TOCSaves map and mark any existing TOC saves
619 // it dominates as redundant.
620 if (TII->isTOCSaveMI(MI))
621 UpdateTOCSaves(TOCSaves, MI: &MI);
622 break;
623 }
624 case PPC::XXPERMDI: {
625 // Perform simplifications of 2x64 vector swaps and splats.
626 // A swap is identified by an immediate value of 2, and a splat
627 // is identified by an immediate value of 0 or 3.
628 int Immed = MI.getOperand(i: 3).getImm();
629
630 if (Immed == 1)
631 break;
632
633 // For each of these simplifications, we need the two source
634 // regs to match. Unfortunately, MachineCSE ignores COPY and
635 // SUBREG_TO_REG, so for example we can see
636 // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
637 // We have to look through chains of COPY and SUBREG_TO_REG
638 // to find the real source values for comparison.
639 Register TrueReg1 =
640 TRI->lookThruCopyLike(SrcReg: MI.getOperand(i: 1).getReg(), MRI);
641 Register TrueReg2 =
642 TRI->lookThruCopyLike(SrcReg: MI.getOperand(i: 2).getReg(), MRI);
643
644 if (!(TrueReg1 == TrueReg2 && TrueReg1.isVirtual()))
645 break;
646
647 MachineInstr *DefMI = MRI->getVRegDef(Reg: TrueReg1);
648
649 if (!DefMI)
650 break;
651
652 unsigned DefOpc = DefMI->getOpcode();
653
654 // If this is a splat fed by a splatting load, the splat is
655 // redundant. Replace with a copy. This doesn't happen directly due
656 // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
657 // a load of a double to a vector of 64-bit integers.
658 auto isConversionOfLoadAndSplat = [=]() -> bool {
659 if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
660 return false;
661 Register FeedReg1 =
662 TRI->lookThruCopyLike(SrcReg: DefMI->getOperand(i: 1).getReg(), MRI);
663 if (FeedReg1.isVirtual()) {
664 MachineInstr *LoadMI = MRI->getVRegDef(Reg: FeedReg1);
665 if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
666 return true;
667 }
668 return false;
669 };
670 if ((Immed == 0 || Immed == 3) &&
671 (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
672 LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
673 "to load-and-splat/copy: ");
674 LLVM_DEBUG(MI.dump());
675 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
676 DestReg: MI.getOperand(i: 0).getReg())
677 .add(MO: MI.getOperand(i: 1));
678 addRegToUpdate(MI.getOperand(1).getReg());
679 ToErase = &MI;
680 Simplified = true;
681 }
682
683 // If this is a splat or a swap fed by another splat, we
684 // can replace it with a copy.
685 if (DefOpc == PPC::XXPERMDI) {
686 Register DefReg1 = DefMI->getOperand(i: 1).getReg();
687 Register DefReg2 = DefMI->getOperand(i: 2).getReg();
688 unsigned DefImmed = DefMI->getOperand(i: 3).getImm();
689
690 // If the two inputs are not the same register, check to see if
691 // they originate from the same virtual register after only
692 // copy-like instructions.
693 if (DefReg1 != DefReg2) {
694 Register FeedReg1 = TRI->lookThruCopyLike(SrcReg: DefReg1, MRI);
695 Register FeedReg2 = TRI->lookThruCopyLike(SrcReg: DefReg2, MRI);
696
697 if (!(FeedReg1 == FeedReg2 && FeedReg1.isVirtual()))
698 break;
699 }
700
701 if (DefImmed == 0 || DefImmed == 3) {
702 LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
703 "to splat/copy: ");
704 LLVM_DEBUG(MI.dump());
705 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
706 DestReg: MI.getOperand(i: 0).getReg())
707 .add(MO: MI.getOperand(i: 1));
708 addRegToUpdate(MI.getOperand(1).getReg());
709 ToErase = &MI;
710 Simplified = true;
711 }
712
          // If this is a splat fed by a swap, we can modify the splat to
          // splat the other value from the swap's input parameter.
716 else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
717 LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
718 LLVM_DEBUG(MI.dump());
719 addRegToUpdate(MI.getOperand(1).getReg());
720 addRegToUpdate(MI.getOperand(2).getReg());
721 MI.getOperand(i: 1).setReg(DefReg1);
722 MI.getOperand(i: 2).setReg(DefReg2);
723 MI.getOperand(i: 3).setImm(3 - Immed);
724 addRegToUpdate(DefReg1);
725 addRegToUpdate(DefReg2);
726 Simplified = true;
727 }
728
729 // If this is a swap fed by a swap, we can replace it
730 // with a copy from the first swap's input.
731 else if (Immed == 2 && DefImmed == 2) {
732 LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
733 LLVM_DEBUG(MI.dump());
734 addRegToUpdate(MI.getOperand(1).getReg());
735
736 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
737 DestReg: MI.getOperand(i: 0).getReg())
738 .add(MO: DefMI->getOperand(i: 1));
739 addRegToUpdate(DefMI->getOperand(0).getReg());
740 addRegToUpdate(DefMI->getOperand(1).getReg());
741 ToErase = &MI;
742 Simplified = true;
743 }
744 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
745 DefOpc == PPC::XXPERMDIs &&
746 (DefMI->getOperand(i: 2).getImm() == 0 ||
747 DefMI->getOperand(i: 2).getImm() == 3)) {
748
749 if (!MRI->hasOneNonDBGUser(RegNo: DefMI->getOperand(i: 0).getReg()))
750 break;
751 Simplified = true;
752 // Swap of a splat, convert to copy.
753 if (Immed == 2) {
754 LLVM_DEBUG(dbgs() << "Optimizing swap(splat) => copy(splat): ");
755 LLVM_DEBUG(MI.dump());
756 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
757 DestReg: MI.getOperand(i: 0).getReg())
758 .add(MO: MI.getOperand(i: 1));
759 addRegToUpdate(MI.getOperand(1).getReg());
760 ToErase = &MI;
761 break;
762 }
763 // Splat fed by another splat - switch the output of the first
764 // and remove the second.
765 ToErase = &MI;
766 DefMI->getOperand(i: 0).setReg(MI.getOperand(i: 0).getReg());
767 LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
768 LLVM_DEBUG(MI.dump());
769 } else if (Immed == 2 &&
770 (DefOpc == PPC::VSPLTB || DefOpc == PPC::VSPLTH ||
771 DefOpc == PPC::VSPLTW || DefOpc == PPC::XXSPLTW ||
772 DefOpc == PPC::VSPLTISB || DefOpc == PPC::VSPLTISH ||
773 DefOpc == PPC::VSPLTISW)) {
774 // Swap of various vector splats, convert to copy.
775 ToErase = &MI;
776 Simplified = true;
777 LLVM_DEBUG(dbgs() << "Optimizing swap(vsplt(is)?[b|h|w]|xxspltw) => "
778 "copy(vsplt(is)?[b|h|w]|xxspltw): ");
779 LLVM_DEBUG(MI.dump());
780 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
781 DestReg: MI.getOperand(i: 0).getReg())
782 .add(MO: MI.getOperand(i: 1));
783 addRegToUpdate(MI.getOperand(1).getReg());
784 } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
785 TII->isLoadFromConstantPool(I: DefMI)) {
786 const Constant *C = TII->getConstantFromConstantPool(I: DefMI);
787 if (C && C->getType()->isVectorTy() && C->getSplatValue()) {
788 ToErase = &MI;
789 Simplified = true;
790 LLVM_DEBUG(dbgs()
791 << "Optimizing swap(splat pattern from constant-pool) "
792 "=> copy(splat pattern from constant-pool): ");
793 LLVM_DEBUG(MI.dump());
794 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
795 DestReg: MI.getOperand(i: 0).getReg())
796 .add(MO: MI.getOperand(i: 1));
797 addRegToUpdate(MI.getOperand(1).getReg());
798 }
799 }
800 break;
801 }
802 case PPC::VSPLTB:
803 case PPC::VSPLTH:
804 case PPC::XXSPLTW: {
805 unsigned MyOpcode = MI.getOpcode();
806 // The operand number of the source register in the splat instruction.
807 unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
808 Register TrueReg =
809 TRI->lookThruCopyLike(SrcReg: MI.getOperand(i: OpNo).getReg(), MRI);
810 if (!TrueReg.isVirtual())
811 break;
812 MachineInstr *DefMI = MRI->getVRegDef(Reg: TrueReg);
813 if (!DefMI)
814 break;
815 unsigned DefOpcode = DefMI->getOpcode();
816 auto isConvertOfSplat = [=]() -> bool {
817 if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
818 return false;
819 Register ConvReg = DefMI->getOperand(i: 1).getReg();
820 if (!ConvReg.isVirtual())
821 return false;
822 MachineInstr *Splt = MRI->getVRegDef(Reg: ConvReg);
823 return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
824 Splt->getOpcode() == PPC::XXSPLTW);
825 };
826 bool AlreadySplat = (MyOpcode == DefOpcode) ||
827 (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
828 (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
829 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
830 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
831 (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
832 (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
833
834 // If the instruction[s] that feed this splat have already splat
835 // the value, this splat is redundant.
836 if (AlreadySplat) {
837 LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
838 LLVM_DEBUG(MI.dump());
839 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
840 DestReg: MI.getOperand(i: 0).getReg())
841 .add(MO: MI.getOperand(i: OpNo));
842 addRegToUpdate(MI.getOperand(OpNo).getReg());
843 ToErase = &MI;
844 Simplified = true;
845 }
846
847 // Splat fed by a shift. Usually when we align value to splat into
848 // vector element zero.
849 if (DefOpcode == PPC::XXSLDWI) {
850 Register ShiftOp1 = DefMI->getOperand(i: 1).getReg();
851
852 if (ShiftOp1 == DefMI->getOperand(i: 2).getReg()) {
853 // For example, We can erase XXSLDWI from in following:
854 // %2:vrrc = XXSLDWI killed %1:vrrc, %1:vrrc, 1
855 // %6:vrrc = VSPLTB 15, killed %2:vrrc
856 // %7:vsrc = XXLAND killed %6:vrrc, killed %1:vrrc
857 //
858 // --->
859 //
860 // %6:vrrc = VSPLTB 3, killed %1:vrrc
861 // %7:vsrc = XXLAND killed %6:vrrc, killed %1:vrrc
862
863 if (MRI->hasOneNonDBGUse(RegNo: DefMI->getOperand(i: 0).getReg())) {
864 LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
865 LLVM_DEBUG(DefMI->dump());
866 ToErase = DefMI;
867 }
868 Simplified = true;
869 unsigned ShiftImm = DefMI->getOperand(i: 3).getImm();
870 // The operand number of the splat Imm in the instruction.
871 unsigned SplatImmNo = MyOpcode == PPC::XXSPLTW ? 2 : 1;
872 unsigned SplatImm = MI.getOperand(i: SplatImmNo).getImm();
873
874 // Calculate the new splat-element immediate. We need to convert the
875 // element index into the proper unit (byte for VSPLTB, halfword for
876 // VSPLTH, word for VSPLTW) because PPC::XXSLDWI interprets its
877 // ShiftImm in 32-bit word units.
878 auto CalculateNewElementIdx = [&](unsigned Opcode) {
879 if (Opcode == PPC::VSPLTB)
880 return (SplatImm + ShiftImm * 4) & 0xF;
881 else if (Opcode == PPC::VSPLTH)
882 return (SplatImm + ShiftImm * 2) & 0x7;
883 else
884 return (SplatImm + ShiftImm) & 0x3;
885 };
886
887 unsigned NewElem = CalculateNewElementIdx(MyOpcode);
888
889 LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
890 << " to " << NewElem << " in instruction: ");
891 LLVM_DEBUG(MI.dump());
892 addRegToUpdate(MI.getOperand(OpNo).getReg());
893 addRegToUpdate(ShiftOp1);
894 MI.getOperand(i: OpNo).setReg(ShiftOp1);
895 MI.getOperand(i: SplatImmNo).setImm(NewElem);
896 }
897 }
898 break;
899 }
900 case PPC::XVCVDPSP: {
901 // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
902 Register TrueReg =
903 TRI->lookThruCopyLike(SrcReg: MI.getOperand(i: 1).getReg(), MRI);
904 if (!TrueReg.isVirtual())
905 break;
906 MachineInstr *DefMI = MRI->getVRegDef(Reg: TrueReg);
907
908 // This can occur when building a vector of single precision or integer
909 // values.
910 if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
911 Register DefsReg1 =
912 TRI->lookThruCopyLike(SrcReg: DefMI->getOperand(i: 1).getReg(), MRI);
913 Register DefsReg2 =
914 TRI->lookThruCopyLike(SrcReg: DefMI->getOperand(i: 2).getReg(), MRI);
915 if (!DefsReg1.isVirtual() || !DefsReg2.isVirtual())
916 break;
917 MachineInstr *P1 = MRI->getVRegDef(Reg: DefsReg1);
918 MachineInstr *P2 = MRI->getVRegDef(Reg: DefsReg2);
919
920 if (!P1 || !P2)
921 break;
922
923 // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
924 // and set any uses of that FRSP/XSRSP (in this MI) to the source of
925 // the FRSP/XSRSP.
926 auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
927 unsigned Opc = RoundInstr->getOpcode();
928 if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
929 MRI->hasOneNonDBGUse(RegNo: RoundInstr->getOperand(i: 0).getReg())) {
930 Simplified = true;
931 Register ConvReg1 = RoundInstr->getOperand(i: 1).getReg();
932 Register FRSPDefines = RoundInstr->getOperand(i: 0).getReg();
933 MachineInstr &Use = *(MRI->use_instr_nodbg_begin(RegNo: FRSPDefines));
934 for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
935 if (Use.getOperand(i).isReg() &&
936 Use.getOperand(i).getReg() == FRSPDefines)
937 Use.getOperand(i).setReg(ConvReg1);
938 LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
939 LLVM_DEBUG(RoundInstr->dump());
940 LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
941 LLVM_DEBUG(MI.dump());
942 LLVM_DEBUG(dbgs() << "Through instruction:\n");
943 LLVM_DEBUG(DefMI->dump());
944 addRegToUpdate(ConvReg1);
945 addRegToUpdate(FRSPDefines);
946 ToErase = RoundInstr;
947 }
948 };
949
950 // If the input to XVCVDPSP is a vector that was built (even
951 // partially) out of FRSP's, the FRSP(s) can safely be removed
952 // since this instruction performs the same operation.
953 if (P1 != P2) {
954 removeFRSPIfPossible(P1);
955 removeFRSPIfPossible(P2);
956 break;
957 }
958 removeFRSPIfPossible(P1);
959 }
960 break;
961 }
962 case PPC::EXTSH:
963 case PPC::EXTSH8:
964 case PPC::EXTSH8_32_64: {
965 if (!EnableSExtElimination) break;
966 Register NarrowReg = MI.getOperand(i: 1).getReg();
967 if (!NarrowReg.isVirtual())
968 break;
969
970 MachineInstr *SrcMI = MRI->getVRegDef(Reg: NarrowReg);
971 unsigned SrcOpcode = SrcMI->getOpcode();
972 // If we've used a zero-extending load that we will sign-extend,
973 // just do a sign-extending load.
974 if (SrcOpcode == PPC::LHZ || SrcOpcode == PPC::LHZX) {
975 if (!MRI->hasOneNonDBGUse(RegNo: SrcMI->getOperand(i: 0).getReg()))
976 break;
977 // Determine the new opcode. We need to make sure that if the original
978 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
979 // Likewise if the source is X-Form the new opcode should also be
980 // X-Form.
981 unsigned Opc = PPC::LHA;
982 bool SourceIsXForm = SrcOpcode == PPC::LHZX;
983 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSH8 ||
984 MI.getOpcode() == PPC::EXTSH8_32_64;
985
986 if (SourceIsXForm && MIIs64Bit)
987 Opc = PPC::LHAX8;
988 else if (SourceIsXForm && !MIIs64Bit)
989 Opc = PPC::LHAX;
990 else if (MIIs64Bit)
991 Opc = PPC::LHA8;
992
993 addRegToUpdate(NarrowReg);
994 addRegToUpdate(MI.getOperand(0).getReg());
995
996 // We are removing a definition of NarrowReg which will cause
997 // problems in AliveBlocks. Add an implicit def that will be
998 // removed so that AliveBlocks are updated correctly.
999 addDummyDef(MBB, At: &MI, Reg: NarrowReg);
1000 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
1001 LLVM_DEBUG(SrcMI->dump());
1002 LLVM_DEBUG(dbgs() << "and sign-extension\n");
1003 LLVM_DEBUG(MI.dump());
1004 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
1005 SrcMI->setDesc(TII->get(Opcode: Opc));
1006 SrcMI->getOperand(i: 0).setReg(MI.getOperand(i: 0).getReg());
1007 ToErase = &MI;
1008 Simplified = true;
1009 NumEliminatedSExt++;
1010 }
1011 break;
1012 }
1013 case PPC::EXTSW:
1014 case PPC::EXTSW_32:
1015 case PPC::EXTSW_32_64: {
1016 if (!EnableSExtElimination) break;
1017 Register NarrowReg = MI.getOperand(i: 1).getReg();
1018 if (!NarrowReg.isVirtual())
1019 break;
1020
1021 MachineInstr *SrcMI = MRI->getVRegDef(Reg: NarrowReg);
1022 unsigned SrcOpcode = SrcMI->getOpcode();
1023 // If we've used a zero-extending load that we will sign-extend,
1024 // just do a sign-extending load.
1025 if (SrcOpcode == PPC::LWZ || SrcOpcode == PPC::LWZX) {
1026 if (!MRI->hasOneNonDBGUse(RegNo: SrcMI->getOperand(i: 0).getReg()))
1027 break;
1028
1029 // The transformation from a zero-extending load to a sign-extending
1030 // load is only legal when the displacement is a multiple of 4.
1031 // If the displacement is not at least 4 byte aligned, don't perform
1032 // the transformation.
1033 bool IsWordAligned = false;
1034 if (SrcMI->getOperand(i: 1).isGlobal()) {
1035 const GlobalVariable *GV =
1036 dyn_cast<GlobalVariable>(Val: SrcMI->getOperand(i: 1).getGlobal());
1037 if (GV && GV->getAlign() && *GV->getAlign() >= 4 &&
1038 (SrcMI->getOperand(i: 1).getOffset() % 4 == 0))
1039 IsWordAligned = true;
1040 } else if (SrcMI->getOperand(i: 1).isImm()) {
1041 int64_t Value = SrcMI->getOperand(i: 1).getImm();
1042 if (Value % 4 == 0)
1043 IsWordAligned = true;
1044 }
1045
1046 // Determine the new opcode. We need to make sure that if the original
1047 // instruction has a 64 bit opcode we keep using a 64 bit opcode.
1048 // Likewise if the source is X-Form the new opcode should also be
1049 // X-Form.
1050 unsigned Opc = PPC::LWA_32;
1051 bool SourceIsXForm = SrcOpcode == PPC::LWZX;
1052 bool MIIs64Bit = MI.getOpcode() == PPC::EXTSW ||
1053 MI.getOpcode() == PPC::EXTSW_32_64;
1054
1055 if (SourceIsXForm && MIIs64Bit)
1056 Opc = PPC::LWAX;
1057 else if (SourceIsXForm && !MIIs64Bit)
1058 Opc = PPC::LWAX_32;
1059 else if (MIIs64Bit)
1060 Opc = PPC::LWA;
1061
1062 if (!IsWordAligned && (Opc == PPC::LWA || Opc == PPC::LWA_32))
1063 break;
1064
1065 addRegToUpdate(NarrowReg);
1066 addRegToUpdate(MI.getOperand(0).getReg());
1067
1068 // We are removing a definition of NarrowReg which will cause
1069 // problems in AliveBlocks. Add an implicit def that will be
1070 // removed so that AliveBlocks are updated correctly.
1071 addDummyDef(MBB, At: &MI, Reg: NarrowReg);
1072 LLVM_DEBUG(dbgs() << "Zero-extending load\n");
1073 LLVM_DEBUG(SrcMI->dump());
1074 LLVM_DEBUG(dbgs() << "and sign-extension\n");
1075 LLVM_DEBUG(MI.dump());
1076 LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
1077 SrcMI->setDesc(TII->get(Opcode: Opc));
1078 SrcMI->getOperand(i: 0).setReg(MI.getOperand(i: 0).getReg());
1079 ToErase = &MI;
1080 Simplified = true;
1081 NumEliminatedSExt++;
1082 } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
1083 TII->isSignExtended(Reg: NarrowReg, MRI)) {
1084 // We can eliminate EXTSW if the input is known to be already
1085 // sign-extended. However, we are not sure whether a spill will occur
1086 // during register allocation. If there is no promotion, it will use
1087 // 'stw' instead of 'std', and 'lwz' instead of 'ld' when spilling,
1088 // since the register class is 32-bits. Consequently, the high 32-bit
1089 // information will be lost. Therefore, all these instructions in the
1090 // chain used to deduce sign extension to eliminate the 'extsw' will
1091 // need to be promoted to 64-bit pseudo instructions when the 'extsw'
1092 // is eliminated.
1093 TII->promoteInstr32To64ForElimEXTSW(Reg: NarrowReg, MRI, BinOpDepth: 0, LV);
1094
1095 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
1096 Register TmpReg =
1097 MF->getRegInfo().createVirtualRegister(RegClass: &PPC::G8RCRegClass);
1098 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::IMPLICIT_DEF),
1099 DestReg: TmpReg);
1100 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::INSERT_SUBREG),
1101 DestReg: MI.getOperand(i: 0).getReg())
1102 .addReg(RegNo: TmpReg)
1103 .addReg(RegNo: NarrowReg)
1104 .addImm(Val: PPC::sub_32);
1105 ToErase = &MI;
1106 Simplified = true;
1107 NumEliminatedSExt++;
1108 }
1109 break;
1110 }
1111 case PPC::RLDICL: {
1112 // We can eliminate RLDICL (e.g. for zero-extension)
1113 // if all bits to clear are already zero in the input.
1114 // This code assume following code sequence for zero-extension.
1115 // %6 = COPY %5:sub_32; (optional)
1116 // %8 = IMPLICIT_DEF;
1117 // %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
1118 if (!EnableZExtElimination) break;
1119
1120 if (MI.getOperand(i: 2).getImm() != 0)
1121 break;
1122
1123 Register SrcReg = MI.getOperand(i: 1).getReg();
1124 if (!SrcReg.isVirtual())
1125 break;
1126
1127 MachineInstr *SrcMI = MRI->getVRegDef(Reg: SrcReg);
1128 if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
1129 SrcMI->getOperand(i: 0).isReg() && SrcMI->getOperand(i: 1).isReg()))
1130 break;
1131
1132 MachineInstr *ImpDefMI, *SubRegMI;
1133 ImpDefMI = MRI->getVRegDef(Reg: SrcMI->getOperand(i: 1).getReg());
1134 SubRegMI = MRI->getVRegDef(Reg: SrcMI->getOperand(i: 2).getReg());
1135 if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
1136
1137 SrcMI = SubRegMI;
1138 if (SubRegMI->getOpcode() == PPC::COPY) {
1139 Register CopyReg = SubRegMI->getOperand(i: 1).getReg();
1140 if (CopyReg.isVirtual())
1141 SrcMI = MRI->getVRegDef(Reg: CopyReg);
1142 }
1143 if (!SrcMI->getOperand(i: 0).isReg())
1144 break;
1145
1146 unsigned KnownZeroCount =
1147 getKnownLeadingZeroCount(Reg: SrcMI->getOperand(i: 0).getReg(), TII, MRI);
1148 if (MI.getOperand(i: 3).getImm() <= KnownZeroCount) {
1149 LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
1150 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
1151 DestReg: MI.getOperand(i: 0).getReg())
1152 .addReg(RegNo: SrcReg);
1153 addRegToUpdate(SrcReg);
1154 ToErase = &MI;
1155 Simplified = true;
1156 NumEliminatedZExt++;
1157 }
1158 break;
1159 }
1160
1161 // TODO: Any instruction that has an immediate form fed only by a PHI
1162 // whose operands are all load immediate can be folded away. We currently
1163 // do this for ADD instructions, but should expand it to arithmetic and
1164 // binary instructions with immediate forms in the future.
1165 case PPC::ADD4:
1166 case PPC::ADD8: {
1167 auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
1168 assert(PhiOp && "Invalid Operand!");
1169 MachineInstr *DefPhiMI = getVRegDefOrNull(Op: PhiOp, MRI);
1170
1171 return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
1172 MRI->hasOneNonDBGUse(RegNo: DefPhiMI->getOperand(i: 0).getReg());
1173 };
1174
1175 auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
1176 MachineOperand *PhiOp) {
1177 assert(PhiOp && "Invalid Operand!");
1178 assert(DominatorOp && "Invalid Operand!");
1179 MachineInstr *DefPhiMI = getVRegDefOrNull(Op: PhiOp, MRI);
1180 MachineInstr *DefDomMI = getVRegDefOrNull(Op: DominatorOp, MRI);
1181
1182 // Note: the vregs only show up at odd indices position of PHI Node,
1183 // the even indices position save the BB info.
1184 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1185 MachineInstr *LiMI =
1186 getVRegDefOrNull(Op: &DefPhiMI->getOperand(i), MRI);
1187 if (!LiMI ||
1188 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
1189 || !MRI->hasOneNonDBGUse(RegNo: LiMI->getOperand(i: 0).getReg()) ||
1190 !MDT->dominates(A: DefDomMI, B: LiMI))
1191 return false;
1192 }
1193
1194 return true;
1195 };
1196
1197 MachineOperand Op1 = MI.getOperand(i: 1);
1198 MachineOperand Op2 = MI.getOperand(i: 2);
1199 if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
1200 std::swap(a&: Op1, b&: Op2);
1201 else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
1202 break; // We don't have an ADD fed by LI's that can be transformed
1203
1204 // Now we know that Op1 is the PHI node and Op2 is the dominator
1205 Register DominatorReg = Op2.getReg();
1206
1207 const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
1208 ? &PPC::G8RC_and_G8RC_NOX0RegClass
1209 : &PPC::GPRC_and_GPRC_NOR0RegClass;
1210 MRI->setRegClass(Reg: DominatorReg, RC: TRC);
1211
1212 // replace LIs with ADDIs
1213 MachineInstr *DefPhiMI = getVRegDefOrNull(Op: &Op1, MRI);
1214 for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
1215 MachineInstr *LiMI = getVRegDefOrNull(Op: &DefPhiMI->getOperand(i), MRI);
1216 LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
1217 LLVM_DEBUG(LiMI->dump());
1218
1219 // There could be repeated registers in the PHI, e.g: %1 =
1220 // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
1221 // already replaced the def instruction, skip.
1222 if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
1223 continue;
1224
1225 assert((LiMI->getOpcode() == PPC::LI ||
1226 LiMI->getOpcode() == PPC::LI8) &&
1227 "Invalid Opcode!");
1228 auto LiImm = LiMI->getOperand(i: 1).getImm(); // save the imm of LI
1229 LiMI->removeOperand(OpNo: 1); // remove the imm of LI
1230 LiMI->setDesc(TII->get(Opcode: LiMI->getOpcode() == PPC::LI ? PPC::ADDI
1231 : PPC::ADDI8));
1232 MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
1233 .addReg(RegNo: DominatorReg)
1234 .addImm(Val: LiImm); // restore the imm of LI
1235 LLVM_DEBUG(LiMI->dump());
1236 }
1237
1238 // Replace ADD with COPY
1239 LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
1240 LLVM_DEBUG(MI.dump());
1241 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::COPY),
1242 DestReg: MI.getOperand(i: 0).getReg())
1243 .add(MO: Op1);
1244 addRegToUpdate(Op1.getReg());
1245 addRegToUpdate(Op2.getReg());
1246 ToErase = &MI;
1247 Simplified = true;
1248 NumOptADDLIs++;
1249 break;
1250 }
1251 case PPC::RLDICR: {
1252 Simplified |= emitRLDICWhenLoweringJumpTables(MI, ToErase) ||
1253 combineSEXTAndSHL(MI, ToErase);
1254 break;
1255 }
1256 case PPC::ANDI_rec:
1257 case PPC::ANDI8_rec:
1258 case PPC::ANDIS_rec:
1259 case PPC::ANDIS8_rec: {
1260 Register TrueReg =
1261 TRI->lookThruCopyLike(SrcReg: MI.getOperand(i: 1).getReg(), MRI);
1262 if (!TrueReg.isVirtual() || !MRI->hasOneNonDBGUse(RegNo: TrueReg))
1263 break;
1264
1265 MachineInstr *SrcMI = MRI->getVRegDef(Reg: TrueReg);
1266 if (!SrcMI)
1267 break;
1268
1269 unsigned SrcOpCode = SrcMI->getOpcode();
1270 if (SrcOpCode != PPC::RLDICL && SrcOpCode != PPC::RLDICR)
1271 break;
1272
1273 Register SrcReg, DstReg;
1274 SrcReg = SrcMI->getOperand(i: 1).getReg();
1275 DstReg = MI.getOperand(i: 1).getReg();
1276 const TargetRegisterClass *SrcRC = MRI->getRegClassOrNull(Reg: SrcReg);
1277 const TargetRegisterClass *DstRC = MRI->getRegClassOrNull(Reg: DstReg);
1278 if (DstRC != SrcRC)
1279 break;
1280
1281 uint64_t AndImm = MI.getOperand(i: 2).getImm();
1282 if (MI.getOpcode() == PPC::ANDIS_rec ||
1283 MI.getOpcode() == PPC::ANDIS8_rec)
1284 AndImm <<= 16;
1285 uint64_t LZeroAndImm = llvm::countl_zero<uint64_t>(Val: AndImm);
1286 uint64_t RZeroAndImm = llvm::countr_zero<uint64_t>(Val: AndImm);
1287 uint64_t ImmSrc = SrcMI->getOperand(i: 3).getImm();
1288
1289 // We can transfer `RLDICL/RLDICR + ANDI_rec/ANDIS_rec` to `ANDI_rec 0`
1290 // if all bits to AND are already zero in the input.
1291 bool PatternResultZero =
1292 (SrcOpCode == PPC::RLDICL && (RZeroAndImm + ImmSrc > 63)) ||
1293 (SrcOpCode == PPC::RLDICR && LZeroAndImm > ImmSrc);
1294
1295 // We can eliminate RLDICL/RLDICR if it's used to clear bits and all
1296 // bits cleared will be ANDed with 0 by ANDI_rec/ANDIS_rec.
1297 bool PatternRemoveRotate =
1298 SrcMI->getOperand(i: 2).getImm() == 0 &&
1299 ((SrcOpCode == PPC::RLDICL && LZeroAndImm >= ImmSrc) ||
1300 (SrcOpCode == PPC::RLDICR && (RZeroAndImm + ImmSrc > 63)));
1301
1302 if (!PatternResultZero && !PatternRemoveRotate)
1303 break;
1304
1305 LLVM_DEBUG(dbgs() << "Combining pair: ");
1306 LLVM_DEBUG(SrcMI->dump());
1307 LLVM_DEBUG(MI.dump());
1308 if (PatternResultZero)
1309 MI.getOperand(i: 2).setImm(0);
1310 MI.getOperand(i: 1).setReg(SrcMI->getOperand(i: 1).getReg());
1311 LLVM_DEBUG(dbgs() << "To: ");
1312 LLVM_DEBUG(MI.dump());
1313 addRegToUpdate(MI.getOperand(1).getReg());
1314 addRegToUpdate(SrcMI->getOperand(0).getReg());
1315 Simplified = true;
1316 break;
1317 }
1318 case PPC::RLWINM:
1319 case PPC::RLWINM_rec:
1320 case PPC::RLWINM8:
1321 case PPC::RLWINM8_rec: {
1322 // We might replace operand 1 of the instruction which will
1323 // require we recompute kill flags for it.
1324 Register OrigOp1Reg = MI.getOperand(i: 1).isReg()
1325 ? MI.getOperand(i: 1).getReg()
1326 : PPC::NoRegister;
1327 Simplified = TII->combineRLWINM(MI, ToErase: &ToErase);
1328 if (Simplified) {
1329 addRegToUpdate(OrigOp1Reg);
1330 if (MI.getOperand(i: 1).isReg())
1331 addRegToUpdate(MI.getOperand(1).getReg());
1332 if (ToErase && ToErase->getOperand(i: 1).isReg())
1333 for (auto UseReg : ToErase->explicit_uses())
1334 if (UseReg.isReg())
1335 addRegToUpdate(UseReg.getReg());
1336 ++NumRotatesCollapsed;
1337 }
1338 break;
1339 }
1340 // We will replace TD/TW/TDI/TWI with an unconditional trap if it will
1341 // always trap, we will delete the node if it will never trap.
1342 case PPC::TDI:
1343 case PPC::TWI:
1344 case PPC::TD:
1345 case PPC::TW: {
1346 if (!EnableTrapOptimization) break;
1347 MachineInstr *LiMI1 = getVRegDefOrNull(Op: &MI.getOperand(i: 1), MRI);
1348 MachineInstr *LiMI2 = getVRegDefOrNull(Op: &MI.getOperand(i: 2), MRI);
1349 bool IsOperand2Immediate = MI.getOperand(i: 2).isImm();
1350 // We can only do the optimization if we can get immediates
1351 // from both operands
1352 if (!(LiMI1 && (LiMI1->getOpcode() == PPC::LI ||
1353 LiMI1->getOpcode() == PPC::LI8)))
1354 break;
1355 if (!IsOperand2Immediate &&
1356 !(LiMI2 && (LiMI2->getOpcode() == PPC::LI ||
1357 LiMI2->getOpcode() == PPC::LI8)))
1358 break;
1359
1360 auto ImmOperand0 = MI.getOperand(i: 0).getImm();
1361 auto ImmOperand1 = LiMI1->getOperand(i: 1).getImm();
1362 auto ImmOperand2 = IsOperand2Immediate ? MI.getOperand(i: 2).getImm()
1363 : LiMI2->getOperand(i: 1).getImm();
1364
1365 // We will replace the MI with an unconditional trap if it will always
1366 // trap.
1367 if ((ImmOperand0 == 31) ||
1368 ((ImmOperand0 & 0x10) &&
1369 ((int64_t)ImmOperand1 < (int64_t)ImmOperand2)) ||
1370 ((ImmOperand0 & 0x8) &&
1371 ((int64_t)ImmOperand1 > (int64_t)ImmOperand2)) ||
1372 ((ImmOperand0 & 0x2) &&
1373 ((uint64_t)ImmOperand1 < (uint64_t)ImmOperand2)) ||
1374 ((ImmOperand0 & 0x1) &&
1375 ((uint64_t)ImmOperand1 > (uint64_t)ImmOperand2)) ||
1376 ((ImmOperand0 & 0x4) && (ImmOperand1 == ImmOperand2))) {
1377 BuildMI(BB&: MBB, I: &MI, MIMD: MI.getDebugLoc(), MCID: TII->get(Opcode: PPC::TRAP));
1378 TrapOpt = true;
1379 }
1380 // We will delete the MI if it will never trap.
1381 ToErase = &MI;
1382 Simplified = true;
1383 break;
1384 }
1385 }
1386 }
1387
1388 // If the last instruction was marked for elimination,
1389 // remove it now.
1390 if (ToErase) {
1391 recomputeLVForDyingInstr();
1392 ToErase->eraseFromParent();
1393 ToErase = nullptr;
1394 }
1395 // Reset TrapOpt to false at the end of the basic block.
1396 if (EnableTrapOptimization)
1397 TrapOpt = false;
1398 }
1399
1400 // Eliminate all the TOC save instructions which are redundant.
1401 Simplified |= eliminateRedundantTOCSaves(TOCSaves);
1402 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
1403 if (FI->mustSaveTOC())
1404 NumTOCSavesInPrologue++;
1405
1406 // We try to eliminate redundant compare instruction.
1407 Simplified |= eliminateRedundantCompare();
1408
1409 // If we have made any modifications and added any registers to the set of
1410 // registers for which we need to update the kill flags, do so by recomputing
1411 // LiveVariables for those registers.
1412 for (Register Reg : RegsToUpdate) {
1413 if (!MRI->reg_empty(RegNo: Reg))
1414 LV->recomputeForSingleDefVirtReg(Reg);
1415 }
1416 return Simplified;
1417}
1418
1419// helper functions for eliminateRedundantCompare
1420static bool isEqOrNe(MachineInstr *BI) {
1421 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(i: 0).getImm();
1422 unsigned PredCond = PPC::getPredicateCondition(Opcode: Pred);
1423 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
1424}
1425
1426static bool isSupportedCmpOp(unsigned opCode) {
1427 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1428 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
1429 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
1430 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
1431}
1432
1433static bool is64bitCmpOp(unsigned opCode) {
1434 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
1435 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
1436}
1437
1438static bool isSignedCmpOp(unsigned opCode) {
1439 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
1440 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
1441}
1442
1443static unsigned getSignedCmpOpCode(unsigned opCode) {
1444 if (opCode == PPC::CMPLD) return PPC::CMPD;
1445 if (opCode == PPC::CMPLW) return PPC::CMPW;
1446 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
1447 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
1448 return opCode;
1449}
1450
1451// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
1452// (LT x) to (LE x-1)
1453static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
1454 uint64_t Imm = CMPI->getOperand(i: 2).getImm();
1455 bool SignedCmp = isSignedCmpOp(opCode: CMPI->getOpcode());
1456 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
1457 return 0;
1458
1459 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(i: 0).getImm();
1460 unsigned PredCond = PPC::getPredicateCondition(Opcode: Pred);
1461 unsigned PredHint = PPC::getPredicateHint(Opcode: Pred);
1462 if (PredCond == PPC::PRED_GE)
1463 return PPC::getPredicate(Condition: PPC::PRED_GT, Hint: PredHint);
1464 if (PredCond == PPC::PRED_LT)
1465 return PPC::getPredicate(Condition: PPC::PRED_LE, Hint: PredHint);
1466
1467 return 0;
1468}
1469
1470// We can increment immediate x in (GT x) by changing it to (GE x+1) or
1471// (LE x) to (LT x+1)
1472static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
1473 uint64_t Imm = CMPI->getOperand(i: 2).getImm();
1474 bool SignedCmp = isSignedCmpOp(opCode: CMPI->getOpcode());
1475 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
1476 return 0;
1477
1478 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(i: 0).getImm();
1479 unsigned PredCond = PPC::getPredicateCondition(Opcode: Pred);
1480 unsigned PredHint = PPC::getPredicateHint(Opcode: Pred);
1481 if (PredCond == PPC::PRED_GT)
1482 return PPC::getPredicate(Condition: PPC::PRED_GE, Hint: PredHint);
1483 if (PredCond == PPC::PRED_LE)
1484 return PPC::getPredicate(Condition: PPC::PRED_LT, Hint: PredHint);
1485
1486 return 0;
1487}
1488
1489// This takes a Phi node and returns a register value for the specified BB.
1490static unsigned getIncomingRegForBlock(MachineInstr *Phi,
1491 MachineBasicBlock *MBB) {
1492 for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
1493 MachineOperand &MO = Phi->getOperand(i: I);
1494 if (MO.getMBB() == MBB)
1495 return Phi->getOperand(i: I-1).getReg();
1496 }
1497 llvm_unreachable("invalid src basic block for this Phi node\n");
1498 return 0;
1499}
1500
1501// This function tracks the source of the register through register copy.
1502// If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
1503// assuming that the control comes from BB1 into BB2.
1504static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
1505 MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
1506 unsigned SrcReg = Reg;
1507 while (true) {
1508 unsigned NextReg = SrcReg;
1509 MachineInstr *Inst = MRI->getVRegDef(Reg: SrcReg);
1510 if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
1511 NextReg = getIncomingRegForBlock(Phi: Inst, MBB: BB1);
1512 // We track through PHI only once to avoid infinite loop.
1513 BB1 = nullptr;
1514 }
1515 else if (Inst->isFullCopy())
1516 NextReg = Inst->getOperand(i: 1).getReg();
1517 if (NextReg == SrcReg || !Register::isVirtualRegister(Reg: NextReg))
1518 break;
1519 SrcReg = NextReg;
1520 }
1521 return SrcReg;
1522}
1523
// Decide whether the conditional branch terminating MBB is a candidate for
// redundant-compare elimination. On success:
//  - PredMBB is set to the predecessor whose compare will be merged with
//    the one in MBB;
//  - MBBtoMoveCmp is nullptr for the fully redundant case (single
//    predecessor already does an equivalent compare+branch), or the second
//    predecessor into which a copy of the compare must be appended for the
//    partially redundant case.
static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
                                          MachineBasicBlock *&PredMBB,
                                          MachineBasicBlock *&MBBtoMoveCmp,
                                          MachineRegisterInfo *MRI) {

  // A BB qualifies if it ends in a BCC whose CR input is virtual, has a
  // single non-debug use, and is defined by an all-virtual-register compare
  // in the same BB.
  auto isEligibleBB = [&](MachineBasicBlock &BB) {
    auto BII = BB.getFirstInstrTerminator();
    // We optimize BBs ending with a conditional branch.
    // We check only for BCC here, not BCCLR, because BCCLR
    // will be formed only later in the pipeline.
    if (BB.succ_size() == 2 &&
        BII != BB.instr_end() &&
        (*BII).getOpcode() == PPC::BCC &&
        (*BII).getOperand(i: 1).isReg()) {
      // We optimize only if the condition code is used only by one BCC.
      Register CndReg = (*BII).getOperand(i: 1).getReg();
      if (!CndReg.isVirtual() || !MRI->hasOneNonDBGUse(RegNo: CndReg))
        return false;

      MachineInstr *CMPI = MRI->getVRegDef(Reg: CndReg);
      // We assume compare and branch are in the same BB for ease of analysis.
      if (CMPI->getParent() != &BB)
        return false;

      // We skip this BB if a physical register is used in comparison.
      for (MachineOperand &MO : CMPI->operands())
        if (MO.isReg() && !MO.getReg().isVirtual())
          return false;

      return true;
    }
    return false;
  };

  // If this BB has more than one successor, we can create a new BB and
  // move the compare instruction in the new BB.
  // So far, we do not move compare instruction to a BB having multiple
  // successors to avoid potentially increasing code size.
  auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
    return BB.succ_size() == 1;
  };

  if (!isEligibleBB(MBB))
    return false;

  unsigned NumPredBBs = MBB.pred_size();
  if (NumPredBBs == 1) {
    // Fully redundant case: the single predecessor already ends with an
    // eligible compare+branch; no compare needs to be moved.
    MachineBasicBlock *TmpMBB = *MBB.pred_begin();
    if (isEligibleBB(*TmpMBB)) {
      PredMBB = TmpMBB;
      MBBtoMoveCmp = nullptr;
      return true;
    }
  }
  else if (NumPredBBs == 2) {
    // We check for partially redundant case.
    // So far, we support cases with only two predecessors
    // to avoid increasing the number of instructions.
    MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
    MachineBasicBlock *Pred1MBB = *PI;
    MachineBasicBlock *Pred2MBB = *(PI+1);

    if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
      // We assume Pred1MBB is the BB containing the compare to be merged and
      // Pred2MBB is the BB to which we will append a compare instruction.
      // Proceed as is if Pred1MBB is different from MBB.
    }
    else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
      // We need to swap Pred1MBB and Pred2MBB to canonicalize.
      std::swap(a&: Pred1MBB, b&: Pred2MBB);
    }
    else return false;

    // Reject the self-loop shape where MBB is its own eligible predecessor.
    if (Pred1MBB == &MBB)
      return false;

    // Here, Pred2MBB is the BB to which we need to append a compare inst.
    // We cannot move the compare instruction if operands are not available
    // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
    MachineInstr *BI = &*MBB.getFirstInstrTerminator();
    MachineInstr *CMPI = MRI->getVRegDef(Reg: BI->getOperand(i: 1).getReg());
    for (int I = 1; I <= 2; I++)
      if (CMPI->getOperand(i: I).isReg()) {
        MachineInstr *Inst = MRI->getVRegDef(Reg: CMPI->getOperand(i: I).getReg());
        if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
          return false;
      }

    PredMBB = Pred1MBB;
    MBBtoMoveCmp = Pred2MBB;
    return true;
  }

  return false;
}
1619
1620// This function will iterate over the input map containing a pair of TOC save
1621// instruction and a flag. The flag will be set to false if the TOC save is
1622// proven redundant. This function will erase from the basic block all the TOC
1623// saves marked as redundant.
1624bool PPCMIPeephole::eliminateRedundantTOCSaves(
1625 std::map<MachineInstr *, bool> &TOCSaves) {
1626 bool Simplified = false;
1627 int NumKept = 0;
1628 for (auto TOCSave : TOCSaves) {
1629 if (!TOCSave.second) {
1630 TOCSave.first->eraseFromParent();
1631 RemoveTOCSave++;
1632 Simplified = true;
1633 } else {
1634 NumKept++;
1635 }
1636 }
1637
1638 if (NumKept > 1)
1639 MultiTOCSaves++;
1640
1641 return Simplified;
1642}
1643
1644// If multiple conditional branches are executed based on the (essentially)
1645// same comparison, we merge compare instructions into one and make multiple
1646// conditional branches on this comparison.
1647// For example,
1648// if (a == 0) { ... }
1649// else if (a < 0) { ... }
1650// can be executed by one compare and two conditional branches instead of
1651// two pairs of a compare and a conditional branch.
1652//
1653// This method merges two compare instructions in two MBBs and modifies the
1654// compare and conditional branch instructions if needed.
1655// For the above example, the input for this pass looks like:
1656// cmplwi r3, 0
1657// beq 0, .LBB0_3
1658// cmpwi r3, -1
1659// bgt 0, .LBB0_4
1660// So, before merging two compares, we need to modify these instructions as
1661// cmpwi r3, 0 ; cmplwi and cmpwi yield same result for beq
1662// beq 0, .LBB0_3
// cmpwi r3, 0 ; greater than -1 means greater or equal to 0
1664// bge 0, .LBB0_4
1665
bool PPCMIPeephole::eliminateRedundantCompare() {
  bool Simplified = false;

  for (MachineBasicBlock &MBB2 : *MF) {
    MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;

    // For fully redundant case, we select two basic blocks MBB1 and MBB2
    // as an optimization target if
    // - both MBBs end with a conditional branch,
    // - MBB1 is the only predecessor of MBB2, and
    // - compare does not take a physical register as an operand in both MBBs.
    // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
    //
    // As partially redundant case, we additionally handle if MBB2 has one
    // additional predecessor, which has only one successor (MBB2).
    // In this case, we move the compare instruction originally in MBB2 into
    // MBBtoMoveCmp. This partially redundant case typically appears when
    // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
    //
    // Overview of CFG of related basic blocks
    // Fully redundant case        Partially redundant case
    // --------                    ----------------  --------
    // | MBB1 | (w/ 2 succ)        | MBBtoMoveCmp |  | MBB1 | (w/ 2 succ)
    // --------                    ----------------  --------
    //    |  \                      (w/ 1 succ)  \      |  \
    //    |   \                                   \     |   \
    //    |                                        \    |
    // --------                                     --------
    // | MBB2 | (w/ 1 pred                          | MBB2 | (w/ 2 pred
    // -------- and 2 succ)                         -------- and 2 succ)
    //    |  \                                         |  \
    //    |   \                                        |   \
    //
    if (!eligibleForCompareElimination(MBB&: MBB2, PredMBB&: MBB1, MBBtoMoveCmp, MRI))
      continue;

    // BI1/CMPI1 are the conditional branch terminating MBB1 and the compare
    // defining its condition register; BI2/CMPI2 are the corresponding pair
    // in MBB2.
    MachineInstr *BI1 = &*MBB1->getFirstInstrTerminator();
    MachineInstr *CMPI1 = MRI->getVRegDef(Reg: BI1->getOperand(i: 1).getReg());

    MachineInstr *BI2 = &*MBB2.getFirstInstrTerminator();
    MachineInstr *CMPI2 = MRI->getVRegDef(Reg: BI2->getOperand(i: 1).getReg());
    bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);

    // We cannot optimize an unsupported compare opcode or
    // a mix of 32-bit and 64-bit comparisons
    if (!isSupportedCmpOp(opCode: CMPI1->getOpcode()) ||
        !isSupportedCmpOp(opCode: CMPI2->getOpcode()) ||
        is64bitCmpOp(opCode: CMPI1->getOpcode()) != is64bitCmpOp(opCode: CMPI2->getOpcode()))
      continue;

    // Adjustments to make the two compares mergeable, computed below and
    // applied after all the bail-out checks pass:
    // - NewOpCode: replacement opcode for CMPI1 (0 = keep as-is)
    // - NewPredicate1/2: replacement branch predicates (0 = keep as-is)
    // - NewImm1/2: adjusted immediates for the register-vs-immediate case
    // - SwapOperands: whether CMPI2's register operands must be swapped
    unsigned NewOpCode = 0;
    unsigned NewPredicate1 = 0, NewPredicate2 = 0;
    int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
    bool SwapOperands = false;

    if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
      // Typically, unsigned comparison is used for equality check, but
      // we replace it with a signed comparison if the comparison
      // to be merged is a signed comparison.
      // In other cases of opcode mismatch, we cannot optimize this.

      // We cannot change opcode when comparing against an immediate
      // if the most significant bit of the immediate is one
      // due to the difference in sign extension.
      auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
        if (!I->getOperand(i: 2).isImm())
          return false;
        int16_t Imm = (int16_t)I->getOperand(i: 2).getImm();
        return Imm < 0;
      };

      if (isEqOrNe(BI: BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
          CMPI1->getOpcode() == getSignedCmpOpCode(opCode: CMPI2->getOpcode()))
        NewOpCode = CMPI1->getOpcode();
      else if (isEqOrNe(BI: BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
               getSignedCmpOpCode(opCode: CMPI1->getOpcode()) == CMPI2->getOpcode())
        NewOpCode = CMPI2->getOpcode();
      else continue;
    }

    if (CMPI1->getOperand(i: 2).isReg() && CMPI2->getOperand(i: 2).isReg()) {
      // In case of comparisons between two registers, these two registers
      // must be same to merge two comparisons.
      unsigned Cmp1Operand1 = getSrcVReg(Reg: CMPI1->getOperand(i: 1).getReg(),
                                         BB1: nullptr, BB2: nullptr, MRI);
      unsigned Cmp1Operand2 = getSrcVReg(Reg: CMPI1->getOperand(i: 2).getReg(),
                                         BB1: nullptr, BB2: nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(Reg: CMPI2->getOperand(i: 1).getReg(),
                                         BB1: MBB1, BB2: &MBB2, MRI);
      unsigned Cmp2Operand2 = getSrcVReg(Reg: CMPI2->getOperand(i: 2).getReg(),
                                         BB1: MBB1, BB2: &MBB2, MRI);

      if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
        // Same pair of registers in the same order; ready to merge as is.
      }
      else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
        // Same pair of registers in different order.
        // We reverse the predicate to merge compare instructions.
        PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(i: 0).getImm();
        NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Opcode: Pred);
        // In case of partial redundancy, we need to swap operands
        // in another compare instruction.
        SwapOperands = true;
      }
      else continue;
    }
    else if (CMPI1->getOperand(i: 2).isImm() && CMPI2->getOperand(i: 2).isImm()) {
      // In case of comparisons between a register and an immediate,
      // the operand register must be same for two compare instructions.
      unsigned Cmp1Operand1 = getSrcVReg(Reg: CMPI1->getOperand(i: 1).getReg(),
                                         BB1: nullptr, BB2: nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(Reg: CMPI2->getOperand(i: 1).getReg(),
                                         BB1: MBB1, BB2: &MBB2, MRI);
      if (Cmp1Operand1 != Cmp2Operand1)
        continue;

      NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(i: 2).getImm();
      NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(i: 2).getImm();

      // If immediate are not same, we try to adjust by changing predicate;
      // e.g. GT imm means GE (imm+1).
      if (Imm1 != Imm2 && (!isEqOrNe(BI: BI2) || !isEqOrNe(BI: BI1))) {
        int Diff = Imm1 - Imm2;
        if (Diff < -2 || Diff > 2)
          continue;

        // Each immediate can be moved by at most 1 (with a matching
        // predicate change), so a gap of 2 needs both sides to move,
        // a gap of 1 needs either side to move.
        unsigned PredToInc1 = getPredicateToIncImm(BI: BI1, CMPI: CMPI1);
        unsigned PredToDec1 = getPredicateToDecImm(BI: BI1, CMPI: CMPI1);
        unsigned PredToInc2 = getPredicateToIncImm(BI: BI2, CMPI: CMPI2);
        unsigned PredToDec2 = getPredicateToDecImm(BI: BI2, CMPI: CMPI2);
        if (Diff == 2) {
          if (PredToInc2 && PredToDec1) {
            NewPredicate2 = PredToInc2;
            NewPredicate1 = PredToDec1;
            NewImm2++;
            NewImm1--;
          }
        }
        else if (Diff == 1) {
          if (PredToInc2) {
            NewImm2++;
            NewPredicate2 = PredToInc2;
          }
          else if (PredToDec1) {
            NewImm1--;
            NewPredicate1 = PredToDec1;
          }
        }
        else if (Diff == -1) {
          if (PredToDec2) {
            NewImm2--;
            NewPredicate2 = PredToDec2;
          }
          else if (PredToInc1) {
            NewImm1++;
            NewPredicate1 = PredToInc1;
          }
        }
        else if (Diff == -2) {
          if (PredToDec2 && PredToInc1) {
            NewPredicate2 = PredToDec2;
            NewPredicate1 = PredToInc1;
            NewImm2--;
            NewImm1++;
          }
        }
      }

      // We cannot merge two compares if the immediates are not same.
      if (NewImm2 != NewImm1)
        continue;
    }

    LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
    LLVM_DEBUG(CMPI1->dump());
    LLVM_DEBUG(BI1->dump());
    LLVM_DEBUG(CMPI2->dump());
    LLVM_DEBUG(BI2->dump());
    // Live ranges of every register touched by either compare may change;
    // queue them all for recomputation (see the note in the file header).
    for (const MachineOperand &MO : CMPI1->operands())
      if (MO.isReg())
        addRegToUpdate(MO.getReg());
    for (const MachineOperand &MO : CMPI2->operands())
      if (MO.isReg())
        addRegToUpdate(MO.getReg());

    // We adjust opcode, predicates and immediate as we determined above.
    if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
      CMPI1->setDesc(TII->get(Opcode: NewOpCode));
    }
    if (NewPredicate1) {
      BI1->getOperand(i: 0).setImm(NewPredicate1);
    }
    if (NewPredicate2) {
      BI2->getOperand(i: 0).setImm(NewPredicate2);
    }
    if (NewImm1 != Imm1) {
      CMPI1->getOperand(i: 2).setImm(NewImm1);
    }

    if (IsPartiallyRedundant) {
      // We touch up the compare instruction in MBB2 and move it to
      // a previous BB to handle partially redundant case.
      if (SwapOperands) {
        Register Op1 = CMPI2->getOperand(i: 1).getReg();
        Register Op2 = CMPI2->getOperand(i: 2).getReg();
        CMPI2->getOperand(i: 1).setReg(Op2);
        CMPI2->getOperand(i: 2).setReg(Op1);
      }
      if (NewImm2 != Imm2)
        CMPI2->getOperand(i: 2).setImm(NewImm2);

      // Any operand of CMPI2 defined in MBB2 must be a PHI (checked by
      // eligibleForCompareElimination); rewrite it to the value incoming
      // from MBBtoMoveCmp so the compare is valid after the move.
      for (int I = 1; I <= 2; I++) {
        if (CMPI2->getOperand(i: I).isReg()) {
          MachineInstr *Inst = MRI->getVRegDef(Reg: CMPI2->getOperand(i: I).getReg());
          if (Inst->getParent() != &MBB2)
            continue;

          assert(Inst->getOpcode() == PPC::PHI &&
                 "We cannot support if an operand comes from this BB.");
          unsigned SrcReg = getIncomingRegForBlock(Phi: Inst, MBB: MBBtoMoveCmp);
          CMPI2->getOperand(i: I).setReg(SrcReg);
          addRegToUpdate(SrcReg);
        }
      }
      auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
      MBBtoMoveCmp->splice(Where: I, Other: &MBB2, From: MachineBasicBlock::iterator(CMPI2));

      // Build a PHI in MBB2 merging the condition registers defined in
      // MBB1 and (after the splice) in MBBtoMoveCmp, and make BI2 branch
      // on the merged value.
      DebugLoc DL = CMPI2->getDebugLoc();
      Register NewVReg = MRI->createVirtualRegister(RegClass: &PPC::CRRCRegClass);
      BuildMI(BB&: MBB2, I: MBB2.begin(), MIMD: DL,
              MCID: TII->get(Opcode: PPC::PHI), DestReg: NewVReg)
        .addReg(RegNo: BI1->getOperand(i: 1).getReg()).addMBB(MBB: MBB1)
        .addReg(RegNo: BI2->getOperand(i: 1).getReg()).addMBB(MBB: MBBtoMoveCmp);
      BI2->getOperand(i: 1).setReg(NewVReg);
      addRegToUpdate(NewVReg);
    }
    else {
      // We finally eliminate compare instruction in MBB2.
      // We do not need to treat CMPI2 specially here in terms of re-computing
      // live variables even though it is being deleted because:
      // - It defines a register that has a single use (already checked in
      //   eligibleForCompareElimination())
      // - The only user (BI2) is no longer using it so the register is dead (no
      //   def, no uses)
      // - We do not attempt to recompute live variables for dead registers
      BI2->getOperand(i: 1).setReg(BI1->getOperand(i: 1).getReg());
      CMPI2->eraseFromParent();
    }

    LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
    LLVM_DEBUG(CMPI1->dump());
    LLVM_DEBUG(BI1->dump());
    LLVM_DEBUG(BI2->dump());
    if (IsPartiallyRedundant) {
      LLVM_DEBUG(dbgs() << "The following compare is moved into "
                        << printMBBReference(*MBBtoMoveCmp)
                        << " to handle partial redundancy.\n");
      LLVM_DEBUG(CMPI2->dump());
    }
    Simplified = true;
  }

  return Simplified;
}
1930
1931// We miss the opportunity to emit an RLDIC when lowering jump tables
1932// since ISEL sees only a single basic block. When selecting, the clear
1933// and shift left will be in different blocks.
1934bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI,
1935 MachineInstr *&ToErase) {
1936 if (MI.getOpcode() != PPC::RLDICR)
1937 return false;
1938
1939 Register SrcReg = MI.getOperand(i: 1).getReg();
1940 if (!SrcReg.isVirtual())
1941 return false;
1942
1943 MachineInstr *SrcMI = MRI->getVRegDef(Reg: SrcReg);
1944 if (SrcMI->getOpcode() != PPC::RLDICL)
1945 return false;
1946
1947 MachineOperand MOpSHSrc = SrcMI->getOperand(i: 2);
1948 MachineOperand MOpMBSrc = SrcMI->getOperand(i: 3);
1949 MachineOperand MOpSHMI = MI.getOperand(i: 2);
1950 MachineOperand MOpMEMI = MI.getOperand(i: 3);
1951 if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
1952 MOpMEMI.isImm()))
1953 return false;
1954
1955 uint64_t SHSrc = MOpSHSrc.getImm();
1956 uint64_t MBSrc = MOpMBSrc.getImm();
1957 uint64_t SHMI = MOpSHMI.getImm();
1958 uint64_t MEMI = MOpMEMI.getImm();
1959 uint64_t NewSH = SHSrc + SHMI;
1960 uint64_t NewMB = MBSrc - SHMI;
1961 if (NewMB > 63 || NewSH > 63)
1962 return false;
1963
1964 // The bits cleared with RLDICL are [0, MBSrc).
1965 // The bits cleared with RLDICR are (MEMI, 63].
1966 // After the sequence, the bits cleared are:
1967 // [0, MBSrc-SHMI) and (MEMI, 63).
1968 //
1969 // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
1970 if ((63 - NewSH) != MEMI)
1971 return false;
1972
1973 LLVM_DEBUG(dbgs() << "Converting pair: ");
1974 LLVM_DEBUG(SrcMI->dump());
1975 LLVM_DEBUG(MI.dump());
1976
1977 MI.setDesc(TII->get(Opcode: PPC::RLDIC));
1978 MI.getOperand(i: 1).setReg(SrcMI->getOperand(i: 1).getReg());
1979 MI.getOperand(i: 2).setImm(NewSH);
1980 MI.getOperand(i: 3).setImm(NewMB);
1981 addRegToUpdate(MI.getOperand(1).getReg());
1982 addRegToUpdate(SrcMI->getOperand(0).getReg());
1983
1984 LLVM_DEBUG(dbgs() << "To: ");
1985 LLVM_DEBUG(MI.dump());
1986 NumRotatesCollapsed++;
1987 // If SrcReg has no non-debug use it's safe to delete its def SrcMI.
1988 if (MRI->use_nodbg_empty(RegNo: SrcReg)) {
1989 assert(!SrcMI->hasImplicitDef() &&
1990 "Not expecting an implicit def with this instr.");
1991 ToErase = SrcMI;
1992 }
1993 return true;
1994}
1995
1996// For case in LLVM IR
1997// entry:
1998// %iconv = sext i32 %index to i64
1999// br i1 undef label %true, label %false
2000// true:
2001// %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
2002// ...
2003// PPCISelLowering::combineSHL fails to combine, because sext and shl are in
2004// different BBs when conducting instruction selection. We can do a peephole
2005// optimization to combine these two instructions into extswsli after
2006// instruction selection.
2007bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
2008 MachineInstr *&ToErase) {
2009 if (MI.getOpcode() != PPC::RLDICR)
2010 return false;
2011
2012 if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
2013 return false;
2014
2015 assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
2016
2017 MachineOperand MOpSHMI = MI.getOperand(i: 2);
2018 MachineOperand MOpMEMI = MI.getOperand(i: 3);
2019 if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
2020 return false;
2021
2022 uint64_t SHMI = MOpSHMI.getImm();
2023 uint64_t MEMI = MOpMEMI.getImm();
2024 if (SHMI + MEMI != 63)
2025 return false;
2026
2027 Register SrcReg = MI.getOperand(i: 1).getReg();
2028 if (!SrcReg.isVirtual())
2029 return false;
2030
2031 MachineInstr *SrcMI = MRI->getVRegDef(Reg: SrcReg);
2032 if (SrcMI->getOpcode() != PPC::EXTSW &&
2033 SrcMI->getOpcode() != PPC::EXTSW_32_64)
2034 return false;
2035
2036 // If the register defined by extsw has more than one use, combination is not
2037 // needed.
2038 if (!MRI->hasOneNonDBGUse(RegNo: SrcReg))
2039 return false;
2040
2041 assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
2042 assert(SrcMI->getOperand(1).isReg() &&
2043 "EXTSW's second operand should be a register");
2044 if (!SrcMI->getOperand(i: 1).getReg().isVirtual())
2045 return false;
2046
2047 LLVM_DEBUG(dbgs() << "Combining pair: ");
2048 LLVM_DEBUG(SrcMI->dump());
2049 LLVM_DEBUG(MI.dump());
2050
2051 MachineInstr *NewInstr =
2052 BuildMI(BB&: *MI.getParent(), I: &MI, MIMD: MI.getDebugLoc(),
2053 MCID: SrcMI->getOpcode() == PPC::EXTSW ? TII->get(Opcode: PPC::EXTSWSLI)
2054 : TII->get(Opcode: PPC::EXTSWSLI_32_64),
2055 DestReg: MI.getOperand(i: 0).getReg())
2056 .add(MO: SrcMI->getOperand(i: 1))
2057 .add(MO: MOpSHMI);
2058 (void)NewInstr;
2059
2060 LLVM_DEBUG(dbgs() << "TO: ");
2061 LLVM_DEBUG(NewInstr->dump());
2062 ++NumEXTSWAndSLDICombined;
2063 ToErase = &MI;
2064 // SrcMI, which is extsw, is of no use now, but we don't erase it here so we
2065 // can recompute its kill flags. We run DCE immediately after this pass
2066 // to clean up dead instructions such as this.
2067 addRegToUpdate(NewInstr->getOperand(1).getReg());
2068 addRegToUpdate(SrcMI->getOperand(0).getReg());
2069 return true;
2070}
2071
2072} // end default namespace
2073
// Register the pass with the legacy pass manager and declare the analysis
// wrapper passes it depends on.
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                      "PowerPC MI Peephole Optimization", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LiveVariablesWrapperPass)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                    "PowerPC MI Peephole Optimization", false, false)

// Pass identification anchor; its address identifies the pass at runtime.
char PPCMIPeephole::ID = 0;
2084FunctionPass*
2085llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
2086