PPCInstrInfo.cpp source code [llvm_projects/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp]

1	//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file contains the PowerPC implementation of the TargetInstrInfo class.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "PPCInstrInfo.h"
14	#include "MCTargetDesc/PPCPredicates.h"
15	#include "PPC.h"
16	#include "PPCHazardRecognizers.h"
17	#include "PPCInstrBuilder.h"
18	#include "PPCMachineFunctionInfo.h"
19	#include "PPCTargetMachine.h"
20	#include "llvm/ADT/STLExtras.h"
21	#include "llvm/ADT/Statistic.h"
22	#include "llvm/CodeGen/LiveIntervals.h"
23	#include "llvm/CodeGen/LivePhysRegs.h"
24	#include "llvm/CodeGen/MachineCombinerPattern.h"
25	#include "llvm/CodeGen/MachineConstantPool.h"
26	#include "llvm/CodeGen/MachineFrameInfo.h"
27	#include "llvm/CodeGen/MachineInstrBuilder.h"
28	#include "llvm/CodeGen/MachineMemOperand.h"
29	#include "llvm/CodeGen/MachineRegisterInfo.h"
30	#include "llvm/CodeGen/PseudoSourceValue.h"
31	#include "llvm/CodeGen/RegisterClassInfo.h"
32	#include "llvm/CodeGen/RegisterPressure.h"
33	#include "llvm/CodeGen/ScheduleDAG.h"
34	#include "llvm/CodeGen/SlotIndexes.h"
35	#include "llvm/CodeGen/StackMaps.h"
36	#include "llvm/IR/Module.h"
37	#include "llvm/MC/MCInst.h"
38	#include "llvm/MC/TargetRegistry.h"
39	#include "llvm/Support/CommandLine.h"
40	#include "llvm/Support/Debug.h"
41	#include "llvm/Support/ErrorHandling.h"
42	#include "llvm/Support/raw_ostream.h"
43
44	using namespace llvm;
45
46	#define DEBUG_TYPE "ppc-instr-info"
47
48	#define GET_INSTRMAP_INFO
49	#define GET_INSTRINFO_CTOR_DTOR
50	#include "PPCGenInstrInfo.inc"
51
52	STATISTIC(NumStoreSPILLVSRRCAsVec,
53	"Number of spillvsrrc spilled to stack as vec");
54	STATISTIC(NumStoreSPILLVSRRCAsGpr,
55	"Number of spillvsrrc spilled to stack as gpr");
56	STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");
57	STATISTIC(CmpIselsConverted,
58	"Number of ISELs that depend on comparison of constants converted");
59	STATISTIC(MissedConvertibleImmediateInstrs,
60	"Number of compare-immediate instructions fed by constants");
61	STATISTIC(NumRcRotatesConvertedToRcAnd,
62	"Number of record-form rotates converted to record-form andi");
63
64	static cl::
65	opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
66	cl::desc ("Disable analysis for CTR loops"));
67
68	static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
69	cl::desc ("Disable compare instruction optimization"), cl::Hidden);
70
71	static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
72	cl::desc ("Causes the backend to crash instead of generating a nop VSX copy"),
73	cl::Hidden);
74
75	static cl::opt<bool>
76	UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,
77	cl::desc ("Use the old (incorrect) instruction latency calculation"));
78
79	static cl::opt<float>
80	FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(Val: `1.5`),
81	cl::desc ("register pressure factor for the transformations."));
82
83	static cl::opt<bool> EnableFMARegPressureReduction(
84	"ppc-fma-rp-reduction", cl::Hidden, cl::init(Val: true),
85	cl::desc ("enable register pressure reduce in machine combiner pass."));
86
87	// Pin the vtable to this file.
88	void PPCInstrInfo::anchor() {}
89
90	PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
91	: PPCGenInstrInfo (PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP,
92	/ CatchRetOpcode / -`1`,
93	STI.isPPC64() ? PPC::BLR8 : PPC::BLR),
94	Subtarget(STI), RI (STI.getTargetMachine()) {}
95
96	/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
97	/// this target when scheduling the DAG.
98	ScheduleHazardRecognizer *
99	PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
100	const ScheduleDAG DAG) const* {
101	unsigned Directive =
102	static_cast<const PPCSubtarget *>(STI)->getCPUDirective();
103	if (Directive == PPC::DIR_440 \|\| Directive == PPC::DIR_A2 \|\|
104	Directive == PPC::DIR_E500mc \|\| Directive == PPC::DIR_E5500) {
105	const InstrItineraryData *II =
106	static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
107	return new ScoreboardHazardRecognizer (II, DAG);
108	}
109
110	return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
111	}
112
113	/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
114	/// to use for this target when scheduling the DAG.
115	ScheduleHazardRecognizer *
116	PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
117	const ScheduleDAG DAG) const* {
118	unsigned Directive =
119	DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
120
121	// FIXME: Leaving this as-is until we have POWER9 scheduling info
122	if (Directive == PPC::DIR_PWR7 \|\| Directive == PPC::DIR_PWR8)
123	return new PPCDispatchGroupSBHazardRecognizer (II, DAG);
124
125	// Most subtargets use a PPC970 recognizer.
126	if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
127	Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
128	assert(DAG->TII && "No InstrInfo?");
129
130	return new PPCHazardRecognizer970 (*DAG);
131	}
132
133	return new ScoreboardHazardRecognizer (II, DAG);
134	}
135
136	unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
137	const MachineInstr &MI,
138	unsigned PredCost) const* {
139	if (!ItinData \|\| UseOldLatencyCalc)
140	return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);
141
142	// The default implementation of getInstrLatency calls getStageLatency, but
143	// getStageLatency does not do the right thing for us. While we have
144	// itinerary, most cores are fully pipelined, and so the itineraries only
145	// express the first part of the pipeline, not every stage. Instead, we need
146	// to use the listed output operand cycle number (using operand 0 here, which
147	// is an output).
148
149	unsigned Latency = `1`;
150	unsigned DefClass = MI.getDesc().getSchedClass();
151	for (unsigned i = `0`, e = MI.getNumOperands(); i != e; ++i) {
152	const MachineOperand &MO = MI.getOperand(i);
153	if (!MO.isReg() \|\| !MO.isDef() \|\| MO.isImplicit())
154	continue;
155
156	std::optional<unsigned> Cycle = ItinData->getOperandCycle(ItinClassIndx: DefClass, OperandIdx: i);
157	if (!Cycle)
158	continue;
159
160	Latency = std::max(a: Latency, b: *Cycle);
161	}
162
163	return Latency;
164	}
165
166	std::optional<unsigned> PPCInstrInfo::getOperandLatency(
167	const InstrItineraryData ItinData, const* MachineInstr &DefMI,
168	unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {
169	std::optional<unsigned> Latency = PPCGenInstrInfo::getOperandLatency(
170	ItinData, DefMI, DefIdx, UseMI, UseIdx);
171
172	if (!DefMI.getParent())
173	return Latency;
174
175	const MachineOperand &DefMO = DefMI.getOperand(i: DefIdx);
176	Register Reg = DefMO.getReg();
177
178	bool IsRegCR;
179	if (Reg.isVirtual()) {
180	const MachineRegisterInfo *MRI =
181	&DefMI.getParent()->getParent()->getRegInfo();
182	IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(RC: &PPC::CRRCRegClass) \|\|
183	MRI->getRegClass(Reg)->hasSuperClassEq(RC: &PPC::CRBITRCRegClass);
184	} else {
185	IsRegCR = PPC::CRRCRegClass.contains(Reg) \|\|
186	PPC::CRBITRCRegClass.contains(Reg);
187	}
188
189	if (UseMI.isBranch() && IsRegCR) {
190	if (!Latency)
191	Latency = getInstrLatency(ItinData, MI: DefMI);
192
193	// On some cores, there is an additional delay between writing to a condition
194	// register, and using it from a branch.
195	unsigned Directive = Subtarget.getCPUDirective();
196	switch (Directive) {
197	default: break;
198	case PPC::DIR_7400:
199	case PPC::DIR_750:
200	case PPC::DIR_970:
201	case PPC::DIR_E5500:
202	case PPC::DIR_PWR4:
203	case PPC::DIR_PWR5:
204	case PPC::DIR_PWR5X:
205	case PPC::DIR_PWR6:
206	case PPC::DIR_PWR6X:
207	case PPC::DIR_PWR7:
208	case PPC::DIR_PWR8:
209	// FIXME: Is this needed for POWER9?
210	Latency = *Latency + `2`;
211	break;
212	}
213	}
214
215	return Latency;
216	}
217
218	void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI,
219	uint32_t Flags) const {
220	MI.setFlags(Flags);
221	MI.clearFlag(Flag: MachineInstr::MIFlag::NoSWrap);
222	MI.clearFlag(Flag: MachineInstr::MIFlag::NoUWrap);
223	MI.clearFlag(Flag: MachineInstr::MIFlag::IsExact);
224	}
225
226	// This function does not list all associative and commutative operations, but
227	// only those worth feeding through the machine combiner in an attempt to
228	// reduce the critical path. Mostly, this means floating-point operations,
229	// because they have high latencies(>=5) (compared to other operations, such as
230	// and/or, which are also associative and commutative, but have low latencies).
231	bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
232	bool Invert) const {
233	if (Invert)
234	return false;
235	switch (Inst.getOpcode()) {
236	// Floating point:
237	// FP Add:
238	case PPC::FADD:
239	case PPC::FADDS:
240	// FP Multiply:
241	case PPC::FMUL:
242	case PPC::FMULS:
243	// Altivec Add:
244	case PPC::VADDFP:
245	// VSX Add:
246	case PPC::XSADDDP:
247	case PPC::XVADDDP:
248	case PPC::XVADDSP:
249	case PPC::XSADDSP:
250	// VSX Multiply:
251	case PPC::XSMULDP:
252	case PPC::XVMULDP:
253	case PPC::XVMULSP:
254	case PPC::XSMULSP:
255	return Inst.getFlag(Flag: MachineInstr::MIFlag::FmReassoc) &&
256	Inst.getFlag(Flag: MachineInstr::MIFlag::FmNsz);
257	// Fixed point:
258	// Multiply:
259	case PPC::MULHD:
260	case PPC::MULLD:
261	case PPC::MULHW:
262	case PPC::MULLW:
263	return true;
264	default:
265	return false;
266	}
267	}
268
269	#define InfoArrayIdxFMAInst 0
270	#define InfoArrayIdxFAddInst 1
271	#define InfoArrayIdxFMULInst 2
272	#define InfoArrayIdxAddOpIdx 3
273	#define InfoArrayIdxMULOpIdx 4
274	#define InfoArrayIdxFSubInst 5
275	// Array keeps info for FMA instructions:
276	// Index 0(InfoArrayIdxFMAInst): FMA instruction;
277	// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA;
278	// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA;
279	// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;
280	// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;
281	// second MUL operand index is plus 1;
282	// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA.
283	static const uint16_t FMAOpIdxInfo[][`6`] = {
284	// FIXME: Add more FMA instructions like XSNMADDADP and so on.
285	{PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, `1`, `2`, PPC::XSSUBDP},
286	{PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, `1`, `2`, PPC::XSSUBSP},
287	{PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, `1`, `2`, PPC::XVSUBDP},
288	{PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, `1`, `2`, PPC::XVSUBSP},
289	{PPC::FMADD, PPC::FADD, PPC::FMUL, `3`, `1`, PPC::FSUB},
290	{PPC::FMADDS, PPC::FADDS, PPC::FMULS, `3`, `1`, PPC::FSUBS}};
291
292	// Check if an opcode is a FMA instruction. If it is, return the index in array
293	// FMAOpIdxInfo. Otherwise, return -1.
294	int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
295	for (unsigned I = `0`; I < std::size(FMAOpIdxInfo); I++)
296	if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
297	return I;
298	return -`1`;
299	}
300
301	// On PowerPC target, we have two kinds of patterns related to FMA:
302	// 1: Improve ILP.
303	// Try to reassociate FMA chains like below:
304	//
305	// Pattern 1:
306	// A = FADD X, Y (Leaf)
307	// B = FMA A, M21, M22 (Prev)
308	// C = FMA B, M31, M32 (Root)
309	// -->
310	// A = FMA X, M21, M22
311	// B = FMA Y, M31, M32
312	// C = FADD A, B
313	//
314	// Pattern 2:
315	// A = FMA X, M11, M12 (Leaf)
316	// B = FMA A, M21, M22 (Prev)
317	// C = FMA B, M31, M32 (Root)
318	// -->
319	// A = FMUL M11, M12
320	// B = FMA X, M21, M22
321	// D = FMA A, M31, M32
322	// C = FADD B, D
323	//
324	// breaking the dependency between A and B, allowing FMA to be executed in
325	// parallel (or back-to-back in a pipeline) instead of depending on each other.
326	//
327	// 2: Reduce register pressure.
328	// Try to reassociate FMA with FSUB and a constant like below:
329	// C is a floating point const.
330	//
331	// Pattern 1:
332	// A = FSUB X, Y (Leaf)
333	// D = FMA B, C, A (Root)
334	// -->
335	// A = FMA B, Y, -C
336	// D = FMA A, X, C
337	//
338	// Pattern 2:
339	// A = FSUB X, Y (Leaf)
340	// D = FMA B, A, C (Root)
341	// -->
342	// A = FMA B, Y, -C
343	// D = FMA A, X, C
344	//
345	// Before the transformation, A must be assigned with different hardware
346	// register with D. After the transformation, A and D must be assigned with
347	// same hardware register due to TIE attribute of FMA instructions.
348	//
349	bool PPCInstrInfo::getFMAPatterns(MachineInstr &Root,
350	SmallVectorImpl<unsigned> &Patterns,
351	bool DoRegPressureReduce) const {
352	MachineBasicBlock *MBB = Root.getParent();
353	const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
354	const TargetRegisterInfo *TRI = &getRegisterInfo();
355
356	auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {
357	for (const auto &MO : Instr.explicit_operands())
358	if (!(MO.isReg() && MO.getReg().isVirtual()))
359	return false;
360	return true;
361	};
362
363	auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,
364	unsigned OpType) {
365	if (Instr.getOpcode() !=
366	FMAOpIdxInfo[getFMAOpIdxInfo(Opcode: Root.getOpcode())][OpType])
367	return false;
368
369	// Instruction can be reassociated.
370	// fast math flags may prohibit reassociation.
371	if (!(Instr.getFlag(Flag: MachineInstr::MIFlag::FmReassoc) &&
372	Instr.getFlag(Flag: MachineInstr::MIFlag::FmNsz)))
373	return false;
374
375	// Instruction operands are virtual registers for reassociation.
376	if (!IsAllOpsVirtualReg (Instr))
377	return false;
378
379	// For register pressure reassociation, the FSub must have only one use as
380	// we want to delete the sub to save its def.
381	if (OpType == InfoArrayIdxFSubInst &&
382	!MRI->hasOneNonDBGUse(RegNo: Instr.getOperand(i: `0`).getReg()))
383	return false;
384
385	return true;
386	};
387
388	auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,
389	int16_t &MulOpIdx, bool IsLeaf) {
390	int16_t Idx = getFMAOpIdxInfo(Opcode: Instr.getOpcode());
391	if (Idx < `0`)
392	return false;
393
394	// Instruction can be reassociated.
395	// fast math flags may prohibit reassociation.
396	if (!(Instr.getFlag(Flag: MachineInstr::MIFlag::FmReassoc) &&
397	Instr.getFlag(Flag: MachineInstr::MIFlag::FmNsz)))
398	return false;
399
400	// Instruction operands are virtual registers for reassociation.
401	if (!IsAllOpsVirtualReg (Instr))
402	return false;
403
404	MulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
405	if (IsLeaf)
406	return true;
407
408	AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
409
410	const MachineOperand &OpAdd = Instr.getOperand(i: AddOpIdx);
411	MachineInstr *MIAdd = MRI->getUniqueVRegDef(Reg: OpAdd.getReg());
412	// If 'add' operand's def is not in current block, don't do ILP related opt.
413	if (!MIAdd \|\| MIAdd->getParent() != MBB)
414	return false;
415
416	// If this is not Leaf FMA Instr, its 'add' operand should only have one use
417	// as this fma will be changed later.
418	return IsLeaf ? true : MRI->hasOneNonDBGUse(RegNo: OpAdd.getReg());
419	};
420
421	int16_t AddOpIdx = -`1`;
422	int16_t MulOpIdx = -`1`;
423
424	bool IsUsedOnceL = false;
425	bool IsUsedOnceR = false;
426	MachineInstr MULInstrL = nullptr*;
427	MachineInstr MULInstrR = nullptr*;
428
429	auto IsRPReductionCandidate = [&]() {
430	// Currently, we only support float and double.
431	// FIXME: add support for other types.
432	unsigned Opcode = Root.getOpcode();
433	if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)
434	return false;
435
436	// Root must be a valid FMA like instruction.
437	// Treat it as leaf as we don't care its add operand.
438	if (IsReassociableFMA (Root, AddOpIdx, MulOpIdx, true)) {
439	assert((MulOpIdx >= `0`) && "mul operand index not right!");
440	Register MULRegL = TRI->lookThruSingleUseCopyChain(
441	SrcReg: Root.getOperand(i: MulOpIdx).getReg(), MRI);
442	Register MULRegR = TRI->lookThruSingleUseCopyChain(
443	SrcReg: Root.getOperand(i: MulOpIdx + `1`).getReg(), MRI);
444	if (!MULRegL && !MULRegR)
445	return false;
446
447	if (MULRegL && !MULRegR) {
448	MULRegR =
449	TRI->lookThruCopyLike(SrcReg: Root.getOperand(i: MulOpIdx + `1`).getReg(), MRI);
450	IsUsedOnceL = true;
451	} else if (!MULRegL && MULRegR) {
452	MULRegL =
453	TRI->lookThruCopyLike(SrcReg: Root.getOperand(i: MulOpIdx).getReg(), MRI);
454	IsUsedOnceR = true;
455	} else {
456	IsUsedOnceL = true;
457	IsUsedOnceR = true;
458	}
459
460	if (!MULRegL.isVirtual() \|\| !MULRegR.isVirtual())
461	return false;
462
463	MULInstrL = MRI->getVRegDef(Reg: MULRegL);
464	MULInstrR = MRI->getVRegDef(Reg: MULRegR);
465	return true;
466	}
467	return false;
468	};
469
470	// Register pressure fma reassociation patterns.
471	if (DoRegPressureReduce && IsRPReductionCandidate ()) {
472	assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!");
473	// Register pressure pattern 1
474	if (isLoadFromConstantPool(I: MULInstrL) && IsUsedOnceR &&
475	IsReassociableAddOrSub (*MULInstrR, InfoArrayIdxFSubInst)) {
476	LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");
477	Patterns.push_back(Elt: PPCMachineCombinerPattern::REASSOC_XY_BCA);
478	return true;
479	}
480
481	// Register pressure pattern 2
482	if ((isLoadFromConstantPool(I: MULInstrR) && IsUsedOnceL &&
483	IsReassociableAddOrSub (*MULInstrL, InfoArrayIdxFSubInst))) {
484	LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");
485	Patterns.push_back(Elt: PPCMachineCombinerPattern::REASSOC_XY_BAC);
486	return true;
487	}
488	}
489
490	// ILP fma reassociation patterns.
491	// Root must be a valid FMA like instruction.
492	AddOpIdx = -`1`;
493	if (!IsReassociableFMA (Root, AddOpIdx, MulOpIdx, false))
494	return false;
495
496	assert((AddOpIdx >= `0`) && "add operand index not right!");
497
498	Register RegB = Root.getOperand(i: AddOpIdx).getReg();
499	MachineInstr *Prev = MRI->getUniqueVRegDef(Reg: RegB);
500
501	// Prev must be a valid FMA like instruction.
502	AddOpIdx = -`1`;
503	if (!IsReassociableFMA (Prev, AddOpIdx, MulOpIdx, false*))
504	return false;
505
506	assert((AddOpIdx >= `0`) && "add operand index not right!");
507
508	Register RegA = Prev->getOperand(i: AddOpIdx).getReg();
509	MachineInstr *Leaf = MRI->getUniqueVRegDef(Reg: RegA);
510	AddOpIdx = -`1`;
511	if (IsReassociableFMA (Leaf, AddOpIdx, MulOpIdx, true*)) {
512	Patterns.push_back(Elt: PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM);
513	LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");
514	return true;
515	}
516	if (IsReassociableAddOrSub (*Leaf, InfoArrayIdxFAddInst)) {
517	Patterns.push_back(Elt: PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM);
518	LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");
519	return true;
520	}
521	return false;
522	}
523
524	void PPCInstrInfo::finalizeInsInstrs(
525	MachineInstr &Root, unsigned &Pattern,
526	SmallVectorImpl<MachineInstr > &InsInstrs) const* {
527	assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");
528
529	MachineFunction *MF = Root.getMF();
530	MachineRegisterInfo *MRI = &MF->getRegInfo();
531	const TargetRegisterInfo *TRI = &getRegisterInfo();
532	MachineConstantPool *MCP = MF->getConstantPool();
533
534	int16_t Idx = getFMAOpIdxInfo(Opcode: Root.getOpcode());
535	if (Idx < `0`)
536	return;
537
538	uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
539
540	// For now we only need to fix up placeholder for register pressure reduce
541	// patterns.
542	Register ConstReg = `0`;
543	switch (Pattern) {
544	case PPCMachineCombinerPattern::REASSOC_XY_BCA:
545	ConstReg =
546	TRI->lookThruCopyLike(SrcReg: Root.getOperand(i: FirstMulOpIdx).getReg(), MRI);
547	break;
548	case PPCMachineCombinerPattern::REASSOC_XY_BAC:
549	ConstReg =
550	TRI->lookThruCopyLike(SrcReg: Root.getOperand(i: FirstMulOpIdx + `1`).getReg(), MRI);
551	break;
552	default:
553	// Not register pressure reduce patterns.
554	return;
555	}
556
557	MachineInstr *ConstDefInstr = MRI->getVRegDef(Reg: ConstReg);
558	// Get const value from const pool.
559	const Constant *C = getConstantFromConstantPool(I: ConstDefInstr);
560	assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");
561
562	// Get negative fp const.
563	APFloat F1((dyn_cast<ConstantFP>(Val: C))->getValueAPF());
564	F1.changeSign();
565	Constant *NegC = ConstantFP::get(Context&: dyn_cast<ConstantFP>(Val: C)->getContext(), V: F1);
566	Align Alignment = MF->getDataLayout().getPrefTypeAlign(Ty: C->getType());
567
568	// Put negative fp const into constant pool.
569	unsigned ConstPoolIdx = MCP->getConstantPoolIndex(C: NegC, Alignment);
570
571	MachineOperand Placeholder = nullptr*;
572	// Record the placeholder PPC::ZERO8 we add in reassociateFMA.
573	for (auto *Inst : InsInstrs) {
574	for (MachineOperand &Operand : Inst->explicit_operands()) {
575	assert(Operand.isReg() && "Invalid instruction in InsInstrs!");
576	if (Operand.getReg() == PPC::ZERO8) {
577	Placeholder = &Operand;
578	break;
579	}
580	}
581	}
582
583	assert(Placeholder && "Placeholder does not exist!");
584
585	// Generate instructions to load the const fp from constant pool.
586	// We only support PPC64 and medium code model.
587	Register LoadNewConst =
588	generateLoadForNewConst(Idx: ConstPoolIdx, MI: &Root, Ty: C->getType(), InsInstrs);
589
590	// Fill the placeholder with the new load from constant pool.
591	Placeholder->setReg(LoadNewConst);
592	}
593
594	bool PPCInstrInfo::shouldReduceRegisterPressure(
595	const MachineBasicBlock MBB, const* RegisterClassInfo RegClassInfo) const* {
596
597	if (!EnableFMARegPressureReduction)
598	return false;
599
600	// Currently, we only enable register pressure reducing in machine combiner
601	// for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector
602	// support.
603	//
604	// So we need following instructions to access a TOC entry:
605	//
606	// %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0
607	// %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,
608	// killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)
609	//
610	// FIXME: add more supported targets, like Small and Large code model, PPC32,
611	// AIX.
612	if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
613	Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium))
614	return false;
615
616	const TargetRegisterInfo *TRI = &getRegisterInfo();
617	const MachineFunction *MF = MBB->getParent();
618	const MachineRegisterInfo *MRI = &MF->getRegInfo();
619
620	auto GetMBBPressure =
621	[&](const MachineBasicBlock MBB) -> std::vector<unsigned*> {
622	RegionPressure Pressure;
623	RegPressureTracker RPTracker(Pressure);
624
625	// Initialize the register pressure tracker.
626	RPTracker.init(mf: MBB->getParent(), rci: RegClassInfo, lis: nullptr, mbb: MBB, pos: MBB->end(),
627	/TrackLaneMasks/ false, /TrackUntiedDefs=/true);
628
629	for (const auto &MI : reverse(C: *MBB)) {
630	if (MI.isDebugValue() \|\| MI.isDebugLabel())
631	continue;
632	RegisterOperands RegOpers;
633	RegOpers.collect(MI, TRI: TRI, MRI: MRI, TrackLaneMasks: false, IgnoreDead: false);
634	RPTracker.recedeSkipDebugValues();
635	assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
636	RPTracker.recede(RegOpers);
637	}
638
639	// Close the RPTracker to finalize live ins.
640	RPTracker.closeRegion();
641
642	return RPTracker.getPressure().MaxSetPressure;
643	};
644
645	// For now we only care about float and double type fma.
646	unsigned VSSRCLimit =
647	RegClassInfo->getRegPressureSetLimit(Idx: PPC::RegisterPressureSets::VSSRC);
648
649	// Only reduce register pressure when pressure is high.
650	return GetMBBPressure (MBB)[PPC::RegisterPressureSets::VSSRC] >
651	(float)VSSRCLimit * FMARPFactor;
652	}
653
654	bool PPCInstrInfo::isLoadFromConstantPool(MachineInstr I) const* {
655	// I has only one memory operand which is load from constant pool.
656	if (!I->hasOneMemOperand())
657	return false;
658
659	MachineMemOperand *Op = I->memoperands()[`0`];
660	return Op->isLoad() && Op->getPseudoValue() &&
661	Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;
662	}
663
664	Register PPCInstrInfo::generateLoadForNewConst(
665	unsigned Idx, MachineInstr MI, Type Ty,
666	SmallVectorImpl<MachineInstr > &InsInstrs) const* {
667	// Now we only support PPC64, Medium code model and P9 with vector.
668	// We have immutable pattern to access const pool. See function
669	// shouldReduceRegisterPressure.
670	assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&
671	Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium) &&
672	"Target not supported!\n");
673
674	MachineFunction *MF = MI->getMF();
675	MachineRegisterInfo *MRI = &MF->getRegInfo();
676
677	// Generate ADDIStocHA8
678	Register VReg1 = MRI->createVirtualRegister(RegClass: &PPC::G8RC_and_G8RC_NOX0RegClass);
679	MachineInstrBuilder TOCOffset =
680	BuildMI(MF&: *MF, MIMD: MI->getDebugLoc(), MCID: get(Opcode: PPC::ADDIStocHA8), DestReg: VReg1)
681	.addReg(RegNo: PPC::X2)
682	.addConstantPoolIndex(Idx);
683
684	assert((Ty->isFloatTy() \|\| Ty->isDoubleTy()) &&
685	"Only float and double are supported!");
686
687	unsigned LoadOpcode;
688	// Should be float type or double type.
689	if (Ty->isFloatTy())
690	LoadOpcode = PPC::DFLOADf32;
691	else
692	LoadOpcode = PPC::DFLOADf64;
693
694	const TargetRegisterClass *RC = MRI->getRegClass(Reg: MI->getOperand(i: `0`).getReg());
695	Register VReg2 = MRI->createVirtualRegister(RegClass: RC);
696	MachineMemOperand *MMO = MF->getMachineMemOperand(
697	PtrInfo: MachinePointerInfo::getConstantPool(MF&: *MF), F: MachineMemOperand::MOLoad,
698	Size: Ty->getScalarSizeInBits() / `8`, BaseAlignment: MF->getDataLayout().getPrefTypeAlign(Ty));
699
700	// Generate Load from constant pool.
701	MachineInstrBuilder Load =
702	BuildMI(MF&: *MF, MIMD: MI->getDebugLoc(), MCID: get(Opcode: LoadOpcode), DestReg: VReg2)
703	.addConstantPoolIndex(Idx)
704	.addReg(RegNo: VReg1, flags: getKillRegState(B: true))
705	.addMemOperand(MMO);
706
707	Load ->getOperand(i: `1`).setTargetFlags(PPCII::MO_TOC_LO);
708
709	// Insert the toc load instructions into InsInstrs.
710	InsInstrs.insert(I: InsInstrs.begin(), Elt: Load);
711	InsInstrs.insert(I: InsInstrs.begin(), Elt: TOCOffset);
712	return VReg2;
713	}
714
715	// This function returns the const value in constant pool if the \p I is a load
716	// from constant pool.
717	const Constant *
718	PPCInstrInfo::getConstantFromConstantPool(MachineInstr I) const* {
719	MachineFunction *MF = I->getMF();
720	MachineRegisterInfo *MRI = &MF->getRegInfo();
721	MachineConstantPool *MCP = MF->getConstantPool();
722	assert(I->mayLoad() && "Should be a load instruction.\n");
723	for (auto MO : I->uses()) {
724	if (!MO.isReg())
725	continue;
726	Register Reg = MO.getReg();
727	if (Reg == `0` \|\| !Reg.isVirtual())
728	continue;
729	// Find the toc address.
730	MachineInstr *DefMI = MRI->getVRegDef(Reg);
731	for (auto MO2 : DefMI->uses())
732	if (MO2.isCPI())
733	return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;
734	}
735	return nullptr;
736	}
737
738	CombinerObjective PPCInstrInfo::getCombinerObjective(unsigned Pattern) const {
739	switch (Pattern) {
740	case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM:
741	case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM:
742	return CombinerObjective::MustReduceDepth;
743	case PPCMachineCombinerPattern::REASSOC_XY_BCA:
744	case PPCMachineCombinerPattern::REASSOC_XY_BAC:
745	return CombinerObjective::MustReduceRegisterPressure;
746	default:
747	return TargetInstrInfo::getCombinerObjective(Pattern);
748	}
749	}
750
751	bool PPCInstrInfo::getMachineCombinerPatterns(
752	MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
753	bool DoRegPressureReduce) const {
754	// Using the machine combiner in this way is potentially expensive, so
755	// restrict to when aggressive optimizations are desired.
756	if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOptLevel::Aggressive)
757	return false;
758
759	if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))
760	return true;
761
762	return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
763	DoRegPressureReduce);
764	}
765
766	void PPCInstrInfo::genAlternativeCodeSequence(
767	MachineInstr &Root, unsigned Pattern,
768	SmallVectorImpl<MachineInstr *> &InsInstrs,
769	SmallVectorImpl<MachineInstr *> &DelInstrs,
770	DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
771	switch (Pattern) {
772	case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM:
773	case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM:
774	case PPCMachineCombinerPattern::REASSOC_XY_BCA:
775	case PPCMachineCombinerPattern::REASSOC_XY_BAC:
776	reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);
777	break;
778	default:
779	// Reassociate default patterns.
780	TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
781	DelInstrs, InstIdxForVirtReg&: InstrIdxForVirtReg);
782	break;
783	}
784	}
785
786	void PPCInstrInfo::reassociateFMA(
787	MachineInstr &Root, unsigned Pattern,
788	SmallVectorImpl<MachineInstr *> &InsInstrs,
789	SmallVectorImpl<MachineInstr *> &DelInstrs,
790	DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
791	MachineFunction *MF = Root.getMF();
792	MachineRegisterInfo &MRI = MF->getRegInfo();
793	const TargetRegisterInfo *TRI = &getRegisterInfo();
794	MachineOperand &OpC = Root.getOperand(i: `0`);
795	Register RegC = OpC.getReg();
796	const TargetRegisterClass *RC = MRI.getRegClass(Reg: RegC);
797	MRI.constrainRegClass(Reg: RegC, RC);
798
799	unsigned FmaOp = Root.getOpcode();
800	int16_t Idx = getFMAOpIdxInfo(Opcode: FmaOp);
801	assert(Idx >= `0` && "Root must be a FMA instruction");
802
803	bool IsILPReassociate =
804	(Pattern == PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM) \|\|
805	(Pattern == PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM);
806
807	uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];
808	uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];
809
810	MachineInstr Prev = nullptr*;
811	MachineInstr Leaf = nullptr*;
812	switch (Pattern) {
813	default:
814	llvm_unreachable("not recognized pattern!");
815	case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM:
816	case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM:
817	Prev = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: AddOpIdx).getReg());
818	Leaf = MRI.getUniqueVRegDef(Reg: Prev->getOperand(i: AddOpIdx).getReg());
819	break;
820	case PPCMachineCombinerPattern::REASSOC_XY_BAC: {
821	Register MULReg =
822	TRI->lookThruCopyLike(SrcReg: Root.getOperand(i: FirstMulOpIdx).getReg(), MRI: &MRI);
823	Leaf = MRI.getVRegDef(Reg: MULReg);
824	break;
825	}
826	case PPCMachineCombinerPattern::REASSOC_XY_BCA: {
827	Register MULReg = TRI->lookThruCopyLike(
828	SrcReg: Root.getOperand(i: FirstMulOpIdx + `1`).getReg(), MRI: &MRI);
829	Leaf = MRI.getVRegDef(Reg: MULReg);
830	break;
831	}
832	}
833
834	uint32_t IntersectedFlags = `0`;
835	if (IsILPReassociate)
836	IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
837	else
838	IntersectedFlags = Root.getFlags() & Leaf->getFlags();
839
840	auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,
841	bool &KillFlag) {
842	Reg = Operand.getReg();
843	MRI.constrainRegClass(Reg, RC);
844	KillFlag = Operand.isKill();
845	};
846
847	auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,
848	Register &MulOp2, Register &AddOp,
849	bool &MulOp1KillFlag, bool &MulOp2KillFlag,
850	bool &AddOpKillFlag) {
851	GetOperandInfo (Instr.getOperand(i: FirstMulOpIdx), MulOp1, MulOp1KillFlag);
852	GetOperandInfo (Instr.getOperand(i: FirstMulOpIdx + `1`), MulOp2, MulOp2KillFlag);
853	GetOperandInfo (Instr.getOperand(i: AddOpIdx), AddOp, AddOpKillFlag);
854	};
855
856	Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,
857	RegA21, RegB;
858	bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,
859	KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,
860	KillA11 = false, KillA21 = false, KillB = false;
861
862	GetFMAInstrInfo (Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);
863
864	if (IsILPReassociate)
865	GetFMAInstrInfo (*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);
866
867	if (Pattern == PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
868	GetFMAInstrInfo (*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);
869	GetOperandInfo (Leaf->getOperand(i: AddOpIdx), RegX, KillX);
870	} else if (Pattern == PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM) {
871	GetOperandInfo (Leaf->getOperand(i: `1`), RegX, KillX);
872	GetOperandInfo (Leaf->getOperand(i: `2`), RegY, KillY);
873	} else {
874	// Get FSUB instruction info.
875	GetOperandInfo (Leaf->getOperand(i: `1`), RegX, KillX);
876	GetOperandInfo (Leaf->getOperand(i: `2`), RegY, KillY);
877	}
878
879	// Create new virtual registers for the new results instead of
880	// recycling legacy ones because the MachineCombiner's computation of the
881	// critical path requires a new register definition rather than an existing
882	// one.
883	// For register pressure reassociation, we only need create one virtual
884	// register for the new fma.
885	Register NewVRA = MRI.createVirtualRegister(RegClass: RC);
886	InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVRA, y: `0`));
887
888	Register NewVRB = `0`;
889	if (IsILPReassociate) {
890	NewVRB = MRI.createVirtualRegister(RegClass: RC);
891	InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVRB, y: `1`));
892	}
893
894	Register NewVRD = `0`;
895	if (Pattern == PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM) {
896	NewVRD = MRI.createVirtualRegister(RegClass: RC);
897	InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVRD, y: `2`));
898	}
899
900	auto AdjustOperandOrder = [&](MachineInstr MI, Register RegAdd, bool* KillAdd,
901	Register RegMul1, bool KillRegMul1,
902	Register RegMul2, bool KillRegMul2) {
903	MI->getOperand(i: AddOpIdx).setReg(RegAdd);
904	MI->getOperand(i: AddOpIdx).setIsKill(KillAdd);
905	MI->getOperand(i: FirstMulOpIdx).setReg(RegMul1);
906	MI->getOperand(i: FirstMulOpIdx).setIsKill(KillRegMul1);
907	MI->getOperand(i: FirstMulOpIdx + `1`).setReg(RegMul2);
908	MI->getOperand(i: FirstMulOpIdx + `1`).setIsKill(KillRegMul2);
909	};
910
911	MachineInstrBuilder NewARegPressure, NewCRegPressure;
912	switch (Pattern) {
913	default:
914	llvm_unreachable("not recognized pattern!");
915	case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM: {
916	// Create new instructions for insertion.
917	MachineInstrBuilder MINewB =
918	BuildMI(MF&: *MF, MIMD: Prev->getDebugLoc(), MCID: get(Opcode: FmaOp), DestReg: NewVRB)
919	.addReg(RegNo: RegX, flags: getKillRegState(B: KillX))
920	.addReg(RegNo: RegM21, flags: getKillRegState(B: KillM21))
921	.addReg(RegNo: RegM22, flags: getKillRegState(B: KillM22));
922	MachineInstrBuilder MINewA =
923	BuildMI(MF&: *MF, MIMD: Root.getDebugLoc(), MCID: get(Opcode: FmaOp), DestReg: NewVRA)
924	.addReg(RegNo: RegY, flags: getKillRegState(B: KillY))
925	.addReg(RegNo: RegM31, flags: getKillRegState(B: KillM31))
926	.addReg(RegNo: RegM32, flags: getKillRegState(B: KillM32));
927	// If AddOpIdx is not 1, adjust the order.
928	if (AddOpIdx != `1`) {
929	AdjustOperandOrder (MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
930	AdjustOperandOrder (MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);
931	}
932
933	MachineInstrBuilder MINewC =
934	BuildMI(MF&: *MF, MIMD: Root.getDebugLoc(),
935	MCID: get(Opcode: FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), DestReg: RegC)
936	.addReg(RegNo: NewVRB, flags: getKillRegState(B: true))
937	.addReg(RegNo: NewVRA, flags: getKillRegState(B: true));
938
939	// Update flags for newly created instructions.
940	setSpecialOperandAttr(MI&: *MINewA, Flags: IntersectedFlags);
941	setSpecialOperandAttr(MI&: *MINewB, Flags: IntersectedFlags);
942	setSpecialOperandAttr(MI&: *MINewC, Flags: IntersectedFlags);
943
944	// Record new instructions for insertion.
945	InsInstrs.push_back(Elt: MINewA);
946	InsInstrs.push_back(Elt: MINewB);
947	InsInstrs.push_back(Elt: MINewC);
948	break;
949	}
950	case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM: {
951	assert(NewVRD && "new FMA register not created!");
952	// Create new instructions for insertion.
953	MachineInstrBuilder MINewA =
954	BuildMI(MF&: *MF, MIMD: Leaf->getDebugLoc(),
955	MCID: get(Opcode: FMAOpIdxInfo[Idx][InfoArrayIdxFMULInst]), DestReg: NewVRA)
956	.addReg(RegNo: RegM11, flags: getKillRegState(B: KillM11))
957	.addReg(RegNo: RegM12, flags: getKillRegState(B: KillM12));
958	MachineInstrBuilder MINewB =
959	BuildMI(MF&: *MF, MIMD: Prev->getDebugLoc(), MCID: get(Opcode: FmaOp), DestReg: NewVRB)
960	.addReg(RegNo: RegX, flags: getKillRegState(B: KillX))
961	.addReg(RegNo: RegM21, flags: getKillRegState(B: KillM21))
962	.addReg(RegNo: RegM22, flags: getKillRegState(B: KillM22));
963	MachineInstrBuilder MINewD =
964	BuildMI(MF&: *MF, MIMD: Root.getDebugLoc(), MCID: get(Opcode: FmaOp), DestReg: NewVRD)
965	.addReg(RegNo: NewVRA, flags: getKillRegState(B: true))
966	.addReg(RegNo: RegM31, flags: getKillRegState(B: KillM31))
967	.addReg(RegNo: RegM32, flags: getKillRegState(B: KillM32));
968	// If AddOpIdx is not 1, adjust the order.
969	if (AddOpIdx != `1`) {
970	AdjustOperandOrder (MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);
971	AdjustOperandOrder (MINewD, NewVRA, true, RegM31, KillM31, RegM32,
972	KillM32);
973	}
974
975	MachineInstrBuilder MINewC =
976	BuildMI(MF&: *MF, MIMD: Root.getDebugLoc(),
977	MCID: get(Opcode: FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), DestReg: RegC)
978	.addReg(RegNo: NewVRB, flags: getKillRegState(B: true))
979	.addReg(RegNo: NewVRD, flags: getKillRegState(B: true));
980
981	// Update flags for newly created instructions.
982	setSpecialOperandAttr(MI&: *MINewA, Flags: IntersectedFlags);
983	setSpecialOperandAttr(MI&: *MINewB, Flags: IntersectedFlags);
984	setSpecialOperandAttr(MI&: *MINewD, Flags: IntersectedFlags);
985	setSpecialOperandAttr(MI&: *MINewC, Flags: IntersectedFlags);
986
987	// Record new instructions for insertion.
988	InsInstrs.push_back(Elt: MINewA);
989	InsInstrs.push_back(Elt: MINewB);
990	InsInstrs.push_back(Elt: MINewD);
991	InsInstrs.push_back(Elt: MINewC);
992	break;
993	}
994	case PPCMachineCombinerPattern::REASSOC_XY_BAC:
995	case PPCMachineCombinerPattern::REASSOC_XY_BCA: {
996	Register VarReg;
997	bool KillVarReg = false;
998	if (Pattern == PPCMachineCombinerPattern::REASSOC_XY_BCA) {
999	VarReg = RegM31;
1000	KillVarReg = KillM31;
1001	} else {
1002	VarReg = RegM32;
1003	KillVarReg = KillM32;
1004	}
1005	// We don't want to get negative const from memory pool too early, as the
1006	// created entry will not be deleted even if it has no users. Since all
1007	// operand of Leaf and Root are virtual register, we use zero register
1008	// here as a placeholder. When the InsInstrs is selected in
1009	// MachineCombiner, we call finalizeInsInstrs to replace the zero register
1010	// with a virtual register which is a load from constant pool.
1011	NewARegPressure = BuildMI(MF&: *MF, MIMD: Root.getDebugLoc(), MCID: get(Opcode: FmaOp), DestReg: NewVRA)
1012	.addReg(RegNo: RegB, flags: getKillRegState(B: RegB))
1013	.addReg(RegNo: RegY, flags: getKillRegState(B: KillY))
1014	.addReg(RegNo: PPC::ZERO8);
1015	NewCRegPressure = BuildMI(MF&: *MF, MIMD: Root.getDebugLoc(), MCID: get(Opcode: FmaOp), DestReg: RegC)
1016	.addReg(RegNo: NewVRA, flags: getKillRegState(B: true))
1017	.addReg(RegNo: RegX, flags: getKillRegState(B: KillX))
1018	.addReg(RegNo: VarReg, flags: getKillRegState(B: KillVarReg));
1019	// For now, we only support xsmaddadp/xsmaddasp, their add operand are
1020	// both at index 1, no need to adjust.
1021	// FIXME: when add more fma instructions support, like fma/fmas, adjust
1022	// the operand index here.
1023	break;
1024	}
1025	}
1026
1027	if (!IsILPReassociate) {
1028	setSpecialOperandAttr(MI&: *NewARegPressure, Flags: IntersectedFlags);
1029	setSpecialOperandAttr(MI&: *NewCRegPressure, Flags: IntersectedFlags);
1030
1031	InsInstrs.push_back(Elt: NewARegPressure);
1032	InsInstrs.push_back(Elt: NewCRegPressure);
1033	}
1034
1035	assert(!InsInstrs.empty() &&
1036	"Insertion instructions set should not be empty!");
1037
1038	// Record old instructions for deletion.
1039	DelInstrs.push_back(Elt: Leaf);
1040	if (IsILPReassociate)
1041	DelInstrs.push_back(Elt: Prev);
1042	DelInstrs.push_back(Elt: &Root);
1043	}
1044
1045	// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
1046	bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1047	Register &SrcReg, Register &DstReg,
1048	unsigned &SubIdx) const {
1049	switch (MI.getOpcode()) {
1050	default: return false;
1051	case PPC::EXTSW:
1052	case PPC::EXTSW_32:
1053	case PPC::EXTSW_32_64:
1054	SrcReg = MI.getOperand(i: `1`).getReg();
1055	DstReg = MI.getOperand(i: `0`).getReg();
1056	SubIdx = PPC::sub_32;
1057	return true;
1058	}
1059	}
1060
1061	Register PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1062	int &FrameIndex) const {
1063	if (llvm::is_contained(Range: getLoadOpcodesForSpillArray(), Element: MI.getOpcode())) {
1064	// Check for the operands added by addFrameReference (the immediate is the
1065	// offset which defaults to 0).
1066	if (MI.getOperand(i: `1`).isImm() && !MI.getOperand(i: `1`).getImm() &&
1067	MI.getOperand(i: `2`).isFI()) {
1068	FrameIndex = MI.getOperand(i: `2`).getIndex();
1069	return MI.getOperand(i: `0`).getReg();
1070	}
1071	}
1072	return `0`;
1073	}
1074
1075	// For opcodes with the ReMaterializable flag set, this function is called to
1076	// verify the instruction is really rematable.
1077	bool PPCInstrInfo::isReallyTriviallyReMaterializable(
1078	const MachineInstr &MI) const {
1079	switch (MI.getOpcode()) {
1080	default:
1081	// Let base implementaion decide.
1082	break;
1083	case PPC::LI:
1084	case PPC::LI8:
1085	case PPC::PLI:
1086	case PPC::PLI8:
1087	case PPC::LIS:
1088	case PPC::LIS8:
1089	case PPC::ADDIStocHA:
1090	case PPC::ADDIStocHA8:
1091	case PPC::ADDItocL:
1092	case PPC::ADDItocL8:
1093	case PPC::LOAD_STACK_GUARD:
1094	case PPC::PPCLdFixedAddr:
1095	case PPC::XXLXORz:
1096	case PPC::XXLXORspz:
1097	case PPC::XXLXORdpz:
1098	case PPC::XXLEQVOnes:
1099	case PPC::XXSPLTI32DX:
1100	case PPC::XXSPLTIW:
1101	case PPC::XXSPLTIDP:
1102	case PPC::V_SET0B:
1103	case PPC::V_SET0H:
1104	case PPC::V_SET0:
1105	case PPC::V_SETALLONESB:
1106	case PPC::V_SETALLONESH:
1107	case PPC::V_SETALLONES:
1108	case PPC::CRSET:
1109	case PPC::CRUNSET:
1110	case PPC::XXSETACCZ:
1111	case PPC::DMXXSETACCZ:
1112	return true;
1113	}
1114	return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
1115	}
1116
1117	Register PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1118	int &FrameIndex) const {
1119	if (llvm::is_contained(Range: getStoreOpcodesForSpillArray(), Element: MI.getOpcode())) {
1120	if (MI.getOperand(i: `1`).isImm() && !MI.getOperand(i: `1`).getImm() &&
1121	MI.getOperand(i: `2`).isFI()) {
1122	FrameIndex = MI.getOperand(i: `2`).getIndex();
1123	return MI.getOperand(i: `0`).getReg();
1124	}
1125	}
1126	return `0`;
1127	}
1128
1129	MachineInstr PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool* NewMI,
1130	unsigned OpIdx1,
1131	unsigned OpIdx2) const {
1132	MachineFunction &MF = *MI.getParent()->getParent();
1133
1134	// Normal instructions can be commuted the obvious way.
1135	if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)
1136	return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
1137	// Note that RLWIMI can be commuted as a 32-bit instruction, but not as a
1138	// 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because
1139	// changing the relative order of the mask operands might change what happens
1140	// to the high-bits of the mask (and, thus, the result).
1141
1142	// Cannot commute if it has a non-zero rotate count.
1143	if (MI.getOperand(i: `3`).getImm() != `0`)
1144	return nullptr;
1145
1146	// If we have a zero rotate count, we have:
1147	// M = mask(MB,ME)
1148	// Op0 = (Op1 & ~M) \| (Op2 & M)
1149	// Change this to:
1150	// M = mask((ME+1)&31, (MB-1)&31)
1151	// Op0 = (Op2 & ~M) \| (Op1 & M)
1152
1153	// Swap op1/op2
1154	assert(((OpIdx1 == `1` && OpIdx2 == `2`) \|\| (OpIdx1 == `2` && OpIdx2 == `1`)) &&
1155	"Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec.");
1156	Register Reg0 = MI.getOperand(i: `0`).getReg();
1157	Register Reg1 = MI.getOperand(i: `1`).getReg();
1158	Register Reg2 = MI.getOperand(i: `2`).getReg();
1159	unsigned SubReg1 = MI.getOperand(i: `1`).getSubReg();
1160	unsigned SubReg2 = MI.getOperand(i: `2`).getSubReg();
1161	bool Reg1IsKill = MI.getOperand(i: `1`).isKill();
1162	bool Reg2IsKill = MI.getOperand(i: `2`).isKill();
1163	bool ChangeReg0 = false;
1164	// If machine instrs are no longer in two-address forms, update
1165	// destination register as well.
1166	if (Reg0 == Reg1) {
1167	// Must be two address instruction (i.e. op1 is tied to op0).
1168	assert(MI.getDesc().getOperandConstraint(`1`, MCOI::TIED_TO) == `0` &&
1169	"Expecting a two-address instruction!");
1170	assert(MI.getOperand(`0`).getSubReg() == SubReg1 && "Tied subreg mismatch");
1171	Reg2IsKill = false;
1172	ChangeReg0 = true;
1173	}
1174
1175	// Masks.
1176	unsigned MB = MI.getOperand(i: `4`).getImm();
1177	unsigned ME = MI.getOperand(i: `5`).getImm();
1178
1179	// We can't commute a trivial mask (there is no way to represent an all-zero
1180	// mask).
1181	if (MB == `0` && ME == `31`)
1182	return nullptr;
1183
1184	if (NewMI) {
1185	// Create a new instruction.
1186	Register Reg0 = ChangeReg0 ? Reg2 : MI.getOperand(i: `0`).getReg();
1187	bool Reg0IsDead = MI.getOperand(i: `0`).isDead();
1188	return BuildMI(MF, MIMD: MI.getDebugLoc(), MCID: MI.getDesc())
1189	.addReg(RegNo: Reg0, flags: RegState::Define \| getDeadRegState(B: Reg0IsDead))
1190	.addReg(RegNo: Reg2, flags: getKillRegState(B: Reg2IsKill))
1191	.addReg(RegNo: Reg1, flags: getKillRegState(B: Reg1IsKill))
1192	.addImm(Val: (ME + `1`) & `31`)
1193	.addImm(Val: (MB - `1`) & `31`);
1194	}
1195
1196	if (ChangeReg0) {
1197	MI.getOperand(i: `0`).setReg(Reg2);
1198	MI.getOperand(i: `0`).setSubReg(SubReg2);
1199	}
1200	MI.getOperand(i: `2`).setReg(Reg1);
1201	MI.getOperand(i: `1`).setReg(Reg2);
1202	MI.getOperand(i: `2`).setSubReg(SubReg1);
1203	MI.getOperand(i: `1`).setSubReg(SubReg2);
1204	MI.getOperand(i: `2`).setIsKill(Reg1IsKill);
1205	MI.getOperand(i: `1`).setIsKill(Reg2IsKill);
1206
1207	// Swap the mask around.
1208	MI.getOperand(i: `4`).setImm((ME + `1`) & `31`);
1209	MI.getOperand(i: `5`).setImm((MB - `1`) & `31`);
1210	return &MI;
1211	}
1212
1213	bool PPCInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
1214	unsigned &SrcOpIdx1,
1215	unsigned &SrcOpIdx2) const {
1216	// For VSX A-Type FMA instructions, it is the first two operands that can be
1217	// commuted, however, because the non-encoded tied input operand is listed
1218	// first, the operands to swap are actually the second and third.
1219
1220	int AltOpc = PPC::getAltVSXFMAOpcode(Opcode: MI.getOpcode());
1221	if (AltOpc == -`1`)
1222	return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
1223
1224	// The commutable operand indices are 2 and 3. Return them in SrcOpIdx1
1225	// and SrcOpIdx2.
1226	return fixCommutedOpIndices(ResultIdx1&: SrcOpIdx1, ResultIdx2&: SrcOpIdx2, CommutableOpIdx1: `2`, CommutableOpIdx2: `3`);
1227	}
1228
1229	void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
1230	MachineBasicBlock::iterator MI) const {
1231	// This function is used for scheduling, and the nop wanted here is the type
1232	// that terminates dispatch groups on the POWER cores.
1233	unsigned Directive = Subtarget.getCPUDirective();
1234	unsigned Opcode;
1235	switch (Directive) {
1236	default: Opcode = PPC::NOP; break;
1237	case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
1238	case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
1239	case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; / FIXME: Update when P8 InstrScheduling model is ready /
1240	// FIXME: Update when POWER9 scheduling model is ready.
1241	case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break;
1242	}
1243
1244	DebugLoc DL;
1245	BuildMI(BB&: MBB, I: MI, MIMD: DL, MCID: get(Opcode));
1246	}
1247
1248	/// Return the noop instruction to use for a noop.
1249	MCInst PPCInstrInfo::getNop() const {
1250	MCInst Nop;
1251	Nop.setOpcode(PPC::NOP);
1252	return Nop;
1253	}
1254
1255	// Branch analysis.
1256	// Note: If the condition register is set to CTR or CTR8 then this is a
1257	// BDNZ (imm == 1) or BDZ (imm == 0) branch.
1258	bool PPCInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
1259	MachineBasicBlock *&TBB,
1260	MachineBasicBlock *&FBB,
1261	SmallVectorImpl<MachineOperand> &Cond,
1262	bool AllowModify) const {
1263	bool isPPC64 = Subtarget.isPPC64();
1264
1265	// If the block has no terminators, it just falls into the block after it.
1266	MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
1267	if (I == MBB.end())
1268	return false;
1269
1270	if (!isUnpredicatedTerminator(MI: *I))
1271	return false;
1272
1273	if (AllowModify) {
1274	// If the BB ends with an unconditional branch to the fallthrough BB,
1275	// we eliminate the branch instruction.
1276	if (I ->getOpcode() == PPC::B &&
1277	MBB.isLayoutSuccessor(MBB: I ->getOperand(i: `0`).getMBB())) {
1278	I ->eraseFromParent();
1279
1280	// We update iterator after deleting the last branch.
1281	I = MBB.getLastNonDebugInstr();
1282	if (I == MBB.end() \|\| !isUnpredicatedTerminator(MI: *I))
1283	return false;
1284	}
1285	}
1286
1287	// Get the last instruction in the block.
1288	MachineInstr &LastInst = *I;
1289
1290	// If there is only one terminator instruction, process it.
1291	if (I == MBB.begin() \|\| !isUnpredicatedTerminator(MI: *--I)) {
1292	if (LastInst.getOpcode() == PPC::B) {
1293	if (!LastInst.getOperand(i: `0`).isMBB())
1294	return true;
1295	TBB = LastInst.getOperand(i: `0`).getMBB();
1296	return false;
1297	} else if (LastInst.getOpcode() == PPC::BCC) {
1298	if (!LastInst.getOperand(i: `2`).isMBB())
1299	return true;
1300	// Block ends with fall-through condbranch.
1301	TBB = LastInst.getOperand(i: `2`).getMBB();
1302	Cond.push_back(Elt: LastInst.getOperand(i: `0`));
1303	Cond.push_back(Elt: LastInst.getOperand(i: `1`));
1304	return false;
1305	} else if (LastInst.getOpcode() == PPC::BC) {
1306	if (!LastInst.getOperand(i: `1`).isMBB())
1307	return true;
1308	// Block ends with fall-through condbranch.
1309	TBB = LastInst.getOperand(i: `1`).getMBB();
1310	Cond.push_back(Elt: MachineOperand::CreateImm(Val: PPC::PRED_BIT_SET));
1311	Cond.push_back(Elt: LastInst.getOperand(i: `0`));
1312	return false;
1313	} else if (LastInst.getOpcode() == PPC::BCn) {
1314	if (!LastInst.getOperand(i: `1`).isMBB())
1315	return true;
1316	// Block ends with fall-through condbranch.
1317	TBB = LastInst.getOperand(i: `1`).getMBB();
1318	Cond.push_back(Elt: MachineOperand::CreateImm(Val: PPC::PRED_BIT_UNSET));
1319	Cond.push_back(Elt: LastInst.getOperand(i: `0`));
1320	return false;
1321	} else if (LastInst.getOpcode() == PPC::BDNZ8 \|\|
1322	LastInst.getOpcode() == PPC::BDNZ) {
1323	if (!LastInst.getOperand(i: `0`).isMBB())
1324	return true;
1325	if (DisableCTRLoopAnal)
1326	return true;
1327	TBB = LastInst.getOperand(i: `0`).getMBB();
1328	Cond.push_back(Elt: MachineOperand::CreateImm(Val: `1`));
1329	Cond.push_back(Elt: MachineOperand::CreateReg(Reg: isPPC64 ? PPC::CTR8 : PPC::CTR,
1330	isDef: true));
1331	return false;
1332	} else if (LastInst.getOpcode() == PPC::BDZ8 \|\|
1333	LastInst.getOpcode() == PPC::BDZ) {
1334	if (!LastInst.getOperand(i: `0`).isMBB())
1335	return true;
1336	if (DisableCTRLoopAnal)
1337	return true;
1338	TBB = LastInst.getOperand(i: `0`).getMBB();
1339	Cond.push_back(Elt: MachineOperand::CreateImm(Val: `0`));
1340	Cond.push_back(Elt: MachineOperand::CreateReg(Reg: isPPC64 ? PPC::CTR8 : PPC::CTR,
1341	isDef: true));
1342	return false;
1343	}
1344
1345	// Otherwise, don't know what this is.
1346	return true;
1347	}
1348
1349	// Get the instruction before it if it's a terminator.
1350	MachineInstr &SecondLastInst = *I;
1351
1352	// If there are three terminators, we don't know what sort of block this is.
1353	if (I != MBB.begin() && isUnpredicatedTerminator(MI: *--I))
1354	return true;
1355
1356	// If the block ends with PPC::B and PPC:BCC, handle it.
1357	if (SecondLastInst.getOpcode() == PPC::BCC &&
1358	LastInst.getOpcode() == PPC::B) {
1359	if (!SecondLastInst.getOperand(i: `2`).isMBB() \|\|
1360	!LastInst.getOperand(i: `0`).isMBB())
1361	return true;
1362	TBB = SecondLastInst.getOperand(i: `2`).getMBB();
1363	Cond.push_back(Elt: SecondLastInst.getOperand(i: `0`));
1364	Cond.push_back(Elt: SecondLastInst.getOperand(i: `1`));
1365	FBB = LastInst.getOperand(i: `0`).getMBB();
1366	return false;
1367	} else if (SecondLastInst.getOpcode() == PPC::BC &&
1368	LastInst.getOpcode() == PPC::B) {
1369	if (!SecondLastInst.getOperand(i: `1`).isMBB() \|\|
1370	!LastInst.getOperand(i: `0`).isMBB())
1371	return true;
1372	TBB = SecondLastInst.getOperand(i: `1`).getMBB();
1373	Cond.push_back(Elt: MachineOperand::CreateImm(Val: PPC::PRED_BIT_SET));
1374	Cond.push_back(Elt: SecondLastInst.getOperand(i: `0`));
1375	FBB = LastInst.getOperand(i: `0`).getMBB();
1376	return false;
1377	} else if (SecondLastInst.getOpcode() == PPC::BCn &&
1378	LastInst.getOpcode() == PPC::B) {
1379	if (!SecondLastInst.getOperand(i: `1`).isMBB() \|\|
1380	!LastInst.getOperand(i: `0`).isMBB())
1381	return true;
1382	TBB = SecondLastInst.getOperand(i: `1`).getMBB();
1383	Cond.push_back(Elt: MachineOperand::CreateImm(Val: PPC::PRED_BIT_UNSET));
1384	Cond.push_back(Elt: SecondLastInst.getOperand(i: `0`));
1385	FBB = LastInst.getOperand(i: `0`).getMBB();
1386	return false;
1387	} else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 \|\|
1388	SecondLastInst.getOpcode() == PPC::BDNZ) &&
1389	LastInst.getOpcode() == PPC::B) {
1390	if (!SecondLastInst.getOperand(i: `0`).isMBB() \|\|
1391	!LastInst.getOperand(i: `0`).isMBB())
1392	return true;
1393	if (DisableCTRLoopAnal)
1394	return true;
1395	TBB = SecondLastInst.getOperand(i: `0`).getMBB();
1396	Cond.push_back(Elt: MachineOperand::CreateImm(Val: `1`));
1397	Cond.push_back(Elt: MachineOperand::CreateReg(Reg: isPPC64 ? PPC::CTR8 : PPC::CTR,
1398	isDef: true));
1399	FBB = LastInst.getOperand(i: `0`).getMBB();
1400	return false;
1401	} else if ((SecondLastInst.getOpcode() == PPC::BDZ8 \|\|
1402	SecondLastInst.getOpcode() == PPC::BDZ) &&
1403	LastInst.getOpcode() == PPC::B) {
1404	if (!SecondLastInst.getOperand(i: `0`).isMBB() \|\|
1405	!LastInst.getOperand(i: `0`).isMBB())
1406	return true;
1407	if (DisableCTRLoopAnal)
1408	return true;
1409	TBB = SecondLastInst.getOperand(i: `0`).getMBB();
1410	Cond.push_back(Elt: MachineOperand::CreateImm(Val: `0`));
1411	Cond.push_back(Elt: MachineOperand::CreateReg(Reg: isPPC64 ? PPC::CTR8 : PPC::CTR,
1412	isDef: true));
1413	FBB = LastInst.getOperand(i: `0`).getMBB();
1414	return false;
1415	}
1416
1417	// If the block ends with two PPC:Bs, handle it. The second one is not
1418	// executed, so remove it.
1419	if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) {
1420	if (!SecondLastInst.getOperand(i: `0`).isMBB())
1421	return true;
1422	TBB = SecondLastInst.getOperand(i: `0`).getMBB();
1423	I = LastInst;
1424	if (AllowModify)
1425	I ->eraseFromParent();
1426	return false;
1427	}
1428
1429	// Otherwise, can't handle this.
1430	return true;
1431	}
1432
1433	unsigned PPCInstrInfo::removeBranch(MachineBasicBlock &MBB,
1434	int BytesRemoved) const* {
1435	assert(!BytesRemoved && "code size not handled");
1436
1437	MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
1438	if (I == MBB.end())
1439	return `0`;
1440
1441	if (I ->getOpcode() != PPC::B && I ->getOpcode() != PPC::BCC &&
1442	I ->getOpcode() != PPC::BC && I ->getOpcode() != PPC::BCn &&
1443	I ->getOpcode() != PPC::BDNZ8 && I ->getOpcode() != PPC::BDNZ &&
1444	I ->getOpcode() != PPC::BDZ8 && I ->getOpcode() != PPC::BDZ)
1445	return `0`;
1446
1447	// Remove the branch.
1448	I ->eraseFromParent();
1449
1450	I = MBB.end();
1451
1452	if (I == MBB.begin()) return `1`;
1453	--I;
1454	if (I ->getOpcode() != PPC::BCC &&
1455	I ->getOpcode() != PPC::BC && I ->getOpcode() != PPC::BCn &&
1456	I ->getOpcode() != PPC::BDNZ8 && I ->getOpcode() != PPC::BDNZ &&
1457	I ->getOpcode() != PPC::BDZ8 && I ->getOpcode() != PPC::BDZ)
1458	return `1`;
1459
1460	// Remove the branch.
1461	I ->eraseFromParent();
1462	return `2`;
1463	}
1464
1465	unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
1466	MachineBasicBlock *TBB,
1467	MachineBasicBlock *FBB,
1468	ArrayRef<MachineOperand> Cond,
1469	const DebugLoc &DL,
1470	int BytesAdded) const* {
1471	// Shouldn't be a fall through.
1472	assert(TBB && "insertBranch must not be told to insert a fallthrough");
1473	assert((Cond.size() == `2` \|\| Cond.size() == `0`) &&
1474	"PPC branch conditions have two components!");
1475	assert(!BytesAdded && "code size not handled");
1476
1477	bool isPPC64 = Subtarget.isPPC64();
1478
1479	// One-way branch.
1480	if (!FBB) {
1481	if (Cond.empty()) // Unconditional branch
1482	BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: PPC::B)).addMBB(MBB: TBB);
1483	else if (Cond [`1`].getReg() == PPC::CTR \|\| Cond [`1`].getReg() == PPC::CTR8)
1484	BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: Cond [`0`].getImm() ?
1485	(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1486	(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(MBB: TBB);
1487	else if (Cond [`0`].getImm() == PPC::PRED_BIT_SET)
1488	BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: PPC::BC)).add(MO: Cond [`1`]).addMBB(MBB: TBB);
1489	else if (Cond [`0`].getImm() == PPC::PRED_BIT_UNSET)
1490	BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: PPC::BCn)).add(MO: Cond [`1`]).addMBB(MBB: TBB);
1491	else // Conditional branch
1492	BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: PPC::BCC))
1493	.addImm(Val: Cond [`0`].getImm())
1494	.add(MO: Cond [`1`])
1495	.addMBB(MBB: TBB);
1496	return `1`;
1497	}
1498
1499	// Two-way Conditional Branch.
1500	if (Cond [`1`].getReg() == PPC::CTR \|\| Cond [`1`].getReg() == PPC::CTR8)
1501	BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: Cond [`0`].getImm() ?
1502	(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
1503	(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(MBB: TBB);
1504	else if (Cond [`0`].getImm() == PPC::PRED_BIT_SET)
1505	BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: PPC::BC)).add(MO: Cond [`1`]).addMBB(MBB: TBB);
1506	else if (Cond [`0`].getImm() == PPC::PRED_BIT_UNSET)
1507	BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: PPC::BCn)).add(MO: Cond [`1`]).addMBB(MBB: TBB);
1508	else
1509	BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: PPC::BCC))
1510	.addImm(Val: Cond [`0`].getImm())
1511	.add(MO: Cond [`1`])
1512	.addMBB(MBB: TBB);
1513	BuildMI(BB: &MBB, MIMD: DL, MCID: get(Opcode: PPC::B)).addMBB(MBB: FBB);
1514	return `2`;
1515	}
1516
1517	// Select analysis.
1518	bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
1519	ArrayRef<MachineOperand> Cond,
1520	Register DstReg, Register TrueReg,
1521	Register FalseReg, int &CondCycles,
1522	int &TrueCycles, int &FalseCycles) const {
1523	if (!Subtarget.hasISEL())
1524	return false;
1525
1526	if (Cond.size() != `2`)
1527	return false;
1528
1529	// If this is really a bdnz-like condition, then it cannot be turned into a
1530	// select.
1531	if (Cond [`1`].getReg() == PPC::CTR \|\| Cond [`1`].getReg() == PPC::CTR8)
1532	return false;
1533
1534	// If the conditional branch uses a physical register, then it cannot be
1535	// turned into a select.
1536	if (Cond [`1`].getReg().isPhysical())
1537	return false;
1538
1539	// Check register classes.
1540	const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1541	const TargetRegisterClass *RC =
1542	RI.getCommonSubClass(A: MRI.getRegClass(Reg: TrueReg), B: MRI.getRegClass(Reg: FalseReg));
1543	if (!RC)
1544	return false;
1545
1546	// isel is for regular integer GPRs only.
1547	if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
1548	!PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
1549	!PPC::G8RCRegClass.hasSubClassEq(RC) &&
1550	!PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
1551	return false;
1552
1553	// FIXME: These numbers are for the A2, how well they work for other cores is
1554	// an open question. On the A2, the isel instruction has a 2-cycle latency
1555	// but single-cycle throughput. These numbers are used in combination with
1556	// the MispredictPenalty setting from the active SchedMachineModel.
1557	CondCycles = `1`;
1558	TrueCycles = `1`;
1559	FalseCycles = `1`;
1560
1561	return true;
1562	}
1563
1564	void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
1565	MachineBasicBlock::iterator MI,
1566	const DebugLoc &dl, Register DestReg,
1567	ArrayRef<MachineOperand> Cond, Register TrueReg,
1568	Register FalseReg) const {
1569	assert(Cond.size() == `2` &&
1570	"PPC branch conditions have two components!");
1571
1572	// Get the register classes.
1573	MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1574	const TargetRegisterClass *RC =
1575	RI.getCommonSubClass(A: MRI.getRegClass(Reg: TrueReg), B: MRI.getRegClass(Reg: FalseReg));
1576	assert(RC && "TrueReg and FalseReg must have overlapping register classes");
1577
1578	bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) \|\|
1579	PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
1580	assert((Is64Bit \|\|
1581	PPC::GPRCRegClass.hasSubClassEq(RC) \|\|
1582	PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
1583	"isel is for regular integer GPRs only");
1584
1585	unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
1586	auto SelectPred = static_cast<PPC::Predicate>(Cond [`0`].getImm());
1587
1588	unsigned SubIdx = `0`;
1589	bool SwapOps = false;
1590	switch (SelectPred) {
1591	case PPC::PRED_EQ:
1592	case PPC::PRED_EQ_MINUS:
1593	case PPC::PRED_EQ_PLUS:
1594	SubIdx = PPC::sub_eq; SwapOps = false; break;
1595	case PPC::PRED_NE:
1596	case PPC::PRED_NE_MINUS:
1597	case PPC::PRED_NE_PLUS:
1598	SubIdx = PPC::sub_eq; SwapOps = true; break;
1599	case PPC::PRED_LT:
1600	case PPC::PRED_LT_MINUS:
1601	case PPC::PRED_LT_PLUS:
1602	SubIdx = PPC::sub_lt; SwapOps = false; break;
1603	case PPC::PRED_GE:
1604	case PPC::PRED_GE_MINUS:
1605	case PPC::PRED_GE_PLUS:
1606	SubIdx = PPC::sub_lt; SwapOps = true; break;
1607	case PPC::PRED_GT:
1608	case PPC::PRED_GT_MINUS:
1609	case PPC::PRED_GT_PLUS:
1610	SubIdx = PPC::sub_gt; SwapOps = false; break;
1611	case PPC::PRED_LE:
1612	case PPC::PRED_LE_MINUS:
1613	case PPC::PRED_LE_PLUS:
1614	SubIdx = PPC::sub_gt; SwapOps = true; break;
1615	case PPC::PRED_UN:
1616	case PPC::PRED_UN_MINUS:
1617	case PPC::PRED_UN_PLUS:
1618	SubIdx = PPC::sub_un; SwapOps = false; break;
1619	case PPC::PRED_NU:
1620	case PPC::PRED_NU_MINUS:
1621	case PPC::PRED_NU_PLUS:
1622	SubIdx = PPC::sub_un; SwapOps = true; break;
1623	case PPC::PRED_BIT_SET: SubIdx = `0`; SwapOps = false; break;
1624	case PPC::PRED_BIT_UNSET: SubIdx = `0`; SwapOps = true; break;
1625	}
1626
1627	Register FirstReg = SwapOps ? FalseReg : TrueReg,
1628	SecondReg = SwapOps ? TrueReg : FalseReg;
1629
1630	// The first input register of isel cannot be r0. If it is a member
1631	// of a register class that can be r0, then copy it first (the
1632	// register allocator should eliminate the copy).
1633	if (MRI.getRegClass(Reg: FirstReg)->contains(Reg: PPC::R0) \|\|
1634	MRI.getRegClass(Reg: FirstReg)->contains(Reg: PPC::X0)) {
1635	const TargetRegisterClass *FirstRC =
1636	MRI.getRegClass(Reg: FirstReg)->contains(Reg: PPC::X0) ?
1637	&PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
1638	Register OldFirstReg = FirstReg;
1639	FirstReg = MRI.createVirtualRegister(RegClass: FirstRC);
1640	BuildMI(BB&: MBB, I: MI, MIMD: dl, MCID: get(Opcode: TargetOpcode::COPY), DestReg: FirstReg)
1641	.addReg(RegNo: OldFirstReg);
1642	}
1643
1644	BuildMI(BB&: MBB, I: MI, MIMD: dl, MCID: get(Opcode: OpCode), DestReg)
1645	.addReg(RegNo: FirstReg).addReg(RegNo: SecondReg)
1646	.addReg(RegNo: Cond [`1`].getReg(), flags: `0`, SubReg: SubIdx);
1647	}
1648
1649	static unsigned getCRBitValue(unsigned CRBit) {
1650	unsigned Ret = `4`;
1651	if (CRBit == PPC::CR0LT \|\| CRBit == PPC::CR1LT \|\|
1652	CRBit == PPC::CR2LT \|\| CRBit == PPC::CR3LT \|\|
1653	CRBit == PPC::CR4LT \|\| CRBit == PPC::CR5LT \|\|
1654	CRBit == PPC::CR6LT \|\| CRBit == PPC::CR7LT)
1655	Ret = `3`;
1656	if (CRBit == PPC::CR0GT \|\| CRBit == PPC::CR1GT \|\|
1657	CRBit == PPC::CR2GT \|\| CRBit == PPC::CR3GT \|\|
1658	CRBit == PPC::CR4GT \|\| CRBit == PPC::CR5GT \|\|
1659	CRBit == PPC::CR6GT \|\| CRBit == PPC::CR7GT)
1660	Ret = `2`;
1661	if (CRBit == PPC::CR0EQ \|\| CRBit == PPC::CR1EQ \|\|
1662	CRBit == PPC::CR2EQ \|\| CRBit == PPC::CR3EQ \|\|
1663	CRBit == PPC::CR4EQ \|\| CRBit == PPC::CR5EQ \|\|
1664	CRBit == PPC::CR6EQ \|\| CRBit == PPC::CR7EQ)
1665	Ret = `1`;
1666	if (CRBit == PPC::CR0UN \|\| CRBit == PPC::CR1UN \|\|
1667	CRBit == PPC::CR2UN \|\| CRBit == PPC::CR3UN \|\|
1668	CRBit == PPC::CR4UN \|\| CRBit == PPC::CR5UN \|\|
1669	CRBit == PPC::CR6UN \|\| CRBit == PPC::CR7UN)
1670	Ret = `0`;
1671
1672	assert(Ret != `4` && "Invalid CR bit register");
1673	return Ret;
1674	}
1675
1676	void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
1677	MachineBasicBlock::iterator I,
1678	const DebugLoc &DL, Register DestReg,
1679	Register SrcReg, bool KillSrc,
1680	bool RenamableDest, bool RenamableSrc) const {
1681	// We can end up with self copies and similar things as a result of VSX copy
1682	// legalization. Promote them here.
1683	const TargetRegisterInfo *TRI = &getRegisterInfo();
1684	if (PPC::F8RCRegClass.contains(Reg: DestReg) &&
1685	PPC::VSRCRegClass.contains(Reg: SrcReg)) {
1686	MCRegister SuperReg =
1687	TRI->getMatchingSuperReg(Reg: DestReg, SubIdx: PPC::sub_64, RC: &PPC::VSRCRegClass);
1688
1689	if (VSXSelfCopyCrash && SrcReg == SuperReg)
1690	llvm_unreachable("nop VSX copy");
1691
1692	DestReg = SuperReg;
1693	} else if (PPC::F8RCRegClass.contains(Reg: SrcReg) &&
1694	PPC::VSRCRegClass.contains(Reg: DestReg)) {
1695	MCRegister SuperReg =
1696	TRI->getMatchingSuperReg(Reg: SrcReg, SubIdx: PPC::sub_64, RC: &PPC::VSRCRegClass);
1697
1698	if (VSXSelfCopyCrash && DestReg == SuperReg)
1699	llvm_unreachable("nop VSX copy");
1700
1701	SrcReg = SuperReg;
1702	}
1703
1704	// Different class register copy
1705	if (PPC::CRBITRCRegClass.contains(Reg: SrcReg) &&
1706	PPC::GPRCRegClass.contains(Reg: DestReg)) {
1707	MCRegister CRReg = getCRFromCRBit(SrcReg);
1708	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: PPC::MFOCRF), DestReg).addReg(RegNo: CRReg);
1709	getKillRegState(B: KillSrc);
1710	// Rotate the CR bit in the CR fields to be the least significant bit and
1711	// then mask with 0x1 (MB = ME = 31).
1712	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: PPC::RLWINM), DestReg)
1713	.addReg(RegNo: DestReg, flags: RegState::Kill)
1714	.addImm(Val: TRI->getEncodingValue(Reg: CRReg) * `4` + (`4` - getCRBitValue(CRBit: SrcReg)))
1715	.addImm(Val: `31`)
1716	.addImm(Val: `31`);
1717	return;
1718	} else if (PPC::CRRCRegClass.contains(Reg: SrcReg) &&
1719	(PPC::G8RCRegClass.contains(Reg: DestReg) \|\|
1720	PPC::GPRCRegClass.contains(Reg: DestReg))) {
1721	bool Is64Bit = PPC::G8RCRegClass.contains(Reg: DestReg);
1722	unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;
1723	unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;
1724	unsigned CRNum = TRI->getEncodingValue(Reg: SrcReg);
1725	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: MvCode), DestReg).addReg(RegNo: SrcReg);
1726	getKillRegState(B: KillSrc);
1727	if (CRNum == `7`)
1728	return;
1729	// Shift the CR bits to make the CR field in the lowest 4 bits of GRC.
1730	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: ShCode), DestReg)
1731	.addReg(RegNo: DestReg, flags: RegState::Kill)
1732	.addImm(Val: CRNum * `4` + `4`)
1733	.addImm(Val: `28`)
1734	.addImm(Val: `31`);
1735	return;
1736	} else if (PPC::G8RCRegClass.contains(Reg: SrcReg) &&
1737	PPC::VSFRCRegClass.contains(Reg: DestReg)) {
1738	assert(Subtarget.hasDirectMove() &&
1739	"Subtarget doesn't support directmove, don't know how to copy.");
1740	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: PPC::MTVSRD), DestReg).addReg(RegNo: SrcReg);
1741	NumGPRtoVSRSpill ++;
1742	getKillRegState(B: KillSrc);
1743	return;
1744	} else if (PPC::VSFRCRegClass.contains(Reg: SrcReg) &&
1745	PPC::G8RCRegClass.contains(Reg: DestReg)) {
1746	assert(Subtarget.hasDirectMove() &&
1747	"Subtarget doesn't support directmove, don't know how to copy.");
1748	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: PPC::MFVSRD), DestReg).addReg(RegNo: SrcReg);
1749	getKillRegState(B: KillSrc);
1750	return;
1751	} else if (PPC::SPERCRegClass.contains(Reg: SrcReg) &&
1752	PPC::GPRCRegClass.contains(Reg: DestReg)) {
1753	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: PPC::EFSCFD), DestReg).addReg(RegNo: SrcReg);
1754	getKillRegState(B: KillSrc);
1755	return;
1756	} else if (PPC::GPRCRegClass.contains(Reg: SrcReg) &&
1757	PPC::SPERCRegClass.contains(Reg: DestReg)) {
1758	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: PPC::EFDCFS), DestReg).addReg(RegNo: SrcReg);
1759	getKillRegState(B: KillSrc);
1760	return;
1761	} else if ((PPC::G8RCRegClass.contains(Reg: DestReg) \|\|
1762	PPC::GPRCRegClass.contains(Reg: DestReg)) &&
1763	SrcReg == PPC::CARRY) {
1764	bool Is64Bit = PPC::G8RCRegClass.contains(Reg: DestReg);
1765	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: Is64Bit ? PPC::MFSPR8 : PPC::MFSPR), DestReg)
1766	.addImm(Val: `1`)
1767	.addReg(RegNo: PPC::CARRY, flags: RegState::Implicit);
1768	return;
1769	} else if ((PPC::G8RCRegClass.contains(Reg: SrcReg) \|\|
1770	PPC::GPRCRegClass.contains(Reg: SrcReg)) &&
1771	DestReg == PPC::CARRY) {
1772	bool Is64Bit = PPC::G8RCRegClass.contains(Reg: SrcReg);
1773	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: Is64Bit ? PPC::MTSPR8 : PPC::MTSPR))
1774	.addImm(Val: `1`)
1775	.addReg(RegNo: SrcReg)
1776	.addReg(RegNo: PPC::CARRY, flags: RegState::ImplicitDefine);
1777	return;
1778	}
1779
1780	unsigned Opc;
1781	if (PPC::GPRCRegClass.contains(Reg1: DestReg, Reg2: SrcReg))
1782	Opc = PPC::OR;
1783	else if (PPC::G8RCRegClass.contains(Reg1: DestReg, Reg2: SrcReg))
1784	Opc = PPC::OR8;
1785	else if (PPC::F4RCRegClass.contains(Reg1: DestReg, Reg2: SrcReg))
1786	Opc = PPC::FMR;
1787	else if (PPC::CRRCRegClass.contains(Reg1: DestReg, Reg2: SrcReg))
1788	Opc = PPC::MCRF;
1789	else if (PPC::VRRCRegClass.contains(Reg1: DestReg, Reg2: SrcReg))
1790	Opc = PPC::VOR;
1791	else if (PPC::VSRCRegClass.contains(Reg1: DestReg, Reg2: SrcReg))
1792	// There are two different ways this can be done:
1793	// 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
1794	// issue in VSU pipeline 0.
1795	// 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
1796	// can go to either pipeline.
1797	// We'll always use xxlor here, because in practically all cases where
1798	// copies are generated, they are close enough to some use that the
1799	// lower-latency form is preferable.
1800	Opc = PPC::XXLOR;
1801	else if (PPC::VSFRCRegClass.contains(Reg1: DestReg, Reg2: SrcReg) \|\|
1802	PPC::VSSRCRegClass.contains(Reg1: DestReg, Reg2: SrcReg))
1803	Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
1804	else if (Subtarget.pairedVectorMemops() &&
1805	PPC::VSRpRCRegClass.contains(Reg1: DestReg, Reg2: SrcReg)) {
1806	if (SrcReg > PPC::VSRp15)
1807	SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * `2`;
1808	else
1809	SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * `2`;
1810	if (DestReg > PPC::VSRp15)
1811	DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * `2`;
1812	else
1813	DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * `2`;
1814	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: PPC::XXLOR), DestReg).
1815	addReg(RegNo: SrcReg).addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
1816	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: PPC::XXLOR), DestReg: DestReg + `1`).
1817	addReg(RegNo: SrcReg + `1`).addReg(RegNo: SrcReg + `1`, flags: getKillRegState(B: KillSrc));
1818	return;
1819	}
1820	else if (PPC::CRBITRCRegClass.contains(Reg1: DestReg, Reg2: SrcReg))
1821	Opc = PPC::CROR;
1822	else if (PPC::SPERCRegClass.contains(Reg1: DestReg, Reg2: SrcReg))
1823	Opc = PPC::EVOR;
1824	else if ((PPC::ACCRCRegClass.contains(Reg: DestReg) \|\|
1825	PPC::UACCRCRegClass.contains(Reg: DestReg)) &&
1826	(PPC::ACCRCRegClass.contains(Reg: SrcReg) \|\|
1827	PPC::UACCRCRegClass.contains(Reg: SrcReg))) {
1828	// If primed, de-prime the source register, copy the individual registers
1829	// and prime the destination if needed. The vector subregisters are
1830	// vs[(u)acc 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the*
1831	// source is primed, we need to re-prime it after the copy as well.
1832	PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);
1833	bool DestPrimed = PPC::ACCRCRegClass.contains(Reg: DestReg);
1834	bool SrcPrimed = PPC::ACCRCRegClass.contains(Reg: SrcReg);
1835	MCRegister VSLSrcReg =
1836	PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * `4`;
1837	MCRegister VSLDestReg =
1838	PPC::VSL0 + (DestReg - (DestPrimed ? PPC::ACC0 : PPC::UACC0)) * `4`;
1839	if (SrcPrimed)
1840	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: PPC::XXMFACC), DestReg: SrcReg).addReg(RegNo: SrcReg);
1841	for (unsigned Idx = `0`; Idx < `4`; Idx++)
1842	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: PPC::XXLOR), DestReg: VSLDestReg + Idx)
1843	.addReg(RegNo: VSLSrcReg + Idx)
1844	.addReg(RegNo: VSLSrcReg + Idx, flags: getKillRegState(B: KillSrc));
1845	if (DestPrimed)
1846	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: PPC::XXMTACC), DestReg).addReg(RegNo: DestReg);
1847	if (SrcPrimed && !KillSrc)
1848	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: PPC::XXMTACC), DestReg: SrcReg).addReg(RegNo: SrcReg);
1849	return;
1850	} else if (PPC::G8pRCRegClass.contains(Reg: DestReg) &&
1851	PPC::G8pRCRegClass.contains(Reg: SrcReg)) {
1852	// TODO: Handle G8RC to G8pRC (and vice versa) copy.
1853	unsigned DestRegIdx = DestReg - PPC::G8p0;
1854	MCRegister DestRegSub0 = PPC::X0 + `2` * DestRegIdx;
1855	MCRegister DestRegSub1 = PPC::X0 + `2` * DestRegIdx + `1`;
1856	unsigned SrcRegIdx = SrcReg - PPC::G8p0;
1857	MCRegister SrcRegSub0 = PPC::X0 + `2` * SrcRegIdx;
1858	MCRegister SrcRegSub1 = PPC::X0 + `2` * SrcRegIdx + `1`;
1859	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: PPC::OR8), DestReg: DestRegSub0)
1860	.addReg(RegNo: SrcRegSub0)
1861	.addReg(RegNo: SrcRegSub0, flags: getKillRegState(B: KillSrc));
1862	BuildMI(BB&: MBB, I, MIMD: DL, MCID: get(Opcode: PPC::OR8), DestReg: DestRegSub1)
1863	.addReg(RegNo: SrcRegSub1)
1864	.addReg(RegNo: SrcRegSub1, flags: getKillRegState(B: KillSrc));
1865	return;
1866	} else
1867	llvm_unreachable("Impossible reg-to-reg copy");
1868
1869	const MCInstrDesc &MCID = get(Opcode: Opc);
1870	if (MCID.getNumOperands() == `3`)
1871	BuildMI(BB&: MBB, I, MIMD: DL, MCID, DestReg)
1872	.addReg(RegNo: SrcReg).addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
1873	else
1874	BuildMI(BB&: MBB, I, MIMD: DL, MCID, DestReg).addReg(RegNo: SrcReg, flags: getKillRegState(B: KillSrc));
1875	}
1876
1877	unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass RC) const* {
1878	int OpcodeIndex = `0`;
1879
1880	if (PPC::GPRCRegClass.hasSubClassEq(RC) \|\|
1881	PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
1882	OpcodeIndex = SOK_Int4Spill;
1883	} else if (PPC::G8RCRegClass.hasSubClassEq(RC) \|\|
1884	PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
1885	OpcodeIndex = SOK_Int8Spill;
1886	} else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
1887	OpcodeIndex = SOK_Float8Spill;
1888	} else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
1889	OpcodeIndex = SOK_Float4Spill;
1890	} else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {
1891	OpcodeIndex = SOK_SPESpill;
1892	} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
1893	OpcodeIndex = SOK_CRSpill;
1894	} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
1895	OpcodeIndex = SOK_CRBitSpill;
1896	} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
1897	OpcodeIndex = SOK_VRVectorSpill;
1898	} else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
1899	OpcodeIndex = SOK_VSXVectorSpill;
1900	} else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
1901	OpcodeIndex = SOK_VectorFloat8Spill;
1902	} else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
1903	OpcodeIndex = SOK_VectorFloat4Spill;
1904	} else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {
1905	OpcodeIndex = SOK_SpillToVSR;
1906	} else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {
1907	assert(Subtarget.pairedVectorMemops() &&
1908	"Register unexpected when paired memops are disabled.");
1909	OpcodeIndex = SOK_AccumulatorSpill;
1910	} else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {
1911	assert(Subtarget.pairedVectorMemops() &&
1912	"Register unexpected when paired memops are disabled.");
1913	OpcodeIndex = SOK_UAccumulatorSpill;
1914	} else if (PPC::WACCRCRegClass.hasSubClassEq(RC)) {
1915	assert(Subtarget.pairedVectorMemops() &&
1916	"Register unexpected when paired memops are disabled.");
1917	OpcodeIndex = SOK_WAccumulatorSpill;
1918	} else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {
1919	assert(Subtarget.pairedVectorMemops() &&
1920	"Register unexpected when paired memops are disabled.");
1921	OpcodeIndex = SOK_PairedVecSpill;
1922	} else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {
1923	OpcodeIndex = SOK_PairedG8Spill;
1924	} else if (PPC::DMRROWRCRegClass.hasSubClassEq(RC)) {
1925	llvm_unreachable("TODO: Implement spill DMRROW regclass!");
1926	} else if (PPC::DMRROWpRCRegClass.hasSubClassEq(RC)) {
1927	llvm_unreachable("TODO: Implement spill DMRROWp regclass!");
1928	} else if (PPC::DMRpRCRegClass.hasSubClassEq(RC)) {
1929	OpcodeIndex = SOK_DMRpSpill;
1930	} else if (PPC::DMRRCRegClass.hasSubClassEq(RC)) {
1931	OpcodeIndex = SOK_DMRSpill;
1932	} else {
1933	llvm_unreachable("Unknown regclass!");
1934	}
1935	return OpcodeIndex;
1936	}
1937
1938	unsigned
1939	PPCInstrInfo::getStoreOpcodeForSpill(const TargetRegisterClass RC) const* {
1940	ArrayRef<unsigned> OpcodesForSpill = getStoreOpcodesForSpillArray();
1941	return OpcodesForSpill [getSpillIndex(RC)];
1942	}
1943
1944	unsigned
1945	PPCInstrInfo::getLoadOpcodeForSpill(const TargetRegisterClass RC) const* {
1946	ArrayRef<unsigned> OpcodesForSpill = getLoadOpcodesForSpillArray();
1947	return OpcodesForSpill [getSpillIndex(RC)];
1948	}
1949
1950	void PPCInstrInfo::StoreRegToStackSlot(
1951	MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,
1952	const TargetRegisterClass *RC,
1953	SmallVectorImpl<MachineInstr > &NewMIs) const* {
1954	unsigned Opcode = getStoreOpcodeForSpill(RC);
1955	DebugLoc DL;
1956
1957	PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1958	FuncInfo->setHasSpills();
1959
1960	NewMIs.push_back(Elt: addFrameReference(
1961	MIB: BuildMI(MF, MIMD: DL, MCID: get(Opcode)).addReg(RegNo: SrcReg, flags: getKillRegState(B: isKill)),
1962	FI: FrameIdx));
1963
1964	if (PPC::CRRCRegClass.hasSubClassEq(RC) \|\|
1965	PPC::CRBITRCRegClass.hasSubClassEq(RC))
1966	FuncInfo->setSpillsCR();
1967
1968	if (isXFormMemOp(Opcode))
1969	FuncInfo->setHasNonRISpills();
1970	}
1971
1972	void PPCInstrInfo::storeRegToStackSlotNoUpd(
1973	MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg,
1974	bool isKill, int FrameIdx, const TargetRegisterClass *RC,
1975	const TargetRegisterInfo TRI) const* {
1976	MachineFunction &MF = *MBB.getParent();
1977	SmallVector<MachineInstr *, `4`> NewMIs;
1978
1979	StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);
1980
1981	for (MachineInstr *NewMI : NewMIs)
1982	MBB.insert(I: MI, MI: NewMI);
1983
1984	const MachineFrameInfo &MFI = MF.getFrameInfo();
1985	MachineMemOperand *MMO = MF.getMachineMemOperand(
1986	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIdx),
1987	F: MachineMemOperand::MOStore, Size: MFI.getObjectSize(ObjectIdx: FrameIdx),
1988	BaseAlignment: MFI.getObjectAlign(ObjectIdx: FrameIdx));
1989	NewMIs.back()->addMemOperand(MF, MO: MMO);
1990	}
1991
1992	void PPCInstrInfo::storeRegToStackSlot(
1993	MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
1994	bool isKill, int FrameIdx, const TargetRegisterClass *RC,
1995	const TargetRegisterInfo *TRI, Register VReg,
1996	MachineInstr::MIFlag Flags) const {
1997	// We need to avoid a situation in which the value from a VRRC register is
1998	// spilled using an Altivec instruction and reloaded into a VSRC register
1999	// using a VSX instruction. The issue with this is that the VSX
2000	// load/store instructions swap the doublewords in the vector and the Altivec
2001	// ones don't. The register classes on the spill/reload may be different if
2002	// the register is defined using an Altivec instruction and is then used by a
2003	// VSX instruction.
2004	RC = updatedRC(RC);
2005	storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI);
2006	}
2007
2008	void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
2009	unsigned DestReg, int FrameIdx,
2010	const TargetRegisterClass *RC,
2011	SmallVectorImpl<MachineInstr *> &NewMIs)
2012	const {
2013	unsigned Opcode = getLoadOpcodeForSpill(RC);
2014	NewMIs.push_back(Elt: addFrameReference(MIB: BuildMI(MF, MIMD: DL, MCID: get(Opcode), DestReg),
2015	FI: FrameIdx));
2016	}
2017
2018	void PPCInstrInfo::loadRegFromStackSlotNoUpd(
2019	MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg,
2020	int FrameIdx, const TargetRegisterClass *RC,
2021	const TargetRegisterInfo TRI) const* {
2022	MachineFunction &MF = *MBB.getParent();
2023	SmallVector<MachineInstr*, `4`> NewMIs;
2024	DebugLoc DL;
2025	if (MI != MBB.end()) DL = MI ->getDebugLoc();
2026
2027	LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
2028
2029	for (MachineInstr *NewMI : NewMIs)
2030	MBB.insert(I: MI, MI: NewMI);
2031
2032	const MachineFrameInfo &MFI = MF.getFrameInfo();
2033	MachineMemOperand *MMO = MF.getMachineMemOperand(
2034	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIdx),
2035	F: MachineMemOperand::MOLoad, Size: MFI.getObjectSize(ObjectIdx: FrameIdx),
2036	BaseAlignment: MFI.getObjectAlign(ObjectIdx: FrameIdx));
2037	NewMIs.back()->addMemOperand(MF, MO: MMO);
2038	}
2039
2040	void PPCInstrInfo::loadRegFromStackSlot(
2041	MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg,
2042	int FrameIdx, const TargetRegisterClass RC, const* TargetRegisterInfo *TRI,
2043	Register VReg, MachineInstr::MIFlag Flags) const {
2044	// We need to avoid a situation in which the value from a VRRC register is
2045	// spilled using an Altivec instruction and reloaded into a VSRC register
2046	// using a VSX instruction. The issue with this is that the VSX
2047	// load/store instructions swap the doublewords in the vector and the Altivec
2048	// ones don't. The register classes on the spill/reload may be different if
2049	// the register is defined using an Altivec instruction and is then used by a
2050	// VSX instruction.
2051	RC = updatedRC(RC);
2052
2053	loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI);
2054	}
2055
2056	bool PPCInstrInfo::
2057	reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
2058	assert(Cond.size() == `2` && "Invalid PPC branch opcode!");
2059	if (Cond [`1`].getReg() == PPC::CTR8 \|\| Cond [`1`].getReg() == PPC::CTR)
2060	Cond [`0`].setImm(Cond [`0`].getImm() == `0` ? `1` : `0`);
2061	else
2062	// Leave the CR# the same, but invert the condition.
2063	Cond [`0`].setImm(PPC::InvertPredicate(Opcode: (PPC::Predicate)Cond [`0`].getImm()));
2064	return false;
2065	}
2066
2067	// For some instructions, it is legal to fold ZERO into the RA register field.
2068	// This function performs that fold by replacing the operand with PPC::ZERO,
2069	// it does not consider whether the load immediate zero is no longer in use.
2070	bool PPCInstrInfo::onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
2071	Register Reg) const {
2072	// A zero immediate should always be loaded with a single li.
2073	unsigned DefOpc = DefMI.getOpcode();
2074	if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
2075	return false;
2076	if (!DefMI.getOperand(i: `1`).isImm())
2077	return false;
2078	if (DefMI.getOperand(i: `1`).getImm() != `0`)
2079	return false;
2080
2081	// Note that we cannot here invert the arguments of an isel in order to fold
2082	// a ZERO into what is presented as the second argument. All we have here
2083	// is the condition bit, and that might come from a CR-logical bit operation.
2084
2085	const MCInstrDesc &UseMCID = UseMI.getDesc();
2086
2087	// Only fold into real machine instructions.
2088	if (UseMCID.isPseudo())
2089	return false;
2090
2091	// We need to find which of the User's operands is to be folded, that will be
2092	// the operand that matches the given register ID.
2093	unsigned UseIdx;
2094	for (UseIdx = `0`; UseIdx < UseMI.getNumOperands(); ++UseIdx)
2095	if (UseMI.getOperand(i: UseIdx).isReg() &&
2096	UseMI.getOperand(i: UseIdx).getReg() == Reg)
2097	break;
2098
2099	assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");
2100	assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
2101
2102	const MCOperandInfo *UseInfo = &UseMCID.operands()[UseIdx];
2103
2104	// We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
2105	// register (which might also be specified as a pointer class kind).
2106	if (UseInfo->isLookupPtrRegClass()) {
2107	if (UseInfo->RegClass / Kind / != `1`)
2108	return false;
2109	} else {
2110	if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
2111	UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
2112	return false;
2113	}
2114
2115	// Make sure this is not tied to an output register (or otherwise
2116	// constrained). This is true for ST?UX registers, for example, which
2117	// are tied to their output registers.
2118	if (UseInfo->Constraints != `0`)
2119	return false;
2120
2121	MCRegister ZeroReg;
2122	if (UseInfo->isLookupPtrRegClass()) {
2123	bool isPPC64 = Subtarget.isPPC64();
2124	ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
2125	} else {
2126	ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
2127	PPC::ZERO8 : PPC::ZERO;
2128	}
2129
2130	LLVM_DEBUG(dbgs() << "Folded immediate zero for: ");
2131	LLVM_DEBUG(UseMI.dump());
2132	UseMI.getOperand(i: UseIdx).setReg(ZeroReg);
2133	LLVM_DEBUG(dbgs() << "Into: ");
2134	LLVM_DEBUG(UseMI.dump());
2135	return true;
2136	}
2137
2138	// Folds zero into instructions which have a load immediate zero as an operand
2139	// but also recognize zero as immediate zero. If the definition of the load
2140	// has no more users it is deleted.
2141	bool PPCInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
2142	Register Reg, MachineRegisterInfo MRI) const* {
2143	bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);
2144	if (MRI->use_nodbg_empty(RegNo: Reg))
2145	DefMI.eraseFromParent();
2146	return Changed;
2147	}
2148
2149	static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
2150	for (MachineInstr &MI : MBB)
2151	if (MI.definesRegister(Reg: PPC::CTR, /TRI=/nullptr) \|\|
2152	MI.definesRegister(Reg: PPC::CTR8, /TRI=/nullptr))
2153	return true;
2154	return false;
2155	}
2156
2157	// We should make sure that, if we're going to predicate both sides of a
2158	// condition (a diamond), that both sides don't define the counter register. We
2159	// can predicate counter-decrement-based branches, but while that predicates
2160	// the branching, it does not predicate the counter decrement. If we tried to
2161	// merge the triangle into one predicated block, we'd decrement the counter
2162	// twice.
2163	bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
2164	unsigned NumT, unsigned ExtraT,
2165	MachineBasicBlock &FMBB,
2166	unsigned NumF, unsigned ExtraF,
2167	BranchProbability Probability) const {
2168	return !(MBBDefinesCTR(MBB&: TMBB) && MBBDefinesCTR(MBB&: FMBB));
2169	}
2170
2171
2172	bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const {
2173	// The predicated branches are identified by their type, not really by the
2174	// explicit presence of a predicate. Furthermore, some of them can be
2175	// predicated more than once. Because if conversion won't try to predicate
2176	// any instruction which already claims to be predicated (by returning true
2177	// here), always return false. In doing so, we let isPredicable() be the
2178	// final word on whether not the instruction can be (further) predicated.
2179
2180	return false;
2181	}
2182
2183	bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
2184	const MachineBasicBlock *MBB,
2185	const MachineFunction &MF) const {
2186	switch (MI.getOpcode()) {
2187	default:
2188	break;
2189	// Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion
2190	// across them, since some FP operations may change content of FPSCR.
2191	// TODO: Model FPSCR in PPC instruction definitions and remove the workaround
2192	case PPC::MFFS:
2193	case PPC::MTFSF:
2194	case PPC::FENCE:
2195	return true;
2196	}
2197	return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
2198	}
2199
2200	bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
2201	ArrayRef<MachineOperand> Pred) const {
2202	unsigned OpC = MI.getOpcode();
2203	if (OpC == PPC::BLR \|\| OpC == PPC::BLR8) {
2204	if (Pred [`1`].getReg() == PPC::CTR8 \|\| Pred [`1`].getReg() == PPC::CTR) {
2205	bool isPPC64 = Subtarget.isPPC64();
2206	MI.setDesc(get(Opcode: Pred [`0`].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)
2207	: (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
2208	// Need add Def and Use for CTR implicit operand.
2209	MachineInstrBuilder (*MI.getParent()->getParent(), MI)
2210	.addReg(RegNo: Pred [`1`].getReg(), flags: RegState::Implicit)
2211	.addReg(RegNo: Pred [`1`].getReg(), flags: RegState::ImplicitDefine);
2212	} else if (Pred [`0`].getImm() == PPC::PRED_BIT_SET) {
2213	MI.setDesc(get(Opcode: PPC::BCLR));
2214	MachineInstrBuilder (*MI.getParent()->getParent(), MI).add(MO: Pred [`1`]);
2215	} else if (Pred [`0`].getImm() == PPC::PRED_BIT_UNSET) {
2216	MI.setDesc(get(Opcode: PPC::BCLRn));
2217	MachineInstrBuilder (*MI.getParent()->getParent(), MI).add(MO: Pred [`1`]);
2218	} else {
2219	MI.setDesc(get(Opcode: PPC::BCCLR));
2220	MachineInstrBuilder (*MI.getParent()->getParent(), MI)
2221	.addImm(Val: Pred [`0`].getImm())
2222	.add(MO: Pred [`1`]);
2223	}
2224
2225	return true;
2226	} else if (OpC == PPC::B) {
2227	if (Pred [`1`].getReg() == PPC::CTR8 \|\| Pred [`1`].getReg() == PPC::CTR) {
2228	bool isPPC64 = Subtarget.isPPC64();
2229	MI.setDesc(get(Opcode: Pred [`0`].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
2230	: (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
2231	// Need add Def and Use for CTR implicit operand.
2232	MachineInstrBuilder (*MI.getParent()->getParent(), MI)
2233	.addReg(RegNo: Pred [`1`].getReg(), flags: RegState::Implicit)
2234	.addReg(RegNo: Pred [`1`].getReg(), flags: RegState::ImplicitDefine);
2235	} else if (Pred [`0`].getImm() == PPC::PRED_BIT_SET) {
2236	MachineBasicBlock *MBB = MI.getOperand(i: `0`).getMBB();
2237	MI.removeOperand(OpNo: `0`);
2238
2239	MI.setDesc(get(Opcode: PPC::BC));
2240	MachineInstrBuilder (*MI.getParent()->getParent(), MI)
2241	.add(MO: Pred [`1`])
2242	.addMBB(MBB);
2243	} else if (Pred [`0`].getImm() == PPC::PRED_BIT_UNSET) {
2244	MachineBasicBlock *MBB = MI.getOperand(i: `0`).getMBB();
2245	MI.removeOperand(OpNo: `0`);
2246
2247	MI.setDesc(get(Opcode: PPC::BCn));
2248	MachineInstrBuilder (*MI.getParent()->getParent(), MI)
2249	.add(MO: Pred [`1`])
2250	.addMBB(MBB);
2251	} else {
2252	MachineBasicBlock *MBB = MI.getOperand(i: `0`).getMBB();
2253	MI.removeOperand(OpNo: `0`);
2254
2255	MI.setDesc(get(Opcode: PPC::BCC));
2256	MachineInstrBuilder (*MI.getParent()->getParent(), MI)
2257	.addImm(Val: Pred [`0`].getImm())
2258	.add(MO: Pred [`1`])
2259	.addMBB(MBB);
2260	}
2261
2262	return true;
2263	} else if (OpC == PPC::BCTR \|\| OpC == PPC::BCTR8 \|\| OpC == PPC::BCTRL \|\|
2264	OpC == PPC::BCTRL8 \|\| OpC == PPC::BCTRL_RM \|\|
2265	OpC == PPC::BCTRL8_RM) {
2266	if (Pred [`1`].getReg() == PPC::CTR8 \|\| Pred [`1`].getReg() == PPC::CTR)
2267	llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
2268
2269	bool setLR = OpC == PPC::BCTRL \|\| OpC == PPC::BCTRL8 \|\|
2270	OpC == PPC::BCTRL_RM \|\| OpC == PPC::BCTRL8_RM;
2271	bool isPPC64 = Subtarget.isPPC64();
2272
2273	if (Pred [`0`].getImm() == PPC::PRED_BIT_SET) {
2274	MI.setDesc(get(Opcode: isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)
2275	: (setLR ? PPC::BCCTRL : PPC::BCCTR)));
2276	MachineInstrBuilder (*MI.getParent()->getParent(), MI).add(MO: Pred [`1`]);
2277	} else if (Pred [`0`].getImm() == PPC::PRED_BIT_UNSET) {
2278	MI.setDesc(get(Opcode: isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)
2279	: (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
2280	MachineInstrBuilder (*MI.getParent()->getParent(), MI).add(MO: Pred [`1`]);
2281	} else {
2282	MI.setDesc(get(Opcode: isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)
2283	: (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
2284	MachineInstrBuilder (*MI.getParent()->getParent(), MI)
2285	.addImm(Val: Pred [`0`].getImm())
2286	.add(MO: Pred [`1`]);
2287	}
2288
2289	// Need add Def and Use for LR implicit operand.
2290	if (setLR)
2291	MachineInstrBuilder (*MI.getParent()->getParent(), MI)
2292	.addReg(RegNo: isPPC64 ? PPC::LR8 : PPC::LR, flags: RegState::Implicit)
2293	.addReg(RegNo: isPPC64 ? PPC::LR8 : PPC::LR, flags: RegState::ImplicitDefine);
2294	if (OpC == PPC::BCTRL_RM \|\| OpC == PPC::BCTRL8_RM)
2295	MachineInstrBuilder (*MI.getParent()->getParent(), MI)
2296	.addReg(RegNo: PPC::RM, flags: RegState::ImplicitDefine);
2297
2298	return true;
2299	}
2300
2301	return false;
2302	}
2303
2304	bool PPCInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
2305	ArrayRef<MachineOperand> Pred2) const {
2306	assert(Pred1.size() == `2` && "Invalid PPC first predicate");
2307	assert(Pred2.size() == `2` && "Invalid PPC second predicate");
2308
2309	if (Pred1 [`1`].getReg() == PPC::CTR8 \|\| Pred1 [`1`].getReg() == PPC::CTR)
2310	return false;
2311	if (Pred2 [`1`].getReg() == PPC::CTR8 \|\| Pred2 [`1`].getReg() == PPC::CTR)
2312	return false;
2313
2314	// P1 can only subsume P2 if they test the same condition register.
2315	if (Pred1 [`1`].getReg() != Pred2 [`1`].getReg())
2316	return false;
2317
2318	PPC::Predicate P1 = (PPC::Predicate) Pred1 [`0`].getImm();
2319	PPC::Predicate P2 = (PPC::Predicate) Pred2 [`0`].getImm();
2320
2321	if (P1 == P2)
2322	return true;
2323
2324	// Does P1 subsume P2, e.g. GE subsumes GT.
2325	if (P1 == PPC::PRED_LE &&
2326	(P2 == PPC::PRED_LT \|\| P2 == PPC::PRED_EQ))
2327	return true;
2328	if (P1 == PPC::PRED_GE &&
2329	(P2 == PPC::PRED_GT \|\| P2 == PPC::PRED_EQ))
2330	return true;
2331
2332	return false;
2333	}
2334
2335	bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI,
2336	std::vector<MachineOperand> &Pred,
2337	bool SkipDead) const {
2338	// Note: At the present time, the contents of Pred from this function is
2339	// unused by IfConversion. This implementation follows ARM by pushing the
2340	// CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
2341	// predicate, instructions defining CTR or CTR8 are also included as
2342	// predicate-defining instructions.
2343
2344	const TargetRegisterClass *RCs[] =
2345	{ &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
2346	&PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
2347
2348	bool Found = false;
2349	for (const MachineOperand &MO : MI.operands()) {
2350	for (unsigned c = `0`; c < std::size(RCs) && !Found; ++c) {
2351	const TargetRegisterClass *RC = RCs[c];
2352	if (MO.isReg()) {
2353	if (MO.isDef() && RC->contains(Reg: MO.getReg())) {
2354	Pred.push_back(x: MO);
2355	Found = true;
2356	}
2357	} else if (MO.isRegMask()) {
2358	for (MCPhysReg R : *RC)
2359	if (MO.clobbersPhysReg(PhysReg: R)) {
2360	Pred.push_back(x: MO);
2361	Found = true;
2362	}
2363	}
2364	}
2365	}
2366
2367	return Found;
2368	}
2369
2370	bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
2371	Register &SrcReg2, int64_t &Mask,
2372	int64_t &Value) const {
2373	unsigned Opc = MI.getOpcode();
2374
2375	switch (Opc) {
2376	default: return false;
2377	case PPC::CMPWI:
2378	case PPC::CMPLWI:
2379	case PPC::CMPDI:
2380	case PPC::CMPLDI:
2381	SrcReg = MI.getOperand(i: `1`).getReg();
2382	SrcReg2 = `0`;
2383	Value = MI.getOperand(i: `2`).getImm();
2384	Mask = `0xFFFF`;
2385	return true;
2386	case PPC::CMPW:
2387	case PPC::CMPLW:
2388	case PPC::CMPD:
2389	case PPC::CMPLD:
2390	case PPC::FCMPUS:
2391	case PPC::FCMPUD:
2392	SrcReg = MI.getOperand(i: `1`).getReg();
2393	SrcReg2 = MI.getOperand(i: `2`).getReg();
2394	Value = `0`;
2395	Mask = `0`;
2396	return true;
2397	}
2398	}
2399
2400	bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
2401	Register SrcReg2, int64_t Mask,
2402	int64_t Value,
2403	const MachineRegisterInfo MRI) const* {
2404	if (DisableCmpOpt)
2405	return false;
2406
2407	int OpC = CmpInstr.getOpcode();
2408	Register CRReg = CmpInstr.getOperand(i: `0`).getReg();
2409
2410	// FP record forms set CR1 based on the exception status bits, not a
2411	// comparison with zero.
2412	if (OpC == PPC::FCMPUS \|\| OpC == PPC::FCMPUD)
2413	return false;
2414
2415	const TargetRegisterInfo *TRI = &getRegisterInfo();
2416	// The record forms set the condition register based on a signed comparison
2417	// with zero (so says the ISA manual). This is not as straightforward as it
2418	// seems, however, because this is always a 64-bit comparison on PPC64, even
2419	// for instructions that are 32-bit in nature (like slw for example).
2420	// So, on PPC32, for unsigned comparisons, we can use the record forms only
2421	// for equality checks (as those don't depend on the sign). On PPC64,
2422	// we are restricted to equality for unsigned 64-bit comparisons and for
2423	// signed 32-bit comparisons the applicability is more restricted.
2424	bool isPPC64 = Subtarget.isPPC64();
2425	bool is32BitSignedCompare = OpC == PPC::CMPWI \|\| OpC == PPC::CMPW;
2426	bool is32BitUnsignedCompare = OpC == PPC::CMPLWI \|\| OpC == PPC::CMPLW;
2427	bool is64BitUnsignedCompare = OpC == PPC::CMPLDI \|\| OpC == PPC::CMPLD;
2428
2429	// Look through copies unless that gets us to a physical register.
2430	Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);
2431	if (ActualSrc.isVirtual())
2432	SrcReg = ActualSrc;
2433
2434	// Get the unique definition of SrcReg.
2435	MachineInstr *MI = MRI->getUniqueVRegDef(Reg: SrcReg);
2436	if (!MI) return false;
2437
2438	bool equalityOnly = false;
2439	bool noSub = false;
2440	if (isPPC64) {
2441	if (is32BitSignedCompare) {
2442	// We can perform this optimization only if SrcReg is sign-extending.
2443	if (isSignExtended(Reg: SrcReg, MRI))
2444	noSub = true;
2445	else
2446	return false;
2447	} else if (is32BitUnsignedCompare) {
2448	// We can perform this optimization, equality only, if SrcReg is
2449	// zero-extending.
2450	if (isZeroExtended(Reg: SrcReg, MRI)) {
2451	noSub = true;
2452	equalityOnly = true;
2453	} else
2454	return false;
2455	} else
2456	equalityOnly = is64BitUnsignedCompare;
2457	} else
2458	equalityOnly = is32BitUnsignedCompare;
2459
2460	if (equalityOnly) {
2461	// We need to check the uses of the condition register in order to reject
2462	// non-equality comparisons.
2463	for (MachineRegisterInfo::use_instr_iterator
2464	I = MRI->use_instr_begin(RegNo: CRReg), IE = MRI->use_instr_end();
2465	I != IE; ++I) {
2466	MachineInstr UseMI = &I;
2467	if (UseMI->getOpcode() == PPC::BCC) {
2468	PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(i: `0`).getImm();
2469	unsigned PredCond = PPC::getPredicateCondition(Opcode: Pred);
2470	// We ignore hint bits when checking for non-equality comparisons.
2471	if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
2472	return false;
2473	} else if (UseMI->getOpcode() == PPC::ISEL \|\|
2474	UseMI->getOpcode() == PPC::ISEL8) {
2475	unsigned SubIdx = UseMI->getOperand(i: `3`).getSubReg();
2476	if (SubIdx != PPC::sub_eq)
2477	return false;
2478	} else
2479	return false;
2480	}
2481	}
2482
2483	MachineBasicBlock::iterator I = CmpInstr;
2484
2485	// Scan forward to find the first use of the compare.
2486	for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL;
2487	++I) {
2488	bool FoundUse = false;
2489	for (MachineRegisterInfo::use_instr_iterator
2490	J = MRI->use_instr_begin(RegNo: CRReg), JE = MRI->use_instr_end();
2491	J != JE; ++J)
2492	if (&J == &I) {
2493	FoundUse = true;
2494	break;
2495	}
2496
2497	if (FoundUse)
2498	break;
2499	}
2500
2501	SmallVector<std::pair<MachineOperand*, PPC::Predicate>, `4`> PredsToUpdate;
2502	SmallVector<std::pair<MachineOperand, unsigned*>, `4`> SubRegsToUpdate;
2503
2504	// There are two possible candidates which can be changed to set CR[01].
2505	// One is MI, the other is a SUB instruction.
2506	// For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
2507	MachineInstr Sub = nullptr*;
2508	if (SrcReg2 != `0`)
2509	// MI is not a candidate for CMPrr.
2510	MI = nullptr;
2511	// FIXME: Conservatively refuse to convert an instruction which isn't in the
2512	// same BB as the comparison. This is to allow the check below to avoid calls
2513	// (and other explicit clobbers); instead we should really check for these
2514	// more explicitly (in at least a few predecessors).
2515	else if (MI->getParent() != CmpInstr.getParent())
2516	return false;
2517	else if (Value != `0`) {
2518	// The record-form instructions set CR bit based on signed comparison
2519	// against 0. We try to convert a compare against 1 or -1 into a compare
2520	// against 0 to exploit record-form instructions. For example, we change
2521	// the condition "greater than -1" into "greater than or equal to 0"
2522	// and "less than 1" into "less than or equal to 0".
2523
2524	// Since we optimize comparison based on a specific branch condition,
2525	// we don't optimize if condition code is used by more than once.
2526	if (equalityOnly \|\| !MRI->hasOneUse(RegNo: CRReg))
2527	return false;
2528
2529	MachineInstr UseMI = &MRI->use_instr_begin(RegNo: CRReg);
2530	if (UseMI->getOpcode() != PPC::BCC)
2531	return false;
2532
2533	PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(i: `0`).getImm();
2534	unsigned PredCond = PPC::getPredicateCondition(Opcode: Pred);
2535	unsigned PredHint = PPC::getPredicateHint(Opcode: Pred);
2536	int16_t Immed = (int16_t)Value;
2537
2538	// When modifying the condition in the predicate, we propagate hint bits
2539	// from the original predicate to the new one.
2540	if (Immed == -`1` && PredCond == PPC::PRED_GT)
2541	// We convert "greater than -1" into "greater than or equal to 0",
2542	// since we are assuming signed comparison by !equalityOnly
2543	Pred = PPC::getPredicate(Condition: PPC::PRED_GE, Hint: PredHint);
2544	else if (Immed == -`1` && PredCond == PPC::PRED_LE)
2545	// We convert "less than or equal to -1" into "less than 0".
2546	Pred = PPC::getPredicate(Condition: PPC::PRED_LT, Hint: PredHint);
2547	else if (Immed == `1` && PredCond == PPC::PRED_LT)
2548	// We convert "less than 1" into "less than or equal to 0".
2549	Pred = PPC::getPredicate(Condition: PPC::PRED_LE, Hint: PredHint);
2550	else if (Immed == `1` && PredCond == PPC::PRED_GE)
2551	// We convert "greater than or equal to 1" into "greater than 0".
2552	Pred = PPC::getPredicate(Condition: PPC::PRED_GT, Hint: PredHint);
2553	else
2554	return false;
2555
2556	// Convert the comparison and its user to a compare against zero with the
2557	// appropriate predicate on the branch. Zero comparison might provide
2558	// optimization opportunities post-RA (see optimization in
2559	// PPCPreEmitPeephole.cpp).
2560	UseMI->getOperand(i: `0`).setImm(Pred);
2561	CmpInstr.getOperand(i: `2`).setImm(`0`);
2562	}
2563
2564	// Search for Sub.
2565	--I;
2566
2567	// Get ready to iterate backward from CmpInstr.
2568	MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin();
2569
2570	for (; I != E && !noSub; --I) {
2571	const MachineInstr &Instr = *I;
2572	unsigned IOpC = Instr.getOpcode();
2573
2574	if (&*I != &CmpInstr && (Instr.modifiesRegister(Reg: PPC::CR0, TRI) \|\|
2575	Instr.readsRegister(Reg: PPC::CR0, TRI)))
2576	// This instruction modifies or uses the record condition register after
2577	// the one we want to change. While we could do this transformation, it
2578	// would likely not be profitable. This transformation removes one
2579	// instruction, and so even forcing RA to generate one move probably
2580	// makes it unprofitable.
2581	return false;
2582
2583	// Check whether CmpInstr can be made redundant by the current instruction.
2584	if ((OpC == PPC::CMPW \|\| OpC == PPC::CMPLW \|\|
2585	OpC == PPC::CMPD \|\| OpC == PPC::CMPLD) &&
2586	(IOpC == PPC::SUBF \|\| IOpC == PPC::SUBF8) &&
2587	((Instr.getOperand(i: `1`).getReg() == SrcReg &&
2588	Instr.getOperand(i: `2`).getReg() == SrcReg2) \|\|
2589	(Instr.getOperand(i: `1`).getReg() == SrcReg2 &&
2590	Instr.getOperand(i: `2`).getReg() == SrcReg))) {
2591	Sub = &*I;
2592	break;
2593	}
2594
2595	if (I == B)
2596	// The 'and' is below the comparison instruction.
2597	return false;
2598	}
2599
2600	// Return false if no candidates exist.
2601	if (!MI && !Sub)
2602	return false;
2603
2604	// The single candidate is called MI.
2605	if (!MI) MI = Sub;
2606
2607	int NewOpC = -`1`;
2608	int MIOpC = MI->getOpcode();
2609	if (MIOpC == PPC::ANDI_rec \|\| MIOpC == PPC::ANDI8_rec \|\|
2610	MIOpC == PPC::ANDIS_rec \|\| MIOpC == PPC::ANDIS8_rec)
2611	NewOpC = MIOpC;
2612	else {
2613	NewOpC = PPC::getRecordFormOpcode(Opcode: MIOpC);
2614	if (NewOpC == -`1` && PPC::getNonRecordFormOpcode(Opcode: MIOpC) != -`1`)
2615	NewOpC = MIOpC;
2616	}
2617
2618	// FIXME: On the non-embedded POWER architectures, only some of the record
2619	// forms are fast, and we should use only the fast ones.
2620
2621	// The defining instruction has a record form (or is already a record
2622	// form). It is possible, however, that we'll need to reverse the condition
2623	// code of the users.
2624	if (NewOpC == -`1`)
2625	return false;
2626
2627	// This transformation should not be performed if `nsw` is missing and is not
2628	// `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in
2629	// CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in
2630	// CRReg can reflect if compared values are equal, this optz is still valid.
2631	if (!equalityOnly && (NewOpC == PPC::SUBF_rec \|\| NewOpC == PPC::SUBF8_rec) &&
2632	Sub && !Sub->getFlag(Flag: MachineInstr::NoSWrap))
2633	return false;
2634
2635	// If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
2636	// needs to be updated to be based on SUB. Push the condition code
2637	// operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
2638	// condition code of these operands will be modified.
2639	// Here, Value == 0 means we haven't converted comparison against 1 or -1 to
2640	// comparison against 0, which may modify predicate.
2641	bool ShouldSwap = false;
2642	if (Sub && Value == `0`) {
2643	ShouldSwap = SrcReg2 != `0` && Sub->getOperand(i: `1`).getReg() == SrcReg2 &&
2644	Sub->getOperand(i: `2`).getReg() == SrcReg;
2645
2646	// The operands to subf are the opposite of sub, so only in the fixed-point
2647	// case, invert the order.
2648	ShouldSwap = !ShouldSwap;
2649	}
2650
2651	if (ShouldSwap)
2652	for (MachineRegisterInfo::use_instr_iterator
2653	I = MRI->use_instr_begin(RegNo: CRReg), IE = MRI->use_instr_end();
2654	I != IE; ++I) {
2655	MachineInstr UseMI = &I;
2656	if (UseMI->getOpcode() == PPC::BCC) {
2657	PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(i: `0`).getImm();
2658	unsigned PredCond = PPC::getPredicateCondition(Opcode: Pred);
2659	assert((!equalityOnly \|\|
2660	PredCond == PPC::PRED_EQ \|\| PredCond == PPC::PRED_NE) &&
2661	"Invalid predicate for equality-only optimization");
2662	(void)PredCond; // To suppress warning in release build.
2663	PredsToUpdate.push_back(Elt: std::make_pair(x: &(UseMI->getOperand(i: `0`)),
2664	y: PPC::getSwappedPredicate(Opcode: Pred)));
2665	} else if (UseMI->getOpcode() == PPC::ISEL \|\|
2666	UseMI->getOpcode() == PPC::ISEL8) {
2667	unsigned NewSubReg = UseMI->getOperand(i: `3`).getSubReg();
2668	assert((!equalityOnly \|\| NewSubReg == PPC::sub_eq) &&
2669	"Invalid CR bit for equality-only optimization");
2670
2671	if (NewSubReg == PPC::sub_lt)
2672	NewSubReg = PPC::sub_gt;
2673	else if (NewSubReg == PPC::sub_gt)
2674	NewSubReg = PPC::sub_lt;
2675
2676	SubRegsToUpdate.push_back(Elt: std::make_pair(x: &(UseMI->getOperand(i: `3`)),
2677	y&: NewSubReg));
2678	} else // We need to abort on a user we don't understand.
2679	return false;
2680	}
2681	assert(!(Value != `0` && ShouldSwap) &&
2682	"Non-zero immediate support and ShouldSwap"
2683	"may conflict in updating predicate");
2684
2685	// Create a new virtual register to hold the value of the CR set by the
2686	// record-form instruction. If the instruction was not previously in
2687	// record form, then set the kill flag on the CR.
2688	CmpInstr.eraseFromParent();
2689
2690	MachineBasicBlock::iterator MII = MI;
2691	BuildMI(BB&: *MI->getParent(), I: std::next(x: MII), MIMD: MI->getDebugLoc(),
2692	MCID: get(Opcode: TargetOpcode::COPY), DestReg: CRReg)
2693	.addReg(RegNo: PPC::CR0, flags: MIOpC != NewOpC ? RegState::Kill : `0`);
2694
2695	// Even if CR0 register were dead before, it is alive now since the
2696	// instruction we just built uses it.
2697	MI->clearRegisterDeads(Reg: PPC::CR0);
2698
2699	if (MIOpC != NewOpC) {
2700	// We need to be careful here: we're replacing one instruction with
2701	// another, and we need to make sure that we get all of the right
2702	// implicit uses and defs. On the other hand, the caller may be holding
2703	// an iterator to this instruction, and so we can't delete it (this is
2704	// specifically the case if this is the instruction directly after the
2705	// compare).
2706
2707	// Rotates are expensive instructions. If we're emitting a record-form
2708	// rotate that can just be an andi/andis, we should just emit that.
2709	if (MIOpC == PPC::RLWINM \|\| MIOpC == PPC::RLWINM8) {
2710	Register GPRRes = MI->getOperand(i: `0`).getReg();
2711	int64_t SH = MI->getOperand(i: `2`).getImm();
2712	int64_t MB = MI->getOperand(i: `3`).getImm();
2713	int64_t ME = MI->getOperand(i: `4`).getImm();
2714	// We can only do this if both the start and end of the mask are in the
2715	// same halfword.
2716	bool MBInLoHWord = MB >= `16`;
2717	bool MEInLoHWord = ME >= `16`;
2718	uint64_t Mask = ~`0LLU`;
2719
2720	if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == `0`) {
2721	Mask = ((`1LLU` << (`32` - MB)) - `1`) & ~((`1LLU` << (`31` - ME)) - `1`);
2722	// The mask value needs to shift right 16 if we're emitting andis.
2723	Mask >>= MBInLoHWord ? `0` : `16`;
2724	NewOpC = MIOpC == PPC::RLWINM
2725	? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
2726	: (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
2727	} else if (MRI->use_empty(RegNo: GPRRes) && (ME == `31`) &&
2728	(ME - MB + `1` == SH) && (MB >= `16`)) {
2729	// If we are rotating by the exact number of bits as are in the mask
2730	// and the mask is in the least significant bits of the register,
2731	// that's just an andis. (as long as the GPR result has no uses).
2732	Mask = ((`1LLU` << `32`) - `1`) & ~((`1LLU` << (`32` - SH)) - `1`);
2733	Mask >>= `16`;
2734	NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
2735	}
2736	// If we've set the mask, we can transform.
2737	if (Mask != ~`0LLU`) {
2738	MI->removeOperand(OpNo: `4`);
2739	MI->removeOperand(OpNo: `3`);
2740	MI->getOperand(i: `2`).setImm(Mask);
2741	NumRcRotatesConvertedToRcAnd ++;
2742	}
2743	} else if (MIOpC == PPC::RLDICL && MI->getOperand(i: `2`).getImm() == `0`) {
2744	int64_t MB = MI->getOperand(i: `3`).getImm();
2745	if (MB >= `48`) {
2746	uint64_t Mask = (`1LLU` << (`63` - MB + `1`)) - `1`;
2747	NewOpC = PPC::ANDI8_rec;
2748	MI->removeOperand(OpNo: `3`);
2749	MI->getOperand(i: `2`).setImm(Mask);
2750	NumRcRotatesConvertedToRcAnd ++;
2751	}
2752	}
2753
2754	const MCInstrDesc &NewDesc = get(Opcode: NewOpC);
2755	MI->setDesc(NewDesc);
2756
2757	for (MCPhysReg ImpDef : NewDesc.implicit_defs()) {
2758	if (!MI->definesRegister(Reg: ImpDef, /TRI=/nullptr)) {
2759	MI->addOperand(MF&: *MI->getParent()->getParent(),
2760	Op: MachineOperand::CreateReg(Reg: ImpDef, isDef: true, isImp: true));
2761	}
2762	}
2763	for (MCPhysReg ImpUse : NewDesc.implicit_uses()) {
2764	if (!MI->readsRegister(Reg: ImpUse, /TRI=/nullptr)) {
2765	MI->addOperand(MF&: *MI->getParent()->getParent(),
2766	Op: MachineOperand::CreateReg(Reg: ImpUse, isDef: false, isImp: true));
2767	}
2768	}
2769	}
2770	assert(MI->definesRegister(PPC::CR0, /TRI=/nullptr) &&
2771	"Record-form instruction does not define cr0?");
2772
2773	// Modify the condition code of operands in OperandsToUpdate.
2774	// Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
2775	// be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
2776	for (unsigned i = `0`, e = PredsToUpdate.size(); i < e; i++)
2777	PredsToUpdate [i].first->setImm(PredsToUpdate [i].second);
2778
2779	for (unsigned i = `0`, e = SubRegsToUpdate.size(); i < e; i++)
2780	SubRegsToUpdate [i].first->setSubReg(SubRegsToUpdate [i].second);
2781
2782	return true;
2783	}
2784
2785	bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const {
2786	MachineRegisterInfo *MRI = &CmpMI.getParent()->getParent()->getRegInfo();
2787	if (MRI->isSSA())
2788	return false;
2789
2790	Register SrcReg, SrcReg2;
2791	int64_t CmpMask, CmpValue;
2792	if (!analyzeCompare(MI: CmpMI, SrcReg, SrcReg2, Mask&: CmpMask, Value&: CmpValue))
2793	return false;
2794
2795	// Try to optimize the comparison against 0.
2796	if (CmpValue \|\| !CmpMask \|\| SrcReg2)
2797	return false;
2798
2799	// The record forms set the condition register based on a signed comparison
2800	// with zero (see comments in optimizeCompareInstr). Since we can't do the
2801	// equality checks in post-RA, we are more restricted on a unsigned
2802	// comparison.
2803	unsigned Opc = CmpMI.getOpcode();
2804	if (Opc == PPC::CMPLWI \|\| Opc == PPC::CMPLDI)
2805	return false;
2806
2807	// The record forms are always based on a 64-bit comparison on PPC64
2808	// (similary, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit
2809	// comparison. Since we can't do the equality checks in post-RA, we bail out
2810	// the case.
2811	if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
2812	return false;
2813
2814	// CmpMI can't be deleted if it has implicit def.
2815	if (CmpMI.hasImplicitDef())
2816	return false;
2817
2818	bool SrcRegHasOtherUse = false;
2819	MachineInstr *SrcMI = getDefMIPostRA(Reg: SrcReg, MI&: CmpMI, SeenIntermediateUse&: SrcRegHasOtherUse);
2820	if (!SrcMI \|\| !SrcMI->definesRegister(Reg: SrcReg, /TRI=/nullptr))
2821	return false;
2822
2823	MachineOperand RegMO = CmpMI.getOperand(i: `0`);
2824	Register CRReg = RegMO.getReg();
2825	if (CRReg != PPC::CR0)
2826	return false;
2827
2828	// Make sure there is no def/use of CRReg between SrcMI and CmpMI.
2829	bool SeenUseOfCRReg = false;
2830	bool IsCRRegKilled = false;
2831	if (!isRegElgibleForForwarding(RegMO, DefMI: SrcMI, MI: CmpMI, KillDefMI: false*, IsFwdFeederRegKilled&: IsCRRegKilled,
2832	SeenIntermediateUse&: SeenUseOfCRReg) \|\|
2833	SrcMI->definesRegister(Reg: CRReg, /TRI=/nullptr) \|\| SeenUseOfCRReg)
2834	return false;
2835
2836	int SrcMIOpc = SrcMI->getOpcode();
2837	int NewOpC = PPC::getRecordFormOpcode(Opcode: SrcMIOpc);
2838	if (NewOpC == -`1`)
2839	return false;
2840
2841	LLVM_DEBUG(dbgs() << "Replace Instr: ");
2842	LLVM_DEBUG(SrcMI->dump());
2843
2844	const MCInstrDesc &NewDesc = get(Opcode: NewOpC);
2845	SrcMI->setDesc(NewDesc);
2846	MachineInstrBuilder (*SrcMI->getParent()->getParent(), SrcMI)
2847	.addReg(RegNo: CRReg, flags: RegState::ImplicitDefine);
2848	SrcMI->clearRegisterDeads(Reg: CRReg);
2849
2850	assert(SrcMI->definesRegister(PPC::CR0, /TRI=/nullptr) &&
2851	"Record-form instruction does not define cr0?");
2852
2853	LLVM_DEBUG(dbgs() << "with: ");
2854	LLVM_DEBUG(SrcMI->dump());
2855	LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
2856	LLVM_DEBUG(CmpMI.dump());
2857	return true;
2858	}
2859
2860	bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
2861	const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2862	int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2863	const TargetRegisterInfo TRI) const* {
2864	const MachineOperand *BaseOp;
2865	OffsetIsScalable = false;
2866	if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
2867	return false;
2868	BaseOps.push_back(Elt: BaseOp);
2869	return true;
2870	}
2871
2872	static bool isLdStSafeToCluster(const MachineInstr &LdSt,
2873	const TargetRegisterInfo *TRI) {
2874	// If this is a volatile load/store, don't mess with it.
2875	if (LdSt.hasOrderedMemoryRef() \|\| LdSt.getNumExplicitOperands() != `3`)
2876	return false;
2877
2878	if (LdSt.getOperand(i: `2`).isFI())
2879	return true;
2880
2881	assert(LdSt.getOperand(`2`).isReg() && "Expected a reg operand.");
2882	// Can't cluster if the instruction modifies the base register
2883	// or it is update form. e.g. ld r2,3(r2)
2884	if (LdSt.modifiesRegister(Reg: LdSt.getOperand(i: `2`).getReg(), TRI))
2885	return false;
2886
2887	return true;
2888	}
2889
2890	// Only cluster instruction pair that have the same opcode, and they are
2891	// clusterable according to PowerPC specification.
2892	static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
2893	const PPCSubtarget &Subtarget) {
2894	switch (FirstOpc) {
2895	default:
2896	return false;
2897	case PPC::STD:
2898	case PPC::STFD:
2899	case PPC::STXSD:
2900	case PPC::DFSTOREf64:
2901	return FirstOpc == SecondOpc;
2902	// PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with
2903	// 32bit and 64bit instruction selection. They are clusterable pair though
2904	// they are different opcode.
2905	case PPC::STW:
2906	case PPC::STW8:
2907	return SecondOpc == PPC::STW \|\| SecondOpc == PPC::STW8;
2908	}
2909	}
2910
2911	bool PPCInstrInfo::shouldClusterMemOps(
2912	ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
2913	bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2914	int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
2915	unsigned NumBytes) const {
2916
2917	assert(BaseOps1.size() == `1` && BaseOps2.size() == `1`);
2918	const MachineOperand &BaseOp1 = *BaseOps1.front();
2919	const MachineOperand &BaseOp2 = *BaseOps2.front();
2920	assert((BaseOp1.isReg() \|\| BaseOp1.isFI()) &&
2921	"Only base registers and frame indices are supported.");
2922
2923	// ClusterSize means the number of memory operations that will have been
2924	// clustered if this hook returns true.
2925	// Don't cluster memory op if there are already two ops clustered at least.
2926	if (ClusterSize > `2`)
2927	return false;
2928
2929	// Cluster the load/store only when they have the same base
2930	// register or FI.
2931	if ((BaseOp1.isReg() != BaseOp2.isReg()) \|\|
2932	(BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) \|\|
2933	(BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
2934	return false;
2935
2936	// Check if the load/store are clusterable according to the PowerPC
2937	// specification.
2938	const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2939	const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2940	unsigned FirstOpc = FirstLdSt.getOpcode();
2941	unsigned SecondOpc = SecondLdSt.getOpcode();
2942	const TargetRegisterInfo *TRI = &getRegisterInfo();
2943	// Cluster the load/store only when they have the same opcode, and they are
2944	// clusterable opcode according to PowerPC specification.
2945	if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
2946	return false;
2947
2948	// Can't cluster load/store that have ordered or volatile memory reference.
2949	if (!isLdStSafeToCluster(LdSt: FirstLdSt, TRI) \|\|
2950	!isLdStSafeToCluster(LdSt: SecondLdSt, TRI))
2951	return false;
2952
2953	int64_t Offset1 = `0`, Offset2 = `0`;
2954	LocationSize Width1 = LocationSize::precise(Value: `0`),
2955	Width2 = LocationSize::precise(Value: `0`);
2956	const MachineOperand Base1 = nullptr, Base2 = nullptr;
2957	if (!getMemOperandWithOffsetWidth(LdSt: FirstLdSt, BaseOp&: Base1, Offset&: Offset1, Width&: Width1, TRI) \|\|
2958	!getMemOperandWithOffsetWidth(LdSt: SecondLdSt, BaseOp&: Base2, Offset&: Offset2, Width&: Width2, TRI) \|\|
2959	Width1 != Width2)
2960	return false;
2961
2962	assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
2963	"getMemOperandWithOffsetWidth return incorrect base op");
2964	// The caller should already have ordered FirstMemOp/SecondMemOp by offset.
2965	assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2966	return Offset1 + (int64_t)Width1.getValue() == Offset2;
2967	}
2968
2969	/// GetInstSize - Return the number of bytes of code the specified
2970	/// instruction may be. This returns the maximum number of bytes.
2971	///
2972	unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
2973	unsigned Opcode = MI.getOpcode();
2974
2975	if (Opcode == PPC::INLINEASM \|\| Opcode == PPC::INLINEASM_BR) {
2976	const MachineFunction *MF = MI.getParent()->getParent();
2977	const char *AsmStr = MI.getOperand(i: `0`).getSymbolName();
2978	return getInlineAsmLength(Str: AsmStr, MAI: *MF->getTarget().getMCAsmInfo());
2979	} else if (Opcode == TargetOpcode::STACKMAP) {
2980	StackMapOpers Opers(&MI);
2981	return Opers.getNumPatchBytes();
2982	} else if (Opcode == TargetOpcode::PATCHPOINT) {
2983	PatchPointOpers Opers(&MI);
2984	return Opers.getNumPatchBytes();
2985	} else {
2986	return get(Opcode).getSize();
2987	}
2988	}
2989
2990	std::pair<unsigned, unsigned>
2991	PPCInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
2992	// PPC always uses a direct mask.
2993	return std::make_pair(x&: TF, y: `0u`);
2994	}
2995
2996	ArrayRef<std::pair<unsigned, const char *>>
2997	PPCInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
2998	using namespace PPCII;
2999	static const std::pair<unsigned, const char *> TargetFlags[] = {
3000	{MO_PLT, "ppc-plt"},
3001	{MO_PIC_FLAG, "ppc-pic"},
3002	{MO_PCREL_FLAG, "ppc-pcrel"},
3003	{MO_GOT_FLAG, "ppc-got"},
3004	{MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
3005	{MO_TLSGD_FLAG, "ppc-tlsgd"},
3006	{MO_TPREL_FLAG, "ppc-tprel"},
3007	{MO_TLSLDM_FLAG, "ppc-tlsldm"},
3008	{MO_TLSLD_FLAG, "ppc-tlsld"},
3009	{MO_TLSGDM_FLAG, "ppc-tlsgdm"},
3010	{MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
3011	{MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
3012	{MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"},
3013	{MO_LO, "ppc-lo"},
3014	{MO_HA, "ppc-ha"},
3015	{MO_TPREL_LO, "ppc-tprel-lo"},
3016	{MO_TPREL_HA, "ppc-tprel-ha"},
3017	{MO_DTPREL_LO, "ppc-dtprel-lo"},
3018	{MO_TLSLD_LO, "ppc-tlsld-lo"},
3019	{MO_TOC_LO, "ppc-toc-lo"},
3020	{MO_TLS, "ppc-tls"},
3021	{MO_PIC_HA_FLAG, "ppc-ha-pic"},
3022	{MO_PIC_LO_FLAG, "ppc-lo-pic"},
3023	{MO_TPREL_PCREL_FLAG, "ppc-tprel-pcrel"},
3024	{MO_TLS_PCREL_FLAG, "ppc-tls-pcrel"},
3025	{MO_GOT_PCREL_FLAG, "ppc-got-pcrel"},
3026	};
3027	return ArrayRef(TargetFlags);
3028	}
3029
3030	// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
3031	// The VSX versions have the advantage of a full 64-register target whereas
3032	// the FP ones have the advantage of lower latency and higher throughput. So
3033	// what we are after is using the faster instructions in low register pressure
3034	// situations and using the larger register file in high register pressure
3035	// situations.
3036	bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const {
3037	unsigned UpperOpcode, LowerOpcode;
3038	switch (MI.getOpcode()) {
3039	case PPC::DFLOADf32:
3040	UpperOpcode = PPC::LXSSP;
3041	LowerOpcode = PPC::LFS;
3042	break;
3043	case PPC::DFLOADf64:
3044	UpperOpcode = PPC::LXSD;
3045	LowerOpcode = PPC::LFD;
3046	break;
3047	case PPC::DFSTOREf32:
3048	UpperOpcode = PPC::STXSSP;
3049	LowerOpcode = PPC::STFS;
3050	break;
3051	case PPC::DFSTOREf64:
3052	UpperOpcode = PPC::STXSD;
3053	LowerOpcode = PPC::STFD;
3054	break;
3055	case PPC::XFLOADf32:
3056	UpperOpcode = PPC::LXSSPX;
3057	LowerOpcode = PPC::LFSX;
3058	break;
3059	case PPC::XFLOADf64:
3060	UpperOpcode = PPC::LXSDX;
3061	LowerOpcode = PPC::LFDX;
3062	break;
3063	case PPC::XFSTOREf32:
3064	UpperOpcode = PPC::STXSSPX;
3065	LowerOpcode = PPC::STFSX;
3066	break;
3067	case PPC::XFSTOREf64:
3068	UpperOpcode = PPC::STXSDX;
3069	LowerOpcode = PPC::STFDX;
3070	break;
3071	case PPC::LIWAX:
3072	UpperOpcode = PPC::LXSIWAX;
3073	LowerOpcode = PPC::LFIWAX;
3074	break;
3075	case PPC::LIWZX:
3076	UpperOpcode = PPC::LXSIWZX;
3077	LowerOpcode = PPC::LFIWZX;
3078	break;
3079	case PPC::STIWX:
3080	UpperOpcode = PPC::STXSIWX;
3081	LowerOpcode = PPC::STFIWX;
3082	break;
3083	default:
3084	llvm_unreachable("Unknown Operation!");
3085	}
3086
3087	Register TargetReg = MI.getOperand(i: `0`).getReg();
3088	unsigned Opcode;
3089	if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) \|\|
3090	(TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
3091	Opcode = LowerOpcode;
3092	else
3093	Opcode = UpperOpcode;
3094	MI.setDesc(get(Opcode));
3095	return true;
3096	}
3097
3098	static bool isAnImmediateOperand(const MachineOperand &MO) {
3099	return MO.isCPI() \|\| MO.isGlobal() \|\| MO.isImm();
3100	}
3101
3102	bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
3103	auto &MBB = *MI.getParent();
3104	auto DL = MI.getDebugLoc();
3105
3106	switch (MI.getOpcode()) {
3107	case PPC::BUILD_UACC: {
3108	MCRegister ACC = MI.getOperand(i: `0`).getReg();
3109	MCRegister UACC = MI.getOperand(i: `1`).getReg();
3110	if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
3111	MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * `4`;
3112	MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * `4`;
3113	// FIXME: This can easily be improved to look up to the top of the MBB
3114	// to see if the inputs are XXLOR's. If they are and SrcReg is killed,
3115	// we can just re-target any such XXLOR's to DstVSR + offset.
3116	for (int VecNo = `0`; VecNo < `4`; VecNo++)
3117	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: PPC::XXLOR), DestReg: DstVSR + VecNo)
3118	.addReg(RegNo: SrcVSR + VecNo)
3119	.addReg(RegNo: SrcVSR + VecNo);
3120	}
3121	// BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
3122	// So after building the 4 copies, we can replace the BUILD_UACC instruction
3123	// with a NOP.
3124	[[fallthrough]];
3125	}
3126	case PPC::KILL_PAIR: {
3127	MI.setDesc(get(Opcode: PPC::UNENCODED_NOP));
3128	MI.removeOperand(OpNo: `1`);
3129	MI.removeOperand(OpNo: `0`);
3130	return true;
3131	}
3132	case TargetOpcode::LOAD_STACK_GUARD: {
3133	auto M = MBB.getParent()->getFunction().getParent();
3134	assert(
3135	(Subtarget.isTargetLinux() \|\| M->getStackProtectorGuard() == "tls") &&
3136	"Only Linux target or tls mode are expected to contain "
3137	"LOAD_STACK_GUARD");
3138	int64_t Offset;
3139	if (M->getStackProtectorGuard() == "tls")
3140	Offset = M->getStackProtectorGuardOffset();
3141	else
3142	Offset = Subtarget.isPPC64() ? -`0x7010` : -`0x7008`;
3143	const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
3144	MI.setDesc(get(Opcode: Subtarget.isPPC64() ? PPC::LD : PPC::LWZ));
3145	MachineInstrBuilder (*MI.getParent()->getParent(), MI)
3146	.addImm(Val: Offset)
3147	.addReg(RegNo: Reg);
3148	return true;
3149	}
3150	case PPC::PPCLdFixedAddr: {
3151	assert(Subtarget.getTargetTriple().isOSGlibc() &&
3152	"Only targets with Glibc expected to contain PPCLdFixedAddr");
3153	int64_t Offset = `0`;
3154	const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;
3155	MI.setDesc(get(Opcode: PPC::LWZ));
3156	uint64_t FAType = MI.getOperand(i: `1`).getImm();
3157	#undef PPC_LNX_FEATURE
3158	#undef PPC_CPU
3159	#define PPC_LNX_DEFINE_OFFSETS
3160	#include "llvm/TargetParser/PPCTargetParser.def"
3161	bool IsLE = Subtarget.isLittleEndian();
3162	bool Is64 = Subtarget.isPPC64();
3163	if (FAType == PPC_FAWORD_HWCAP) {
3164	if (IsLE)
3165	Offset = Is64 ? PPC_HWCAP_OFFSET_LE64 : PPC_HWCAP_OFFSET_LE32;
3166	else
3167	Offset = Is64 ? PPC_HWCAP_OFFSET_BE64 : PPC_HWCAP_OFFSET_BE32;
3168	} else if (FAType == PPC_FAWORD_HWCAP2) {
3169	if (IsLE)
3170	Offset = Is64 ? PPC_HWCAP2_OFFSET_LE64 : PPC_HWCAP2_OFFSET_LE32;
3171	else
3172	Offset = Is64 ? PPC_HWCAP2_OFFSET_BE64 : PPC_HWCAP2_OFFSET_BE32;
3173	} else if (FAType == PPC_FAWORD_CPUID) {
3174	if (IsLE)
3175	Offset = Is64 ? PPC_CPUID_OFFSET_LE64 : PPC_CPUID_OFFSET_LE32;
3176	else
3177	Offset = Is64 ? PPC_CPUID_OFFSET_BE64 : PPC_CPUID_OFFSET_BE32;
3178	}
3179	assert(Offset && "Do not know the offset for this fixed addr load");
3180	MI.removeOperand(OpNo: `1`);
3181	Subtarget.getTargetMachine().setGlibcHWCAPAccess();
3182	MachineInstrBuilder (*MI.getParent()->getParent(), MI)
3183	.addImm(Val: Offset)
3184	.addReg(RegNo: Reg);
3185	return true;
3186	#define PPC_TGT_PARSER_UNDEF_MACROS
3187	#include "llvm/TargetParser/PPCTargetParser.def"
3188	#undef PPC_TGT_PARSER_UNDEF_MACROS
3189	}
3190	case PPC::DFLOADf32:
3191	case PPC::DFLOADf64:
3192	case PPC::DFSTOREf32:
3193	case PPC::DFSTOREf64: {
3194	assert(Subtarget.hasP9Vector() &&
3195	"Invalid D-Form Pseudo-ops on Pre-P9 target.");
3196	assert(MI.getOperand(`2`).isReg() &&
3197	isAnImmediateOperand(MI.getOperand(`1`)) &&
3198	"D-form op must have register and immediate operands");
3199	return expandVSXMemPseudo(MI);
3200	}
3201	case PPC::XFLOADf32:
3202	case PPC::XFSTOREf32:
3203	case PPC::LIWAX:
3204	case PPC::LIWZX:
3205	case PPC::STIWX: {
3206	assert(Subtarget.hasP8Vector() &&
3207	"Invalid X-Form Pseudo-ops on Pre-P8 target.");
3208	assert(MI.getOperand(`2`).isReg() && MI.getOperand(`1`).isReg() &&
3209	"X-form op must have register and register operands");
3210	return expandVSXMemPseudo(MI);
3211	}
3212	case PPC::XFLOADf64:
3213	case PPC::XFSTOREf64: {
3214	assert(Subtarget.hasVSX() &&
3215	"Invalid X-Form Pseudo-ops on target that has no VSX.");
3216	assert(MI.getOperand(`2`).isReg() && MI.getOperand(`1`).isReg() &&
3217	"X-form op must have register and register operands");
3218	return expandVSXMemPseudo(MI);
3219	}
3220	case PPC::SPILLTOVSR_LD: {
3221	Register TargetReg = MI.getOperand(i: `0`).getReg();
3222	if (PPC::VSFRCRegClass.contains(Reg: TargetReg)) {
3223	MI.setDesc(get(Opcode: PPC::DFLOADf64));
3224	return expandPostRAPseudo(MI);
3225	}
3226	else
3227	MI.setDesc(get(Opcode: PPC::LD));
3228	return true;
3229	}
3230	case PPC::SPILLTOVSR_ST: {
3231	Register SrcReg = MI.getOperand(i: `0`).getReg();
3232	if (PPC::VSFRCRegClass.contains(Reg: SrcReg)) {
3233	NumStoreSPILLVSRRCAsVec ++;
3234	MI.setDesc(get(Opcode: PPC::DFSTOREf64));
3235	return expandPostRAPseudo(MI);
3236	} else {
3237	NumStoreSPILLVSRRCAsGpr ++;
3238	MI.setDesc(get(Opcode: PPC::STD));
3239	}
3240	return true;
3241	}
3242	case PPC::SPILLTOVSR_LDX: {
3243	Register TargetReg = MI.getOperand(i: `0`).getReg();
3244	if (PPC::VSFRCRegClass.contains(Reg: TargetReg))
3245	MI.setDesc(get(Opcode: PPC::LXSDX));
3246	else
3247	MI.setDesc(get(Opcode: PPC::LDX));
3248	return true;
3249	}
3250	case PPC::SPILLTOVSR_STX: {
3251	Register SrcReg = MI.getOperand(i: `0`).getReg();
3252	if (PPC::VSFRCRegClass.contains(Reg: SrcReg)) {
3253	NumStoreSPILLVSRRCAsVec ++;
3254	MI.setDesc(get(Opcode: PPC::STXSDX));
3255	} else {
3256	NumStoreSPILLVSRRCAsGpr ++;
3257	MI.setDesc(get(Opcode: PPC::STDX));
3258	}
3259	return true;
3260	}
3261
3262	// FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
3263	case PPC::CFENCE:
3264	case PPC::CFENCE8: {
3265	auto Val = MI.getOperand(i: `0`).getReg();
3266	unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
3267	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: CmpOp), DestReg: PPC::CR7).addReg(RegNo: Val).addReg(RegNo: Val);
3268	BuildMI(BB&: MBB, I&: MI, MIMD: DL, MCID: get(Opcode: PPC::CTRL_DEP))
3269	.addImm(Val: PPC::PRED_NE_MINUS)
3270	.addReg(RegNo: PPC::CR7)
3271	.addImm(Val: `1`);
3272	MI.setDesc(get(Opcode: PPC::ISYNC));
3273	MI.removeOperand(OpNo: `0`);
3274	return true;
3275	}
3276	}
3277	return false;
3278	}
3279
3280	// Essentially a compile-time implementation of a compare->isel sequence.
3281	// It takes two constants to compare, along with the true/false registers
3282	// and the comparison type (as a subreg to a CR field) and returns one
3283	// of the true/false registers, depending on the comparison results.
3284	static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,
3285	unsigned TrueReg, unsigned FalseReg,
3286	unsigned CRSubReg) {
3287	// Signed comparisons. The immediates are assumed to be sign-extended.
3288	if (CompareOpc == PPC::CMPWI \|\| CompareOpc == PPC::CMPDI) {
3289	switch (CRSubReg) {
3290	default: llvm_unreachable("Unknown integer comparison type.");
3291	case PPC::sub_lt:
3292	return Imm1 < Imm2 ? TrueReg : FalseReg;
3293	case PPC::sub_gt:
3294	return Imm1 > Imm2 ? TrueReg : FalseReg;
3295	case PPC::sub_eq:
3296	return Imm1 == Imm2 ? TrueReg : FalseReg;
3297	}
3298	}
3299	// Unsigned comparisons.
3300	else if (CompareOpc == PPC::CMPLWI \|\| CompareOpc == PPC::CMPLDI) {
3301	switch (CRSubReg) {
3302	default: llvm_unreachable("Unknown integer comparison type.");
3303	case PPC::sub_lt:
3304	return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg;
3305	case PPC::sub_gt:
3306	return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg;
3307	case PPC::sub_eq:
3308	return Imm1 == Imm2 ? TrueReg : FalseReg;
3309	}
3310	}
3311	return PPC::NoRegister;
3312	}
3313
3314	void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI,
3315	unsigned OpNo,
3316	int64_t Imm) const {
3317	assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");
3318	// Replace the REG with the Immediate.
3319	Register InUseReg = MI.getOperand(i: OpNo).getReg();
3320	MI.getOperand(i: OpNo).ChangeToImmediate(ImmVal: Imm);
3321
3322	// We need to make sure that the MI didn't have any implicit use
3323	// of this REG any more. We don't call MI.implicit_operands().empty() to
3324	// return early, since MI's MCID might be changed in calling context, as a
3325	// result its number of explicit operands may be changed, thus the begin of
3326	// implicit operand is changed.
3327	const TargetRegisterInfo *TRI = &getRegisterInfo();
3328	int UseOpIdx = MI.findRegisterUseOperandIdx(Reg: InUseReg, TRI, isKill: false);
3329	if (UseOpIdx >= `0`) {
3330	MachineOperand &MO = MI.getOperand(i: UseOpIdx);
3331	if (MO.isImplicit())
3332	// The operands must always be in the following order:
3333	// - explicit reg defs,
3334	// - other explicit operands (reg uses, immediates, etc.),
3335	// - implicit reg defs
3336	// - implicit reg uses
3337	// Therefore, removing the implicit operand won't change the explicit
3338	// operands layout.
3339	MI.removeOperand(OpNo: UseOpIdx);
3340	}
3341	}
3342
3343	// Replace an instruction with one that materializes a constant (and sets
3344	// CR0 if the original instruction was a record-form instruction).
3345	void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,
3346	const LoadImmediateInfo &LII) const {
3347	// Remove existing operands.
3348	int OperandToKeep = LII.SetCR ? `1` : `0`;
3349	for (int i = MI.getNumOperands() - `1`; i > OperandToKeep; i--)
3350	MI.removeOperand(OpNo: i);
3351
3352	// Replace the instruction.
3353	if (LII.SetCR) {
3354	MI.setDesc(get(Opcode: LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3355	// Set the immediate.
3356	MachineInstrBuilder (*MI.getParent()->getParent(), MI)
3357	.addImm(Val: LII.Imm).addReg(RegNo: PPC::CR0, flags: RegState::ImplicitDefine);
3358	return;
3359	}
3360	else
3361	MI.setDesc(get(Opcode: LII.Is64Bit ? PPC::LI8 : PPC::LI));
3362
3363	// Set the immediate.
3364	MachineInstrBuilder (*MI.getParent()->getParent(), MI)
3365	.addImm(Val: LII.Imm);
3366	}
3367
3368	MachineInstr PPCInstrInfo::getDefMIPostRA(unsigned* Reg, MachineInstr &MI,
3369	bool &SeenIntermediateUse) const {
3370	assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&
3371	"Should be called after register allocation.");
3372	const TargetRegisterInfo *TRI = &getRegisterInfo();
3373	MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;
3374	It ++;
3375	SeenIntermediateUse = false;
3376	for (; It != E; ++It) {
3377	if (It ->modifiesRegister(Reg, TRI))
3378	return &*It;
3379	if (It ->readsRegister(Reg, TRI))
3380	SeenIntermediateUse = true;
3381	}
3382	return nullptr;
3383	}
3384
3385	void PPCInstrInfo::materializeImmPostRA(MachineBasicBlock &MBB,
3386	MachineBasicBlock::iterator MBBI,
3387	const DebugLoc &DL, Register Reg,
3388	int64_t Imm) const {
3389	assert(!MBB.getParent()->getRegInfo().isSSA() &&
3390	"Register should be in non-SSA form after RA");
3391	bool isPPC64 = Subtarget.isPPC64();
3392	// FIXME: Materialization here is not optimal.
3393	// For some special bit patterns we can use less instructions.
3394	// See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp.
3395	if (isInt<`16`>(x: Imm)) {
3396	BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: isPPC64 ? PPC::LI8 : PPC::LI), DestReg: Reg).addImm(Val: Imm);
3397	} else if (isInt<`32`>(x: Imm)) {
3398	BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: isPPC64 ? PPC::LIS8 : PPC::LIS), DestReg: Reg)
3399	.addImm(Val: Imm >> `16`);
3400	if (Imm & `0xFFFF`)
3401	BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: isPPC64 ? PPC::ORI8 : PPC::ORI), DestReg: Reg)
3402	.addReg(RegNo: Reg, flags: RegState::Kill)
3403	.addImm(Val: Imm & `0xFFFF`);
3404	} else {
3405	assert(isPPC64 && "Materializing 64-bit immediate to single register is "
3406	"only supported in PPC64");
3407	BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: PPC::LIS8), DestReg: Reg).addImm(Val: Imm >> `48`);
3408	if ((Imm >> `32`) & `0xFFFF`)
3409	BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: PPC::ORI8), DestReg: Reg)
3410	.addReg(RegNo: Reg, flags: RegState::Kill)
3411	.addImm(Val: (Imm >> `32`) & `0xFFFF`);
3412	BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: PPC::RLDICR), DestReg: Reg)
3413	.addReg(RegNo: Reg, flags: RegState::Kill)
3414	.addImm(Val: `32`)
3415	.addImm(Val: `31`);
3416	BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: PPC::ORIS8), DestReg: Reg)
3417	.addReg(RegNo: Reg, flags: RegState::Kill)
3418	.addImm(Val: (Imm >> `16`) & `0xFFFF`);
3419	if (Imm & `0xFFFF`)
3420	BuildMI(BB&: MBB, I: MBBI, MIMD: DL, MCID: get(Opcode: PPC::ORI8), DestReg: Reg)
3421	.addReg(RegNo: Reg, flags: RegState::Kill)
3422	.addImm(Val: Imm & `0xFFFF`);
3423	}
3424	}
3425
3426	MachineInstr *PPCInstrInfo::getForwardingDefMI(
3427	MachineInstr &MI,
3428	unsigned &OpNoForForwarding,
3429	bool &SeenIntermediateUse) const {
3430	OpNoForForwarding = ~`0U`;
3431	MachineInstr DefMI = nullptr*;
3432	MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3433	const TargetRegisterInfo *TRI = &getRegisterInfo();
3434	// If we're in SSA, get the defs through the MRI. Otherwise, only look
3435	// within the basic block to see if the register is defined using an
3436	// LI/LI8/ADDI/ADDI8.
3437	if (MRI->isSSA()) {
3438	for (int i = `1`, e = MI.getNumOperands(); i < e; i++) {
3439	if (!MI.getOperand(i).isReg())
3440	continue;
3441	Register Reg = MI.getOperand(i).getReg();
3442	if (!Reg.isVirtual())
3443	continue;
3444	Register TrueReg = TRI->lookThruCopyLike(SrcReg: Reg, MRI);
3445	if (TrueReg.isVirtual()) {
3446	MachineInstr *DefMIForTrueReg = MRI->getVRegDef(Reg: TrueReg);
3447	if (DefMIForTrueReg->getOpcode() == PPC::LI \|\|
3448	DefMIForTrueReg->getOpcode() == PPC::LI8 \|\|
3449	DefMIForTrueReg->getOpcode() == PPC::ADDI \|\|
3450	DefMIForTrueReg->getOpcode() == PPC::ADDI8) {
3451	OpNoForForwarding = i;
3452	DefMI = DefMIForTrueReg;
3453	// The ADDI and LI operand maybe exist in one instruction at same
3454	// time. we prefer to fold LI operand as LI only has one Imm operand
3455	// and is more possible to be converted. So if current DefMI is
3456	// ADDI/ADDI8, we continue to find possible LI/LI8.
3457	if (DefMI->getOpcode() == PPC::LI \|\| DefMI->getOpcode() == PPC::LI8)
3458	break;
3459	}
3460	}
3461	}
3462	} else {
3463	// Looking back through the definition for each operand could be expensive,
3464	// so exit early if this isn't an instruction that either has an immediate
3465	// form or is already an immediate form that we can handle.
3466	ImmInstrInfo III;
3467	unsigned Opc = MI.getOpcode();
3468	bool ConvertibleImmForm =
3469	Opc == PPC::CMPWI \|\| Opc == PPC::CMPLWI \|\| Opc == PPC::CMPDI \|\|
3470	Opc == PPC::CMPLDI \|\| Opc == PPC::ADDI \|\| Opc == PPC::ADDI8 \|\|
3471	Opc == PPC::ORI \|\| Opc == PPC::ORI8 \|\| Opc == PPC::XORI \|\|
3472	Opc == PPC::XORI8 \|\| Opc == PPC::RLDICL \|\| Opc == PPC::RLDICL_rec \|\|
3473	Opc == PPC::RLDICL_32 \|\| Opc == PPC::RLDICL_32_64 \|\|
3474	Opc == PPC::RLWINM \|\| Opc == PPC::RLWINM_rec \|\| Opc == PPC::RLWINM8 \|\|
3475	Opc == PPC::RLWINM8_rec;
3476	bool IsVFReg = (MI.getNumOperands() && MI.getOperand(i: `0`).isReg())
3477	? PPC::isVFRegister(Reg: MI.getOperand(i: `0`).getReg())
3478	: false;
3479	if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, PostRA: true))
3480	return nullptr;
3481
3482	// Don't convert or %X, %Y, %Y since that's just a register move.
3483	if ((Opc == PPC::OR \|\| Opc == PPC::OR8) &&
3484	MI.getOperand(i: `1`).getReg() == MI.getOperand(i: `2`).getReg())
3485	return nullptr;
3486	for (int i = `1`, e = MI.getNumOperands(); i < e; i++) {
3487	MachineOperand &MO = MI.getOperand(i);
3488	SeenIntermediateUse = false;
3489	if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {
3490	Register Reg = MI.getOperand(i).getReg();
3491	// If we see another use of this reg between the def and the MI,
3492	// we want to flag it so the def isn't deleted.
3493	MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);
3494	if (DefMI) {
3495	// Is this register defined by some form of add-immediate (including
3496	// load-immediate) within this basic block?
3497	switch (DefMI->getOpcode()) {
3498	default:
3499	break;
3500	case PPC::LI:
3501	case PPC::LI8:
3502	case PPC::ADDItocL8:
3503	case PPC::ADDI:
3504	case PPC::ADDI8:
3505	OpNoForForwarding = i;
3506	return DefMI;
3507	}
3508	}
3509	}
3510	}
3511	}
3512	return OpNoForForwarding == ~`0U` ? nullptr : DefMI;
3513	}
3514
3515	unsigned PPCInstrInfo::getSpillTarget() const {
3516	// With P10, we may need to spill paired vector registers or accumulator
3517	// registers. MMA implies paired vectors, so we can just check that.
3518	bool IsP10Variant = Subtarget.isISA3_1() \|\| Subtarget.pairedVectorMemops();
3519	// P11 uses the P10 target.
3520	return Subtarget.isISAFuture() ? `3` : IsP10Variant ?
3521	`2` : Subtarget.hasP9Vector() ?
3522	`1` : `0`;
3523	}
3524
3525	ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {
3526	return {StoreSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3527	}
3528
3529	ArrayRef<unsigned> PPCInstrInfo::getLoadOpcodesForSpillArray() const {
3530	return {LoadSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};
3531	}
3532
3533	// This opt tries to convert the following imm form to an index form to save an
3534	// add for stack variables.
3535	// Return false if no such pattern found.
3536	//
3537	// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3538	// ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg
3539	// Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed)
3540	//
3541	// can be converted to:
3542	//
3543	// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)
3544	// Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed)
3545	//
3546	// In order to eliminate ADD instr, make sure that:
3547	// 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in
3548	// new ADDI instr and ADDI can only take int16 Imm.
3549	// 2: ToBeChangedReg must be killed in ADD instr and there is no other use
3550	// between ADDI and ADD instr since its original def in ADDI will be changed
3551	// in new ADDI instr. And also there should be no new def for it between
3552	// ADD and Imm instr as ToBeChangedReg will be used in Index instr.
3553	// 3: ToBeDeletedReg must be killed in Imm instr and there is no other use
3554	// between ADD and Imm instr since ADD instr will be eliminated.
3555	// 4: ScaleReg must not be redefined between ADD and Imm instr since it will be
3556	// moved to Index instr.
3557	bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const {
3558	MachineFunction *MF = MI.getParent()->getParent();
3559	MachineRegisterInfo *MRI = &MF->getRegInfo();
3560	bool PostRA = !MRI->isSSA();
3561	// Do this opt after PEI which is after RA. The reason is stack slot expansion
3562	// in PEI may expose such opportunities since in PEI, stack slot offsets to
3563	// frame base(OffsetAddi) are determined.
3564	if (!PostRA)
3565	return false;
3566	unsigned ToBeDeletedReg = `0`;
3567	int64_t OffsetImm = `0`;
3568	unsigned XFormOpcode = `0`;
3569	ImmInstrInfo III;
3570
3571	// Check if Imm instr meets requirement.
3572	if (!isImmInstrEligibleForFolding(MI, BaseReg&: ToBeDeletedReg, XFormOpcode, OffsetOfImmInstr&: OffsetImm,
3573	III))
3574	return false;
3575
3576	bool OtherIntermediateUse = false;
3577	MachineInstr *ADDMI = getDefMIPostRA(Reg: ToBeDeletedReg, MI, SeenIntermediateUse&: OtherIntermediateUse);
3578
3579	// Exit if there is other use between ADD and Imm instr or no def found.
3580	if (OtherIntermediateUse \|\| !ADDMI)
3581	return false;
3582
3583	// Check if ADD instr meets requirement.
3584	if (!isADDInstrEligibleForFolding(ADDMI&: *ADDMI))
3585	return false;
3586
3587	unsigned ScaleRegIdx = `0`;
3588	int64_t OffsetAddi = `0`;
3589	MachineInstr ADDIMI = nullptr*;
3590
3591	// Check if there is a valid ToBeChangedReg in ADDMI.
3592	// 1: It must be killed.
3593	// 2: Its definition must be a valid ADDIMI.
3594	// 3: It must satify int16 offset requirement.
3595	if (isValidToBeChangedReg(ADDMI, Index: `1`, ADDIMI, OffsetAddi, OffsetImm))
3596	ScaleRegIdx = `2`;
3597	else if (isValidToBeChangedReg(ADDMI, Index: `2`, ADDIMI, OffsetAddi, OffsetImm))
3598	ScaleRegIdx = `1`;
3599	else
3600	return false;
3601
3602	assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
3603	Register ToBeChangedReg = ADDIMI->getOperand(i: `0`).getReg();
3604	Register ScaleReg = ADDMI->getOperand(i: ScaleRegIdx).getReg();
3605	auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
3606	MachineBasicBlock::iterator End) {
3607	for (auto It = ++Start; It != End; It ++)
3608	if (It ->modifiesRegister(Reg, TRI: &getRegisterInfo()))
3609	return true;
3610	return false;
3611	};
3612
3613	// We are trying to replace the ImmOpNo with ScaleReg. Give up if it is
3614	// treated as special zero when ScaleReg is R0/X0 register.
3615	if (III.ZeroIsSpecialOrig == III.ImmOpNo &&
3616	(ScaleReg == PPC::R0 \|\| ScaleReg == PPC::X0))
3617	return false;
3618
3619	// Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr
3620	// and Imm Instr.
3621	if (NewDefFor (ToBeChangedReg, ADDMI, MI) \|\| NewDefFor (ScaleReg, ADDMI, MI))
3622	return false;
3623
3624	// Now start to do the transformation.
3625	LLVM_DEBUG(dbgs() << "Replace instruction: "
3626	<< "\n");
3627	LLVM_DEBUG(ADDIMI->dump());
3628	LLVM_DEBUG(ADDMI->dump());
3629	LLVM_DEBUG(MI.dump());
3630	LLVM_DEBUG(dbgs() << "with: "
3631	<< "\n");
3632
3633	// Update ADDI instr.
3634	ADDIMI->getOperand(i: `2`).setImm(OffsetAddi + OffsetImm);
3635
3636	// Update Imm instr.
3637	MI.setDesc(get(Opcode: XFormOpcode));
3638	MI.getOperand(i: III.ImmOpNo)
3639	.ChangeToRegister(Reg: ScaleReg, isDef: false, isImp: false,
3640	isKill: ADDMI->getOperand(i: ScaleRegIdx).isKill());
3641
3642	MI.getOperand(i: III.OpNoForForwarding)
3643	.ChangeToRegister(Reg: ToBeChangedReg, isDef: false, isImp: false, isKill: true);
3644
3645	// Eliminate ADD instr.
3646	ADDMI->eraseFromParent();
3647
3648	LLVM_DEBUG(ADDIMI->dump());
3649	LLVM_DEBUG(MI.dump());
3650
3651	return true;
3652	}
3653
3654	bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,
3655	int64_t &Imm) const {
3656	unsigned Opc = ADDIMI.getOpcode();
3657
3658	// Exit if the instruction is not ADDI.
3659	if (Opc != PPC::ADDI && Opc != PPC::ADDI8)
3660	return false;
3661
3662	// The operand may not necessarily be an immediate - it could be a relocation.
3663	if (!ADDIMI.getOperand(i: `2`).isImm())
3664	return false;
3665
3666	Imm = ADDIMI.getOperand(i: `2`).getImm();
3667
3668	return true;
3669	}
3670
3671	bool PPCInstrInfo::isADDInstrEligibleForFolding(MachineInstr &ADDMI) const {
3672	unsigned Opc = ADDMI.getOpcode();
3673
3674	// Exit if the instruction is not ADD.
3675	return Opc == PPC::ADD4 \|\| Opc == PPC::ADD8;
3676	}
3677
3678	bool PPCInstrInfo::isImmInstrEligibleForFolding(MachineInstr &MI,
3679	unsigned &ToBeDeletedReg,
3680	unsigned &XFormOpcode,
3681	int64_t &OffsetImm,
3682	ImmInstrInfo &III) const {
3683	// Only handle load/store.
3684	if (!MI.mayLoadOrStore())
3685	return false;
3686
3687	unsigned Opc = MI.getOpcode();
3688
3689	XFormOpcode = RI.getMappedIdxOpcForImmOpc(ImmOpcode: Opc);
3690
3691	// Exit if instruction has no index form.
3692	if (XFormOpcode == PPC::INSTRUCTION_LIST_END)
3693	return false;
3694
3695	// TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.
3696	if (!instrHasImmForm(Opc: XFormOpcode,
3697	IsVFReg: PPC::isVFRegister(Reg: MI.getOperand(i: `0`).getReg()), III, PostRA: true))
3698	return false;
3699
3700	if (!III.IsSummingOperands)
3701	return false;
3702
3703	MachineOperand ImmOperand = MI.getOperand(i: III.ImmOpNo);
3704	MachineOperand RegOperand = MI.getOperand(i: III.OpNoForForwarding);
3705	// Only support imm operands, not relocation slots or others.
3706	if (!ImmOperand.isImm())
3707	return false;
3708
3709	assert(RegOperand.isReg() && "Instruction format is not right");
3710
3711	// There are other use for ToBeDeletedReg after Imm instr, can not delete it.
3712	if (!RegOperand.isKill())
3713	return false;
3714
3715	ToBeDeletedReg = RegOperand.getReg();
3716	OffsetImm = ImmOperand.getImm();
3717
3718	return true;
3719	}
3720
3721	bool PPCInstrInfo::isValidToBeChangedReg(MachineInstr ADDMI, unsigned* Index,
3722	MachineInstr *&ADDIMI,
3723	int64_t &OffsetAddi,
3724	int64_t OffsetImm) const {
3725	assert((Index == `1` \|\| Index == `2`) && "Invalid operand index for add.");
3726	MachineOperand &MO = ADDMI->getOperand(i: Index);
3727
3728	if (!MO.isKill())
3729	return false;
3730
3731	bool OtherIntermediateUse = false;
3732
3733	ADDIMI = getDefMIPostRA(Reg: MO.getReg(), MI&: *ADDMI, SeenIntermediateUse&: OtherIntermediateUse);
3734	// Currently handle only one "add + Imminstr" pair case, exit if other
3735	// intermediate use for ToBeChangedReg found.
3736	// TODO: handle the cases where there are other "add + Imminstr" pairs
3737	// with same offset in Imminstr which is like:
3738	//
3739	// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi
3740	// ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg1
3741	// Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)
3742	// ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg2
3743	// Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)
3744	//
3745	// can be converted to:
3746	//
3747	// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,
3748	// (OffsetAddi + OffsetImm)
3749	// Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg
3750	// Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)
3751
3752	if (OtherIntermediateUse \|\| !ADDIMI)
3753	return false;
3754	// Check if ADDI instr meets requirement.
3755	if (!isADDIInstrEligibleForFolding(ADDIMI&: *ADDIMI, Imm&: OffsetAddi))
3756	return false;
3757
3758	if (isInt<`16`>(x: OffsetAddi + OffsetImm))
3759	return true;
3760	return false;
3761	}
3762
3763	// If this instruction has an immediate form and one of its operands is a
3764	// result of a load-immediate or an add-immediate, convert it to
3765	// the immediate form if the constant is in range.
3766	bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
3767	SmallSet<Register, `4`> &RegsToUpdate,
3768	MachineInstr *KilledDef) const* {
3769	MachineFunction *MF = MI.getParent()->getParent();
3770	MachineRegisterInfo *MRI = &MF->getRegInfo();
3771	bool PostRA = !MRI->isSSA();
3772	bool SeenIntermediateUse = true;
3773	unsigned ForwardingOperand = ~`0U`;
3774	MachineInstr *DefMI = getForwardingDefMI(MI, OpNoForForwarding&: ForwardingOperand,
3775	SeenIntermediateUse);
3776	if (!DefMI)
3777	return false;
3778	assert(ForwardingOperand < MI.getNumOperands() &&
3779	"The forwarding operand needs to be valid at this point");
3780	bool IsForwardingOperandKilled = MI.getOperand(i: ForwardingOperand).isKill();
3781	bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;
3782	if (KilledDef && KillFwdDefMI)
3783	*KilledDef = DefMI;
3784
3785	// Conservatively add defs from DefMI and defs/uses from MI to the set of
3786	// registers that need their kill flags updated.
3787	for (const MachineOperand &MO : DefMI->operands())
3788	if (MO.isReg() && MO.isDef())
3789	RegsToUpdate.insert(V: MO.getReg());
3790	for (const MachineOperand &MO : MI.operands())
3791	if (MO.isReg())
3792	RegsToUpdate.insert(V: MO.getReg());
3793
3794	// If this is a imm instruction and its register operands is produced by ADDI,
3795	// put the imm into imm inst directly.
3796	if (RI.getMappedIdxOpcForImmOpc(ImmOpcode: MI.getOpcode()) !=
3797	PPC::INSTRUCTION_LIST_END &&
3798	transformToNewImmFormFedByAdd(MI, DefMI&: *DefMI, OpNoForForwarding: ForwardingOperand))
3799	return true;
3800
3801	ImmInstrInfo III;
3802	bool IsVFReg = MI.getOperand(i: `0`).isReg()
3803	? PPC::isVFRegister(Reg: MI.getOperand(i: `0`).getReg())
3804	: false;
3805	bool HasImmForm = instrHasImmForm(Opc: MI.getOpcode(), IsVFReg, III, PostRA);
3806	// If this is a reg+reg instruction that has a reg+imm form,
3807	// and one of the operands is produced by an add-immediate,
3808	// try to convert it.
3809	if (HasImmForm &&
3810	transformToImmFormFedByAdd(MI, III, ConstantOpNo: ForwardingOperand, DefMI&: *DefMI,
3811	KillDefMI: KillFwdDefMI))
3812	return true;
3813
3814	// If this is a reg+reg instruction that has a reg+imm form,
3815	// and one of the operands is produced by LI, convert it now.
3816	if (HasImmForm &&
3817	transformToImmFormFedByLI(MI, III, ConstantOpNo: ForwardingOperand, DefMI&: *DefMI))
3818	return true;
3819
3820	// If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI
3821	// can be simpified to LI.
3822	if (!HasImmForm && simplifyToLI(MI, DefMI&: *DefMI, OpNoForForwarding: ForwardingOperand, KilledDef))
3823	return true;
3824
3825	return false;
3826	}
3827
3828	bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
3829	MachineInstr *ToErase) const* {
3830	MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
3831	Register FoldingReg = MI.getOperand(i: `1`).getReg();
3832	if (!FoldingReg.isVirtual())
3833	return false;
3834	MachineInstr *SrcMI = MRI->getVRegDef(Reg: FoldingReg);
3835	if (SrcMI->getOpcode() != PPC::RLWINM &&
3836	SrcMI->getOpcode() != PPC::RLWINM_rec &&
3837	SrcMI->getOpcode() != PPC::RLWINM8 &&
3838	SrcMI->getOpcode() != PPC::RLWINM8_rec)
3839	return false;
3840	assert((MI.getOperand(`2`).isImm() && MI.getOperand(`3`).isImm() &&
3841	MI.getOperand(`4`).isImm() && SrcMI->getOperand(`2`).isImm() &&
3842	SrcMI->getOperand(`3`).isImm() && SrcMI->getOperand(`4`).isImm()) &&
3843	"Invalid PPC::RLWINM Instruction!");
3844	uint64_t SHSrc = SrcMI->getOperand(i: `2`).getImm();
3845	uint64_t SHMI = MI.getOperand(i: `2`).getImm();
3846	uint64_t MBSrc = SrcMI->getOperand(i: `3`).getImm();
3847	uint64_t MBMI = MI.getOperand(i: `3`).getImm();
3848	uint64_t MESrc = SrcMI->getOperand(i: `4`).getImm();
3849	uint64_t MEMI = MI.getOperand(i: `4`).getImm();
3850
3851	assert((MEMI < `32` && MESrc < `32` && MBMI < `32` && MBSrc < `32`) &&
3852	"Invalid PPC::RLWINM Instruction!");
3853	// If MBMI is bigger than MEMI, we always can not get run of ones.
3854	// RotatedSrcMask non-wrap:
3855	// 0........31\|32........63
3856	// RotatedSrcMask: B---E B---E
3857	// MaskMI: -----------\|--E B------
3858	// Result: ----- --- (Bad candidate)
3859	//
3860	// RotatedSrcMask wrap:
3861	// 0........31\|32........63
3862	// RotatedSrcMask: --E B----\|--E B----
3863	// MaskMI: -----------\|--E B------
3864	// Result: --- -----\|--- ----- (Bad candidate)
3865	//
3866	// One special case is RotatedSrcMask is a full set mask.
3867	// RotatedSrcMask full:
3868	// 0........31\|32........63
3869	// RotatedSrcMask: ------EB---\|-------EB---
3870	// MaskMI: -----------\|--E B------
3871	// Result: -----------\|--- ------- (Good candidate)
3872
3873	// Mark special case.
3874	bool SrcMaskFull = (MBSrc - MESrc == `1`) \|\| (MBSrc == `0` && MESrc == `31`);
3875
3876	// For other MBMI > MEMI cases, just return.
3877	if ((MBMI > MEMI) && !SrcMaskFull)
3878	return false;
3879
3880	// Handle MBMI <= MEMI cases.
3881	APInt MaskMI = APInt::getBitsSetWithWrap(numBits: `32`, loBit: `32` - MEMI - `1`, hiBit: `32` - MBMI);
3882	// In MI, we only need low 32 bits of SrcMI, just consider about low 32
3883	// bit of SrcMI mask. Note that in APInt, lowerest bit is at index 0,
3884	// while in PowerPC ISA, lowerest bit is at index 63.
3885	APInt MaskSrc = APInt::getBitsSetWithWrap(numBits: `32`, loBit: `32` - MESrc - `1`, hiBit: `32` - MBSrc);
3886
3887	APInt RotatedSrcMask = MaskSrc.rotl(rotateAmt: SHMI);
3888	APInt FinalMask = RotatedSrcMask & MaskMI;
3889	uint32_t NewMB, NewME;
3890	bool Simplified = false;
3891
3892	// If final mask is 0, MI result should be 0 too.
3893	if (FinalMask.isZero()) {
3894	bool Is64Bit =
3895	(MI.getOpcode() == PPC::RLWINM8 \|\| MI.getOpcode() == PPC::RLWINM8_rec);
3896	Simplified = true;
3897	LLVM_DEBUG(dbgs() << "Replace Instr: ");
3898	LLVM_DEBUG(MI.dump());
3899
3900	if (MI.getOpcode() == PPC::RLWINM \|\| MI.getOpcode() == PPC::RLWINM8) {
3901	// Replace MI with "LI 0"
3902	MI.removeOperand(OpNo: `4`);
3903	MI.removeOperand(OpNo: `3`);
3904	MI.removeOperand(OpNo: `2`);
3905	MI.getOperand(i: `1`).ChangeToImmediate(ImmVal: `0`);
3906	MI.setDesc(get(Opcode: Is64Bit ? PPC::LI8 : PPC::LI));
3907	} else {
3908	// Replace MI with "ANDI_rec reg, 0"
3909	MI.removeOperand(OpNo: `4`);
3910	MI.removeOperand(OpNo: `3`);
3911	MI.getOperand(i: `2`).setImm(`0`);
3912	MI.setDesc(get(Opcode: Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));
3913	MI.getOperand(i: `1`).setReg(SrcMI->getOperand(i: `1`).getReg());
3914	if (SrcMI->getOperand(i: `1`).isKill()) {
3915	MI.getOperand(i: `1`).setIsKill(true);
3916	SrcMI->getOperand(i: `1`).setIsKill(false);
3917	} else
3918	// About to replace MI.getOperand(1), clear its kill flag.
3919	MI.getOperand(i: `1`).setIsKill(false);
3920	}
3921
3922	LLVM_DEBUG(dbgs() << "With: ");
3923	LLVM_DEBUG(MI.dump());
3924
3925	} else if ((isRunOfOnes(Val: (unsigned)(FinalMask.getZExtValue()), MB&: NewMB, ME&: NewME) &&
3926	NewMB <= NewME) \|\|
3927	SrcMaskFull) {
3928	// Here we only handle MBMI <= MEMI case, so NewMB must be no bigger
3929	// than NewME. Otherwise we get a 64 bit value after folding, but MI
3930	// return a 32 bit value.
3931	Simplified = true;
3932	LLVM_DEBUG(dbgs() << "Converting Instr: ");
3933	LLVM_DEBUG(MI.dump());
3934
3935	uint16_t NewSH = (SHSrc + SHMI) % `32`;
3936	MI.getOperand(i: `2`).setImm(NewSH);
3937	// If SrcMI mask is full, no need to update MBMI and MEMI.
3938	if (!SrcMaskFull) {
3939	MI.getOperand(i: `3`).setImm(NewMB);
3940	MI.getOperand(i: `4`).setImm(NewME);
3941	}
3942	MI.getOperand(i: `1`).setReg(SrcMI->getOperand(i: `1`).getReg());
3943	if (SrcMI->getOperand(i: `1`).isKill()) {
3944	MI.getOperand(i: `1`).setIsKill(true);
3945	SrcMI->getOperand(i: `1`).setIsKill(false);
3946	} else
3947	// About to replace MI.getOperand(1), clear its kill flag.
3948	MI.getOperand(i: `1`).setIsKill(false);
3949
3950	LLVM_DEBUG(dbgs() << "To: ");
3951	LLVM_DEBUG(MI.dump());
3952	}
3953	if (Simplified & MRI->use_nodbg_empty(RegNo: FoldingReg) &&
3954	!SrcMI->hasImplicitDef()) {
3955	// If FoldingReg has no non-debug use and it has no implicit def (it
3956	// is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
3957	// Otherwise keep it.
3958	*ToErase = SrcMI;
3959	LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
3960	LLVM_DEBUG(SrcMI->dump());
3961	}
3962	return Simplified;
3963	}
3964
3965	bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,
3966	ImmInstrInfo &III, bool PostRA) const {
3967	// The vast majority of the instructions would need their operand 2 replaced
3968	// with an immediate when switching to the reg+imm form. A marked exception
3969	// are the update form loads/stores for which a constant operand 2 would need
3970	// to turn into a displacement and move operand 1 to the operand 2 position.
3971	III.ImmOpNo = `2`;
3972	III.OpNoForForwarding = `2`;
3973	III.ImmWidth = `16`;
3974	III.ImmMustBeMultipleOf = `1`;
3975	III.TruncateImmTo = `0`;
3976	III.IsSummingOperands = false;
3977	switch (Opc) {
3978	default: return false;
3979	case PPC::ADD4:
3980	case PPC::ADD8:
3981	III.SignedImm = true;
3982	III.ZeroIsSpecialOrig = `0`;
3983	III.ZeroIsSpecialNew = `1`;
3984	III.IsCommutative = true;
3985	III.IsSummingOperands = true;
3986	III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;
3987	break;
3988	case PPC::ADDC:
3989	case PPC::ADDC8:
3990	III.SignedImm = true;
3991	III.ZeroIsSpecialOrig = `0`;
3992	III.ZeroIsSpecialNew = `0`;
3993	III.IsCommutative = true;
3994	III.IsSummingOperands = true;
3995	III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;
3996	break;
3997	case PPC::ADDC_rec:
3998	III.SignedImm = true;
3999	III.ZeroIsSpecialOrig = `0`;
4000	III.ZeroIsSpecialNew = `0`;
4001	III.IsCommutative = true;
4002	III.IsSummingOperands = true;
4003	III.ImmOpcode = PPC::ADDIC_rec;
4004	break;
4005	case PPC::SUBFC:
4006	case PPC::SUBFC8:
4007	III.SignedImm = true;
4008	III.ZeroIsSpecialOrig = `0`;
4009	III.ZeroIsSpecialNew = `0`;
4010	III.IsCommutative = false;
4011	III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;
4012	break;
4013	case PPC::CMPW:
4014	case PPC::CMPD:
4015	III.SignedImm = true;
4016	III.ZeroIsSpecialOrig = `0`;
4017	III.ZeroIsSpecialNew = `0`;
4018	III.IsCommutative = false;
4019	III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;
4020	break;
4021	case PPC::CMPLW:
4022	case PPC::CMPLD:
4023	III.SignedImm = false;
4024	III.ZeroIsSpecialOrig = `0`;
4025	III.ZeroIsSpecialNew = `0`;
4026	III.IsCommutative = false;
4027	III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;
4028	break;
4029	case PPC::AND_rec:
4030	case PPC::AND8_rec:
4031	case PPC::OR:
4032	case PPC::OR8:
4033	case PPC::XOR:
4034	case PPC::XOR8:
4035	III.SignedImm = false;
4036	III.ZeroIsSpecialOrig = `0`;
4037	III.ZeroIsSpecialNew = `0`;
4038	III.IsCommutative = true;
4039	switch(Opc) {
4040	default: llvm_unreachable("Unknown opcode");
4041	case PPC::AND_rec:
4042	III.ImmOpcode = PPC::ANDI_rec;
4043	break;
4044	case PPC::AND8_rec:
4045	III.ImmOpcode = PPC::ANDI8_rec;
4046	break;
4047	case PPC::OR: III.ImmOpcode = PPC::ORI; break;
4048	case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;
4049	case PPC::XOR: III.ImmOpcode = PPC::XORI; break;
4050	case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;
4051	}
4052	break;
4053	case PPC::RLWNM:
4054	case PPC::RLWNM8:
4055	case PPC::RLWNM_rec:
4056	case PPC::RLWNM8_rec:
4057	case PPC::SLW:
4058	case PPC::SLW8:
4059	case PPC::SLW_rec:
4060	case PPC::SLW8_rec:
4061	case PPC::SRW:
4062	case PPC::SRW8:
4063	case PPC::SRW_rec:
4064	case PPC::SRW8_rec:
4065	case PPC::SRAW:
4066	case PPC::SRAW_rec:
4067	III.SignedImm = false;
4068	III.ZeroIsSpecialOrig = `0`;
4069	III.ZeroIsSpecialNew = `0`;
4070	III.IsCommutative = false;
4071	// This isn't actually true, but the instructions ignore any of the
4072	// upper bits, so any immediate loaded with an LI is acceptable.
4073	// This does not apply to shift right algebraic because a value
4074	// out of range will produce a -1/0.
4075	III.ImmWidth = `16`;
4076	if (Opc == PPC::RLWNM \|\| Opc == PPC::RLWNM8 \|\| Opc == PPC::RLWNM_rec \|\|
4077	Opc == PPC::RLWNM8_rec)
4078	III.TruncateImmTo = `5`;
4079	else
4080	III.TruncateImmTo = `6`;
4081	switch(Opc) {
4082	default: llvm_unreachable("Unknown opcode");
4083	case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;
4084	case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;
4085	case PPC::RLWNM_rec:
4086	III.ImmOpcode = PPC::RLWINM_rec;
4087	break;
4088	case PPC::RLWNM8_rec:
4089	III.ImmOpcode = PPC::RLWINM8_rec;
4090	break;
4091	case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;
4092	case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;
4093	case PPC::SLW_rec:
4094	III.ImmOpcode = PPC::RLWINM_rec;
4095	break;
4096	case PPC::SLW8_rec:
4097	III.ImmOpcode = PPC::RLWINM8_rec;
4098	break;
4099	case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;
4100	case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;
4101	case PPC::SRW_rec:
4102	III.ImmOpcode = PPC::RLWINM_rec;
4103	break;
4104	case PPC::SRW8_rec:
4105	III.ImmOpcode = PPC::RLWINM8_rec;
4106	break;
4107	case PPC::SRAW:
4108	III.ImmWidth = `5`;
4109	III.TruncateImmTo = `0`;
4110	III.ImmOpcode = PPC::SRAWI;
4111	break;
4112	case PPC::SRAW_rec:
4113	III.ImmWidth = `5`;
4114	III.TruncateImmTo = `0`;
4115	III.ImmOpcode = PPC::SRAWI_rec;
4116	break;
4117	}
4118	break;
4119	case PPC::RLDCL:
4120	case PPC::RLDCL_rec:
4121	case PPC::RLDCR:
4122	case PPC::RLDCR_rec:
4123	case PPC::SLD:
4124	case PPC::SLD_rec:
4125	case PPC::SRD:
4126	case PPC::SRD_rec:
4127	case PPC::SRAD:
4128	case PPC::SRAD_rec:
4129	III.SignedImm = false;
4130	III.ZeroIsSpecialOrig = `0`;
4131	III.ZeroIsSpecialNew = `0`;
4132	III.IsCommutative = false;
4133	// This isn't actually true, but the instructions ignore any of the
4134	// upper bits, so any immediate loaded with an LI is acceptable.
4135	// This does not apply to shift right algebraic because a value
4136	// out of range will produce a -1/0.
4137	III.ImmWidth = `16`;
4138	if (Opc == PPC::RLDCL \|\| Opc == PPC::RLDCL_rec \|\| Opc == PPC::RLDCR \|\|
4139	Opc == PPC::RLDCR_rec)
4140	III.TruncateImmTo = `6`;
4141	else
4142	III.TruncateImmTo = `7`;
4143	switch(Opc) {
4144	default: llvm_unreachable("Unknown opcode");
4145	case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;
4146	case PPC::RLDCL_rec:
4147	III.ImmOpcode = PPC::RLDICL_rec;
4148	break;
4149	case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;
4150	case PPC::RLDCR_rec:
4151	III.ImmOpcode = PPC::RLDICR_rec;
4152	break;
4153	case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;
4154	case PPC::SLD_rec:
4155	III.ImmOpcode = PPC::RLDICR_rec;
4156	break;
4157	case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;
4158	case PPC::SRD_rec:
4159	III.ImmOpcode = PPC::RLDICL_rec;
4160	break;
4161	case PPC::SRAD:
4162	III.ImmWidth = `6`;
4163	III.TruncateImmTo = `0`;
4164	III.ImmOpcode = PPC::SRADI;
4165	break;
4166	case PPC::SRAD_rec:
4167	III.ImmWidth = `6`;
4168	III.TruncateImmTo = `0`;
4169	III.ImmOpcode = PPC::SRADI_rec;
4170	break;
4171	}
4172	break;
4173	// Loads and stores:
4174	case PPC::LBZX:
4175	case PPC::LBZX8:
4176	case PPC::LHZX:
4177	case PPC::LHZX8:
4178	case PPC::LHAX:
4179	case PPC::LHAX8:
4180	case PPC::LWZX:
4181	case PPC::LWZX8:
4182	case PPC::LWAX:
4183	case PPC::LDX:
4184	case PPC::LFSX:
4185	case PPC::LFDX:
4186	case PPC::STBX:
4187	case PPC::STBX8:
4188	case PPC::STHX:
4189	case PPC::STHX8:
4190	case PPC::STWX:
4191	case PPC::STWX8:
4192	case PPC::STDX:
4193	case PPC::STFSX:
4194	case PPC::STFDX:
4195	III.SignedImm = true;
4196	III.ZeroIsSpecialOrig = `1`;
4197	III.ZeroIsSpecialNew = `2`;
4198	III.IsCommutative = true;
4199	III.IsSummingOperands = true;
4200	III.ImmOpNo = `1`;
4201	III.OpNoForForwarding = `2`;
4202	switch(Opc) {
4203	default: llvm_unreachable("Unknown opcode");
4204	case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;
4205	case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;
4206	case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;
4207	case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;
4208	case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;
4209	case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;
4210	case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;
4211	case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;
4212	case PPC::LWAX:
4213	III.ImmOpcode = PPC::LWA;
4214	III.ImmMustBeMultipleOf = `4`;
4215	break;
4216	case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = `4`; break;
4217	case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;
4218	case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;
4219	case PPC::STBX: III.ImmOpcode = PPC::STB; break;
4220	case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;
4221	case PPC::STHX: III.ImmOpcode = PPC::STH; break;
4222	case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;
4223	case PPC::STWX: III.ImmOpcode = PPC::STW; break;
4224	case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;
4225	case PPC::STDX:
4226	III.ImmOpcode = PPC::STD;
4227	III.ImmMustBeMultipleOf = `4`;
4228	break;
4229	case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;
4230	case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;
4231	}
4232	break;
4233	case PPC::LBZUX:
4234	case PPC::LBZUX8:
4235	case PPC::LHZUX:
4236	case PPC::LHZUX8:
4237	case PPC::LHAUX:
4238	case PPC::LHAUX8:
4239	case PPC::LWZUX:
4240	case PPC::LWZUX8:
4241	case PPC::LDUX:
4242	case PPC::LFSUX:
4243	case PPC::LFDUX:
4244	case PPC::STBUX:
4245	case PPC::STBUX8:
4246	case PPC::STHUX:
4247	case PPC::STHUX8:
4248	case PPC::STWUX:
4249	case PPC::STWUX8:
4250	case PPC::STDUX:
4251	case PPC::STFSUX:
4252	case PPC::STFDUX:
4253	III.SignedImm = true;
4254	III.ZeroIsSpecialOrig = `2`;
4255	III.ZeroIsSpecialNew = `3`;
4256	III.IsCommutative = false;
4257	III.IsSummingOperands = true;
4258	III.ImmOpNo = `2`;
4259	III.OpNoForForwarding = `3`;
4260	switch(Opc) {
4261	default: llvm_unreachable("Unknown opcode");
4262	case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;
4263	case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;
4264	case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;
4265	case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;
4266	case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;
4267	case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;
4268	case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;
4269	case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;
4270	case PPC::LDUX:
4271	III.ImmOpcode = PPC::LDU;
4272	III.ImmMustBeMultipleOf = `4`;
4273	break;
4274	case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;
4275	case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;
4276	case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;
4277	case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;
4278	case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;
4279	case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;
4280	case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;
4281	case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;
4282	case PPC::STDUX:
4283	III.ImmOpcode = PPC::STDU;
4284	III.ImmMustBeMultipleOf = `4`;
4285	break;
4286	case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;
4287	case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;
4288	}
4289	break;
4290	// Power9 and up only. For some of these, the X-Form version has access to all
4291	// 64 VSR's whereas the D-Form only has access to the VR's. We replace those
4292	// with pseudo-ops pre-ra and for post-ra, we check that the register loaded
4293	// into or stored from is one of the VR registers.
4294	case PPC::LXVX:
4295	case PPC::LXSSPX:
4296	case PPC::LXSDX:
4297	case PPC::STXVX:
4298	case PPC::STXSSPX:
4299	case PPC::STXSDX:
4300	case PPC::XFLOADf32:
4301	case PPC::XFLOADf64:
4302	case PPC::XFSTOREf32:
4303	case PPC::XFSTOREf64:
4304	if (!Subtarget.hasP9Vector())
4305	return false;
4306	III.SignedImm = true;
4307	III.ZeroIsSpecialOrig = `1`;
4308	III.ZeroIsSpecialNew = `2`;
4309	III.IsCommutative = true;
4310	III.IsSummingOperands = true;
4311	III.ImmOpNo = `1`;
4312	III.OpNoForForwarding = `2`;
4313	III.ImmMustBeMultipleOf = `4`;
4314	switch(Opc) {
4315	default: llvm_unreachable("Unknown opcode");
4316	case PPC::LXVX:
4317	III.ImmOpcode = PPC::LXV;
4318	III.ImmMustBeMultipleOf = `16`;
4319	break;
4320	case PPC::LXSSPX:
4321	if (PostRA) {
4322	if (IsVFReg)
4323	III.ImmOpcode = PPC::LXSSP;
4324	else {
4325	III.ImmOpcode = PPC::LFS;
4326	III.ImmMustBeMultipleOf = `1`;
4327	}
4328	break;
4329	}
4330	[[fallthrough]];
4331	case PPC::XFLOADf32:
4332	III.ImmOpcode = PPC::DFLOADf32;
4333	break;
4334	case PPC::LXSDX:
4335	if (PostRA) {
4336	if (IsVFReg)
4337	III.ImmOpcode = PPC::LXSD;
4338	else {
4339	III.ImmOpcode = PPC::LFD;
4340	III.ImmMustBeMultipleOf = `1`;
4341	}
4342	break;
4343	}
4344	[[fallthrough]];
4345	case PPC::XFLOADf64:
4346	III.ImmOpcode = PPC::DFLOADf64;
4347	break;
4348	case PPC::STXVX:
4349	III.ImmOpcode = PPC::STXV;
4350	III.ImmMustBeMultipleOf = `16`;
4351	break;
4352	case PPC::STXSSPX:
4353	if (PostRA) {
4354	if (IsVFReg)
4355	III.ImmOpcode = PPC::STXSSP;
4356	else {
4357	III.ImmOpcode = PPC::STFS;
4358	III.ImmMustBeMultipleOf = `1`;
4359	}
4360	break;
4361	}
4362	[[fallthrough]];
4363	case PPC::XFSTOREf32:
4364	III.ImmOpcode = PPC::DFSTOREf32;
4365	break;
4366	case PPC::STXSDX:
4367	if (PostRA) {
4368	if (IsVFReg)
4369	III.ImmOpcode = PPC::STXSD;
4370	else {
4371	III.ImmOpcode = PPC::STFD;
4372	III.ImmMustBeMultipleOf = `1`;
4373	}
4374	break;
4375	}
4376	[[fallthrough]];
4377	case PPC::XFSTOREf64:
4378	III.ImmOpcode = PPC::DFSTOREf64;
4379	break;
4380	}
4381	break;
4382	}
4383	return true;
4384	}
4385
4386	// Utility function for swaping two arbitrary operands of an instruction.
4387	static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {
4388	assert(Op1 != Op2 && "Cannot swap operand with itself.");
4389
4390	unsigned MaxOp = std::max(a: Op1, b: Op2);
4391	unsigned MinOp = std::min(a: Op1, b: Op2);
4392	MachineOperand MOp1 = MI.getOperand(i: MinOp);
4393	MachineOperand MOp2 = MI.getOperand(i: MaxOp);
4394	MI.removeOperand(OpNo: std::max(a: Op1, b: Op2));
4395	MI.removeOperand(OpNo: std::min(a: Op1, b: Op2));
4396
4397	// If the operands we are swapping are the two at the end (the common case)
4398	// we can just remove both and add them in the opposite order.
4399	if (MaxOp - MinOp == `1` && MI.getNumOperands() == MinOp) {
4400	MI.addOperand(Op: MOp2);
4401	MI.addOperand(Op: MOp1);
4402	} else {
4403	// Store all operands in a temporary vector, remove them and re-add in the
4404	// right order.
4405	SmallVector<MachineOperand, `2`> MOps;
4406	unsigned TotalOps = MI.getNumOperands() + `2`; // We've already removed 2 ops.
4407	for (unsigned i = MI.getNumOperands() - `1`; i >= MinOp; i--) {
4408	MOps.push_back(Elt: MI.getOperand(i));
4409	MI.removeOperand(OpNo: i);
4410	}
4411	// MOp2 needs to be added next.
4412	MI.addOperand(Op: MOp2);
4413	// Now add the rest.
4414	for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {
4415	if (i == MaxOp)
4416	MI.addOperand(Op: MOp1);
4417	else {
4418	MI.addOperand(Op: MOps.back());
4419	MOps.pop_back();
4420	}
4421	}
4422	}
4423	}
4424
4425	// Check if the 'MI' that has the index OpNoForForwarding
4426	// meets the requirement described in the ImmInstrInfo.
4427	bool PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,
4428	const ImmInstrInfo &III,
4429	unsigned OpNoForForwarding
4430	) const {
4431	// As the algorithm of checking for PPC::ZERO/PPC::ZERO8
4432	// would not work pre-RA, we can only do the check post RA.
4433	MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4434	if (MRI.isSSA())
4435	return false;
4436
4437	// Cannot do the transform if MI isn't summing the operands.
4438	if (!III.IsSummingOperands)
4439	return false;
4440
4441	// The instruction we are trying to replace must have the ZeroIsSpecialOrig set.
4442	if (!III.ZeroIsSpecialOrig)
4443	return false;
4444
4445	// We cannot do the transform if the operand we are trying to replace
4446	// isn't the same as the operand the instruction allows.
4447	if (OpNoForForwarding != III.OpNoForForwarding)
4448	return false;
4449
4450	// Check if the instruction we are trying to transform really has
4451	// the special zero register as its operand.
4452	if (MI.getOperand(i: III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&
4453	MI.getOperand(i: III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)
4454	return false;
4455
4456	// This machine instruction is convertible if it is,
4457	// 1. summing the operands.
4458	// 2. one of the operands is special zero register.
4459	// 3. the operand we are trying to replace is allowed by the MI.
4460	return true;
4461	}
4462
4463	// Check if the DefMI is the add inst and set the ImmMO and RegMO
4464	// accordingly.
4465	bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
4466	const ImmInstrInfo &III,
4467	MachineOperand *&ImmMO,
4468	MachineOperand &RegMO) const* {
4469	unsigned Opc = DefMI.getOpcode();
4470	if (Opc != PPC::ADDItocL8 && Opc != PPC::ADDI && Opc != PPC::ADDI8)
4471	return false;
4472
4473	// Skip the optimization of transformTo[NewImm\|Imm]FormFedByAdd for ADDItocL8
4474	// on AIX which is used for toc-data access. TODO: Follow up to see if it can
4475	// apply for AIX toc-data as well.
4476	if (Opc == PPC::ADDItocL8 && Subtarget.isAIX())
4477	return false;
4478
4479	assert(DefMI.getNumOperands() >= `3` &&
4480	"Add inst must have at least three operands");
4481	RegMO = &DefMI.getOperand(i: `1`);
4482	ImmMO = &DefMI.getOperand(i: `2`);
4483
4484	// Before RA, ADDI first operand could be a frame index.
4485	if (!RegMO->isReg())
4486	return false;
4487
4488	// This DefMI is elgible for forwarding if it is:
4489	// 1. add inst
4490	// 2. one of the operands is Imm/CPI/Global.
4491	return isAnImmediateOperand(MO: *ImmMO);
4492	}
4493
4494	bool PPCInstrInfo::isRegElgibleForForwarding(
4495	const MachineOperand &RegMO, const MachineInstr &DefMI,
4496	const MachineInstr &MI, bool KillDefMI,
4497	bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
4498	// x = addi y, imm
4499	// ...
4500	// z = lfdx 0, x -> z = lfd imm(y)
4501	// The Reg "y" can be forwarded to the MI(z) only when there is no DEF
4502	// of "y" between the DEF of "x" and "z".
4503	// The query is only valid post RA.
4504	const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4505	if (MRI.isSSA())
4506	return false;
4507
4508	Register Reg = RegMO.getReg();
4509
4510	// Walking the inst in reverse(MI-->DefMI) to get the last DEF of the Reg.
4511	MachineBasicBlock::const_reverse_iterator It = MI;
4512	MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
4513	It ++;
4514	for (; It != E; ++It) {
4515	if (It ->modifiesRegister(Reg, TRI: &getRegisterInfo()) && (&*It) != &DefMI)
4516	return false;
4517	else if (It ->killsRegister(Reg, TRI: &getRegisterInfo()) && (&*It) != &DefMI)
4518	IsFwdFeederRegKilled = true;
4519	if (It ->readsRegister(Reg, TRI: &getRegisterInfo()) && (&*It) != &DefMI)
4520	SeenIntermediateUse = true;
4521	// Made it to DefMI without encountering a clobber.
4522	if ((&*It) == &DefMI)
4523	break;
4524	}
4525	assert((&*It) == &DefMI && "DefMI is missing");
4526
4527	// If DefMI also defines the register to be forwarded, we can only forward it
4528	// if DefMI is being erased.
4529	if (DefMI.modifiesRegister(Reg, TRI: &getRegisterInfo()))
4530	return KillDefMI;
4531
4532	return true;
4533	}
4534
4535	bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
4536	const MachineInstr &DefMI,
4537	const ImmInstrInfo &III,
4538	int64_t &Imm,
4539	int64_t BaseImm) const {
4540	assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
4541	if (DefMI.getOpcode() == PPC::ADDItocL8) {
4542	// The operand for ADDItocL8 is CPI, which isn't imm at compiling time,
4543	// However, we know that, it is 16-bit width, and has the alignment of 4.
4544	// Check if the instruction met the requirement.
4545	if (III.ImmMustBeMultipleOf > `4` \|\|
4546	III.TruncateImmTo \|\| III.ImmWidth != `16`)
4547	return false;
4548
4549	// Going from XForm to DForm loads means that the displacement needs to be
4550	// not just an immediate but also a multiple of 4, or 16 depending on the
4551	// load. A DForm load cannot be represented if it is a multiple of say 2.
4552	// XForm loads do not have this restriction.
4553	if (ImmMO.isGlobal()) {
4554	const DataLayout &DL = ImmMO.getGlobal()->getDataLayout();
4555	if (ImmMO.getGlobal()->getPointerAlignment(DL) < III.ImmMustBeMultipleOf)
4556	return false;
4557	}
4558
4559	return true;
4560	}
4561
4562	if (ImmMO.isImm()) {
4563	// It is Imm, we need to check if the Imm fit the range.
4564	// Sign-extend to 64-bits.
4565	// DefMI may be folded with another imm form instruction, the result Imm is
4566	// the sum of Imm of DefMI and BaseImm which is from imm form instruction.
4567	APInt ActualValue(`64`, ImmMO.getImm() + BaseImm, true);
4568	if (III.SignedImm && !ActualValue.isSignedIntN(N: III.ImmWidth))
4569	return false;
4570	if (!III.SignedImm && !ActualValue.isIntN(N: III.ImmWidth))
4571	return false;
4572	Imm = SignExtend64<`16`>(x: ImmMO.getImm() + BaseImm);
4573
4574	if (Imm % III.ImmMustBeMultipleOf)
4575	return false;
4576	if (III.TruncateImmTo)
4577	Imm &= ((`1` << III.TruncateImmTo) - `1`);
4578	}
4579	else
4580	return false;
4581
4582	// This ImmMO is forwarded if it meets the requriement describle
4583	// in ImmInstrInfo
4584	return true;
4585	}
4586
4587	bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
4588	unsigned OpNoForForwarding,
4589	MachineInstr *KilledDef) const* {
4590	if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) \|\|
4591	!DefMI.getOperand(i: `1`).isImm())
4592	return false;
4593
4594	MachineFunction *MF = MI.getParent()->getParent();
4595	MachineRegisterInfo *MRI = &MF->getRegInfo();
4596	bool PostRA = !MRI->isSSA();
4597
4598	int64_t Immediate = DefMI.getOperand(i: `1`).getImm();
4599	// Sign-extend to 64-bits.
4600	int64_t SExtImm = SignExtend64<`16`>(x: Immediate);
4601
4602	bool ReplaceWithLI = false;
4603	bool Is64BitLI = false;
4604	int64_t NewImm = `0`;
4605	bool SetCR = false;
4606	unsigned Opc = MI.getOpcode();
4607	switch (Opc) {
4608	default:
4609	return false;
4610
4611	// FIXME: Any branches conditional on such a comparison can be made
4612	// unconditional. At this time, this happens too infrequently to be worth
4613	// the implementation effort, but if that ever changes, we could convert
4614	// such a pattern here.
4615	case PPC::CMPWI:
4616	case PPC::CMPLWI:
4617	case PPC::CMPDI:
4618	case PPC::CMPLDI: {
4619	// Doing this post-RA would require dataflow analysis to reliably find uses
4620	// of the CR register set by the compare.
4621	// No need to fixup killed/dead flag since this transformation is only valid
4622	// before RA.
4623	if (PostRA)
4624	return false;
4625	// If a compare-immediate is fed by an immediate and is itself an input of
4626	// an ISEL (the most common case) into a COPY of the correct register.
4627	bool Changed = false;
4628	Register DefReg = MI.getOperand(i: `0`).getReg();
4629	int64_t Comparand = MI.getOperand(i: `2`).getImm();
4630	int64_t SExtComparand = ((uint64_t)Comparand & ~`0x7FFFuLL`) != `0`
4631	? (Comparand \| `0xFFFFFFFFFFFF0000`)
4632	: Comparand;
4633
4634	for (auto &CompareUseMI : MRI->use_instructions(Reg: DefReg)) {
4635	unsigned UseOpc = CompareUseMI.getOpcode();
4636	if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)
4637	continue;
4638	unsigned CRSubReg = CompareUseMI.getOperand(i: `3`).getSubReg();
4639	Register TrueReg = CompareUseMI.getOperand(i: `1`).getReg();
4640	Register FalseReg = CompareUseMI.getOperand(i: `2`).getReg();
4641	unsigned RegToCopy =
4642	selectReg(Imm1: SExtImm, Imm2: SExtComparand, CompareOpc: Opc, TrueReg, FalseReg, CRSubReg);
4643	if (RegToCopy == PPC::NoRegister)
4644	continue;
4645	// Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.
4646	if (RegToCopy == PPC::ZERO \|\| RegToCopy == PPC::ZERO8) {
4647	CompareUseMI.setDesc(get(Opcode: UseOpc == PPC::ISEL8 ? PPC::LI8 : PPC::LI));
4648	replaceInstrOperandWithImm(MI&: CompareUseMI, OpNo: `1`, Imm: `0`);
4649	CompareUseMI.removeOperand(OpNo: `3`);
4650	CompareUseMI.removeOperand(OpNo: `2`);
4651	continue;
4652	}
4653	LLVM_DEBUG(
4654	dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");
4655	LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());
4656	LLVM_DEBUG(dbgs() << "Is converted to:\n");
4657	// Convert to copy and remove unneeded operands.
4658	CompareUseMI.setDesc(get(Opcode: PPC::COPY));
4659	CompareUseMI.removeOperand(OpNo: `3`);
4660	CompareUseMI.removeOperand(OpNo: RegToCopy == TrueReg ? `2` : `1`);
4661	CmpIselsConverted ++;
4662	Changed = true;
4663	LLVM_DEBUG(CompareUseMI.dump());
4664	}
4665	if (Changed)
4666	return true;
4667	// This may end up incremented multiple times since this function is called
4668	// during a fixed-point transformation, but it is only meant to indicate the
4669	// presence of this opportunity.
4670	MissedConvertibleImmediateInstrs ++;
4671	return false;
4672	}
4673
4674	// Immediate forms - may simply be convertable to an LI.
4675	case PPC::ADDI:
4676	case PPC::ADDI8: {
4677	// Does the sum fit in a 16-bit signed field?
4678	int64_t Addend = MI.getOperand(i: `2`).getImm();
4679	if (isInt<`16`>(x: Addend + SExtImm)) {
4680	ReplaceWithLI = true;
4681	Is64BitLI = Opc == PPC::ADDI8;
4682	NewImm = Addend + SExtImm;
4683	break;
4684	}
4685	return false;
4686	}
4687	case PPC::SUBFIC:
4688	case PPC::SUBFIC8: {
4689	// Only transform this if the CARRY implicit operand is dead.
4690	if (MI.getNumOperands() > `3` && !MI.getOperand(i: `3`).isDead())
4691	return false;
4692	int64_t Minuend = MI.getOperand(i: `2`).getImm();
4693	if (isInt<`16`>(x: Minuend - SExtImm)) {
4694	ReplaceWithLI = true;
4695	Is64BitLI = Opc == PPC::SUBFIC8;
4696	NewImm = Minuend - SExtImm;
4697	break;
4698	}
4699	return false;
4700	}
4701	case PPC::RLDICL:
4702	case PPC::RLDICL_rec:
4703	case PPC::RLDICL_32:
4704	case PPC::RLDICL_32_64: {
4705	// Use APInt's rotate function.
4706	int64_t SH = MI.getOperand(i: `2`).getImm();
4707	int64_t MB = MI.getOperand(i: `3`).getImm();
4708	APInt InVal((Opc == PPC::RLDICL \|\| Opc == PPC::RLDICL_rec) ? `64` : `32`,
4709	SExtImm, true);
4710	InVal = InVal.rotl(rotateAmt: SH);
4711	uint64_t Mask = MB == `0` ? -`1LLU` : (`1LLU` << (`63` - MB + `1`)) - `1`;
4712	InVal &= Mask;
4713	// Can't replace negative values with an LI as that will sign-extend
4714	// and not clear the left bits. If we're setting the CR bit, we will use
4715	// ANDI_rec which won't sign extend, so that's safe.
4716	if (isUInt<`15`>(x: InVal.getSExtValue()) \|\|
4717	(Opc == PPC::RLDICL_rec && isUInt<`16`>(x: InVal.getSExtValue()))) {
4718	ReplaceWithLI = true;
4719	Is64BitLI = Opc != PPC::RLDICL_32;
4720	NewImm = InVal.getSExtValue();
4721	SetCR = Opc == PPC::RLDICL_rec;
4722	break;
4723	}
4724	return false;
4725	}
4726	case PPC::RLWINM:
4727	case PPC::RLWINM8:
4728	case PPC::RLWINM_rec:
4729	case PPC::RLWINM8_rec: {
4730	int64_t SH = MI.getOperand(i: `2`).getImm();
4731	int64_t MB = MI.getOperand(i: `3`).getImm();
4732	int64_t ME = MI.getOperand(i: `4`).getImm();
4733	APInt InVal(`32`, SExtImm, true);
4734	InVal = InVal.rotl(rotateAmt: SH);
4735	APInt Mask = APInt::getBitsSetWithWrap(numBits: `32`, loBit: `32` - ME - `1`, hiBit: `32` - MB);
4736	InVal &= Mask;
4737	// Can't replace negative values with an LI as that will sign-extend
4738	// and not clear the left bits. If we're setting the CR bit, we will use
4739	// ANDI_rec which won't sign extend, so that's safe.
4740	bool ValueFits = isUInt<`15`>(x: InVal.getSExtValue());
4741	ValueFits \|= ((Opc == PPC::RLWINM_rec \|\| Opc == PPC::RLWINM8_rec) &&
4742	isUInt<`16`>(x: InVal.getSExtValue()));
4743	if (ValueFits) {
4744	ReplaceWithLI = true;
4745	Is64BitLI = Opc == PPC::RLWINM8 \|\| Opc == PPC::RLWINM8_rec;
4746	NewImm = InVal.getSExtValue();
4747	SetCR = Opc == PPC::RLWINM_rec \|\| Opc == PPC::RLWINM8_rec;
4748	break;
4749	}
4750	return false;
4751	}
4752	case PPC::ORI:
4753	case PPC::ORI8:
4754	case PPC::XORI:
4755	case PPC::XORI8: {
4756	int64_t LogicalImm = MI.getOperand(i: `2`).getImm();
4757	int64_t Result = `0`;
4758	if (Opc == PPC::ORI \|\| Opc == PPC::ORI8)
4759	Result = LogicalImm \| SExtImm;
4760	else
4761	Result = LogicalImm ^ SExtImm;
4762	if (isInt<`16`>(x: Result)) {
4763	ReplaceWithLI = true;
4764	Is64BitLI = Opc == PPC::ORI8 \|\| Opc == PPC::XORI8;
4765	NewImm = Result;
4766	break;
4767	}
4768	return false;
4769	}
4770	}
4771
4772	if (ReplaceWithLI) {
4773	// We need to be careful with CR-setting instructions we're replacing.
4774	if (SetCR) {
4775	// We don't know anything about uses when we're out of SSA, so only
4776	// replace if the new immediate will be reproduced.
4777	bool ImmChanged = (SExtImm & NewImm) != NewImm;
4778	if (PostRA && ImmChanged)
4779	return false;
4780
4781	if (!PostRA) {
4782	// If the defining load-immediate has no other uses, we can just replace
4783	// the immediate with the new immediate.
4784	if (MRI->hasOneUse(RegNo: DefMI.getOperand(i: `0`).getReg()))
4785	DefMI.getOperand(i: `1`).setImm(NewImm);
4786
4787	// If we're not using the GPR result of the CR-setting instruction, we
4788	// just need to and with zero/non-zero depending on the new immediate.
4789	else if (MRI->use_empty(RegNo: MI.getOperand(i: `0`).getReg())) {
4790	if (NewImm) {
4791	assert(Immediate && "Transformation converted zero to non-zero?");
4792	NewImm = Immediate;
4793	}
4794	} else if (ImmChanged)
4795	return false;
4796	}
4797	}
4798
4799	LLVM_DEBUG(dbgs() << "Replacing constant instruction:\n");
4800	LLVM_DEBUG(MI.dump());
4801	LLVM_DEBUG(dbgs() << "Fed by:\n");
4802	LLVM_DEBUG(DefMI.dump());
4803	LoadImmediateInfo LII;
4804	LII.Imm = NewImm;
4805	LII.Is64Bit = Is64BitLI;
4806	LII.SetCR = SetCR;
4807	// If we're setting the CR, the original load-immediate must be kept (as an
4808	// operand to ANDI_rec/ANDI8_rec).
4809	if (KilledDef && SetCR)
4810	KilledDef = nullptr*;
4811	replaceInstrWithLI(MI, LII);
4812
4813	if (PostRA)
4814	recomputeLivenessFlags(MBB&: *MI.getParent());
4815
4816	LLVM_DEBUG(dbgs() << "With:\n");
4817	LLVM_DEBUG(MI.dump());
4818	return true;
4819	}
4820	return false;
4821	}
4822
4823	bool PPCInstrInfo::transformToNewImmFormFedByAdd(
4824	MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const {
4825	MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
4826	bool PostRA = !MRI->isSSA();
4827	// FIXME: extend this to post-ra. Need to do some change in getForwardingDefMI
4828	// for post-ra.
4829	if (PostRA)
4830	return false;
4831
4832	// Only handle load/store.
4833	if (!MI.mayLoadOrStore())
4834	return false;
4835
4836	unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(ImmOpcode: MI.getOpcode());
4837
4838	assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&
4839	"MI must have x-form opcode");
4840
4841	// get Imm Form info.
4842	ImmInstrInfo III;
4843	bool IsVFReg = MI.getOperand(i: `0`).isReg()
4844	? PPC::isVFRegister(Reg: MI.getOperand(i: `0`).getReg())
4845	: false;
4846
4847	if (!instrHasImmForm(Opc: XFormOpcode, IsVFReg, III, PostRA))
4848	return false;
4849
4850	if (!III.IsSummingOperands)
4851	return false;
4852
4853	if (OpNoForForwarding != III.OpNoForForwarding)
4854	return false;
4855
4856	MachineOperand ImmOperandMI = MI.getOperand(i: III.ImmOpNo);
4857	if (!ImmOperandMI.isImm())
4858	return false;
4859
4860	// Check DefMI.
4861	MachineOperand ImmMO = nullptr*;
4862	MachineOperand RegMO = nullptr*;
4863	if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4864	return false;
4865	assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4866
4867	// Check Imm.
4868	// Set ImmBase from imm instruction as base and get new Imm inside
4869	// isImmElgibleForForwarding.
4870	int64_t ImmBase = ImmOperandMI.getImm();
4871	int64_t Imm = `0`;
4872	if (!isImmElgibleForForwarding(ImmMO: *ImmMO, DefMI, III, Imm, BaseImm: ImmBase))
4873	return false;
4874
4875	// Do the transform
4876	LLVM_DEBUG(dbgs() << "Replacing existing reg+imm instruction:\n");
4877	LLVM_DEBUG(MI.dump());
4878	LLVM_DEBUG(dbgs() << "Fed by:\n");
4879	LLVM_DEBUG(DefMI.dump());
4880
4881	MI.getOperand(i: III.OpNoForForwarding).setReg(RegMO->getReg());
4882	MI.getOperand(i: III.ImmOpNo).setImm(Imm);
4883
4884	LLVM_DEBUG(dbgs() << "With:\n");
4885	LLVM_DEBUG(MI.dump());
4886	return true;
4887	}
4888
4889	// If an X-Form instruction is fed by an add-immediate and one of its operands
4890	// is the literal zero, attempt to forward the source of the add-immediate to
4891	// the corresponding D-Form instruction with the displacement coming from
4892	// the immediate being added.
4893	bool PPCInstrInfo::transformToImmFormFedByAdd(
4894	MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,
4895	MachineInstr &DefMI, bool KillDefMI) const {
4896	// RegMO ImmMO
4897	// \| \|
4898	// x = addi reg, imm <----- DefMI
4899	// y = op 0 , x <----- MI
4900	// \|
4901	// OpNoForForwarding
4902	// Check if the MI meet the requirement described in the III.
4903	if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))
4904	return false;
4905
4906	// Check if the DefMI meet the requirement
4907	// described in the III. If yes, set the ImmMO and RegMO accordingly.
4908	MachineOperand ImmMO = nullptr*;
4909	MachineOperand RegMO = nullptr*;
4910	if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))
4911	return false;
4912	assert(ImmMO && RegMO && "Imm and Reg operand must have been set");
4913
4914	// As we get the Imm operand now, we need to check if the ImmMO meet
4915	// the requirement described in the III. If yes set the Imm.
4916	int64_t Imm = `0`;
4917	if (!isImmElgibleForForwarding(ImmMO: *ImmMO, DefMI, III, Imm))
4918	return false;
4919
4920	bool IsFwdFeederRegKilled = false;
4921	bool SeenIntermediateUse = false;
4922	// Check if the RegMO can be forwarded to MI.
4923	if (!isRegElgibleForForwarding(RegMO: *RegMO, DefMI, MI, KillDefMI,
4924	IsFwdFeederRegKilled, SeenIntermediateUse))
4925	return false;
4926
4927	MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
4928	bool PostRA = !MRI.isSSA();
4929
4930	// We know that, the MI and DefMI both meet the pattern, and
4931	// the Imm also meet the requirement with the new Imm-form.
4932	// It is safe to do the transformation now.
4933	LLVM_DEBUG(dbgs() << "Replacing indexed instruction:\n");
4934	LLVM_DEBUG(MI.dump());
4935	LLVM_DEBUG(dbgs() << "Fed by:\n");
4936	LLVM_DEBUG(DefMI.dump());
4937
4938	// Update the base reg first.
4939	MI.getOperand(i: III.OpNoForForwarding).ChangeToRegister(Reg: RegMO->getReg(),
4940	isDef: false, isImp: false,
4941	isKill: RegMO->isKill());
4942
4943	// Then, update the imm.
4944	if (ImmMO->isImm()) {
4945	// If the ImmMO is Imm, change the operand that has ZERO to that Imm
4946	// directly.
4947	replaceInstrOperandWithImm(MI, OpNo: III.ZeroIsSpecialOrig, Imm);
4948	}
4949	else {
4950	// Otherwise, it is Constant Pool Index(CPI) or Global,
4951	// which is relocation in fact. We need to replace the special zero
4952	// register with ImmMO.
4953	// Before that, we need to fixup the target flags for imm.
4954	// For some reason, we miss to set the flag for the ImmMO if it is CPI.
4955	if (DefMI.getOpcode() == PPC::ADDItocL8)
4956	ImmMO->setTargetFlags(PPCII::MO_TOC_LO);
4957
4958	// MI didn't have the interface such as MI.setOperand(i) though
4959	// it has MI.getOperand(i). To repalce the ZERO MachineOperand with
4960	// ImmMO, we need to remove ZERO operand and all the operands behind it,
4961	// and, add the ImmMO, then, move back all the operands behind ZERO.
4962	SmallVector<MachineOperand, `2`> MOps;
4963	for (unsigned i = MI.getNumOperands() - `1`; i >= III.ZeroIsSpecialOrig; i--) {
4964	MOps.push_back(Elt: MI.getOperand(i));
4965	MI.removeOperand(OpNo: i);
4966	}
4967
4968	// Remove the last MO in the list, which is ZERO operand in fact.
4969	MOps.pop_back();
4970	// Add the imm operand.
4971	MI.addOperand(Op: *ImmMO);
4972	// Now add the rest back.
4973	for (auto &MO : MOps)
4974	MI.addOperand(Op: MO);
4975	}
4976
4977	// Update the opcode.
4978	MI.setDesc(get(Opcode: III.ImmOpcode));
4979
4980	if (PostRA)
4981	recomputeLivenessFlags(MBB&: *MI.getParent());
4982	LLVM_DEBUG(dbgs() << "With:\n");
4983	LLVM_DEBUG(MI.dump());
4984
4985	return true;
4986	}
4987
4988	bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
4989	const ImmInstrInfo &III,
4990	unsigned ConstantOpNo,
4991	MachineInstr &DefMI) const {
4992	// DefMI must be LI or LI8.
4993	if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) \|\|
4994	!DefMI.getOperand(i: `1`).isImm())
4995	return false;
4996
4997	// Get Imm operand and Sign-extend to 64-bits.
4998	int64_t Imm = SignExtend64<`16`>(x: DefMI.getOperand(i: `1`).getImm());
4999
5000	MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5001	bool PostRA = !MRI.isSSA();
5002	// Exit early if we can't convert this.
5003	if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
5004	return false;
5005	if (Imm % III.ImmMustBeMultipleOf)
5006	return false;
5007	if (III.TruncateImmTo)
5008	Imm &= ((`1` << III.TruncateImmTo) - `1`);
5009	if (III.SignedImm) {
5010	APInt ActualValue(`64`, Imm, true);
5011	if (!ActualValue.isSignedIntN(N: III.ImmWidth))
5012	return false;
5013	} else {
5014	uint64_t UnsignedMax = (`1` << III.ImmWidth) - `1`;
5015	if ((uint64_t)Imm > UnsignedMax)
5016	return false;
5017	}
5018
5019	// If we're post-RA, the instructions don't agree on whether register zero is
5020	// special, we can transform this as long as the register operand that will
5021	// end up in the location where zero is special isn't R0.
5022	if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
5023	unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
5024	III.ZeroIsSpecialNew + `1`;
5025	Register OrigZeroReg = MI.getOperand(i: PosForOrigZero).getReg();
5026	Register NewZeroReg = MI.getOperand(i: III.ZeroIsSpecialNew).getReg();
5027	// If R0 is in the operand where zero is special for the new instruction,
5028	// it is unsafe to transform if the constant operand isn't that operand.
5029	if ((NewZeroReg == PPC::R0 \|\| NewZeroReg == PPC::X0) &&
5030	ConstantOpNo != III.ZeroIsSpecialNew)
5031	return false;
5032	if ((OrigZeroReg == PPC::R0 \|\| OrigZeroReg == PPC::X0) &&
5033	ConstantOpNo != PosForOrigZero)
5034	return false;
5035	}
5036
5037	unsigned Opc = MI.getOpcode();
5038	bool SpecialShift32 = Opc == PPC::SLW \|\| Opc == PPC::SLW_rec \|\|
5039	Opc == PPC::SRW \|\| Opc == PPC::SRW_rec \|\|
5040	Opc == PPC::SLW8 \|\| Opc == PPC::SLW8_rec \|\|
5041	Opc == PPC::SRW8 \|\| Opc == PPC::SRW8_rec;
5042	bool SpecialShift64 = Opc == PPC::SLD \|\| Opc == PPC::SLD_rec \|\|
5043	Opc == PPC::SRD \|\| Opc == PPC::SRD_rec;
5044	bool SetCR = Opc == PPC::SLW_rec \|\| Opc == PPC::SRW_rec \|\|
5045	Opc == PPC::SLD_rec \|\| Opc == PPC::SRD_rec;
5046	bool RightShift = Opc == PPC::SRW \|\| Opc == PPC::SRW_rec \|\| Opc == PPC::SRD \|\|
5047	Opc == PPC::SRD_rec;
5048
5049	LLVM_DEBUG(dbgs() << "Replacing reg+reg instruction: ");
5050	LLVM_DEBUG(MI.dump());
5051	LLVM_DEBUG(dbgs() << "Fed by load-immediate: ");
5052	LLVM_DEBUG(DefMI.dump());
5053	MI.setDesc(get(Opcode: III.ImmOpcode));
5054	if (ConstantOpNo == III.OpNoForForwarding) {
5055	// Converting shifts to immediate form is a bit tricky since they may do
5056	// one of three things:
5057	// 1. If the shift amount is between OpSize and 2OpSize, the result is zero*
5058	// 2. If the shift amount is zero, the result is unchanged (save for maybe
5059	// setting CR0)
5060	// 3. If the shift amount is in [1, OpSize), it's just a shift
5061	if (SpecialShift32 \|\| SpecialShift64) {
5062	LoadImmediateInfo LII;
5063	LII.Imm = `0`;
5064	LII.SetCR = SetCR;
5065	LII.Is64Bit = SpecialShift64;
5066	uint64_t ShAmt = Imm & (SpecialShift32 ? `0x1F` : `0x3F`);
5067	if (Imm & (SpecialShift32 ? `0x20` : `0x40`))
5068	replaceInstrWithLI(MI, LII);
5069	// Shifts by zero don't change the value. If we don't need to set CR0,
5070	// just convert this to a COPY. Can't do this post-RA since we've already
5071	// cleaned up the copies.
5072	else if (!SetCR && ShAmt == `0` && !PostRA) {
5073	MI.removeOperand(OpNo: `2`);
5074	MI.setDesc(get(Opcode: PPC::COPY));
5075	} else {
5076	// The 32 bit and 64 bit instructions are quite different.
5077	if (SpecialShift32) {
5078	// Left shifts use (N, 0, 31-N).
5079	// Right shifts use (32-N, N, 31) if 0 < N < 32.
5080	// use (0, 0, 31) if N == 0.
5081	uint64_t SH = ShAmt == `0` ? `0` : RightShift ? `32` - ShAmt : ShAmt;
5082	uint64_t MB = RightShift ? ShAmt : `0`;
5083	uint64_t ME = RightShift ? `31` : `31` - ShAmt;
5084	replaceInstrOperandWithImm(MI, OpNo: III.OpNoForForwarding, Imm: SH);
5085	MachineInstrBuilder (*MI.getParent()->getParent(), MI).addImm(Val: MB)
5086	.addImm(Val: ME);
5087	} else {
5088	// Left shifts use (N, 63-N).
5089	// Right shifts use (64-N, N) if 0 < N < 64.
5090	// use (0, 0) if N == 0.
5091	uint64_t SH = ShAmt == `0` ? `0` : RightShift ? `64` - ShAmt : ShAmt;
5092	uint64_t ME = RightShift ? ShAmt : `63` - ShAmt;
5093	replaceInstrOperandWithImm(MI, OpNo: III.OpNoForForwarding, Imm: SH);
5094	MachineInstrBuilder (*MI.getParent()->getParent(), MI).addImm(Val: ME);
5095	}
5096	}
5097	} else
5098	replaceInstrOperandWithImm(MI, OpNo: ConstantOpNo, Imm);
5099	}
5100	// Convert commutative instructions (switch the operands and convert the
5101	// desired one to an immediate.
5102	else if (III.IsCommutative) {
5103	replaceInstrOperandWithImm(MI, OpNo: ConstantOpNo, Imm);
5104	swapMIOperands(MI, Op1: ConstantOpNo, Op2: III.OpNoForForwarding);
5105	} else
5106	llvm_unreachable("Should have exited early!");
5107
5108	// For instructions for which the constant register replaces a different
5109	// operand than where the immediate goes, we need to swap them.
5110	if (III.OpNoForForwarding != III.ImmOpNo)
5111	swapMIOperands(MI, Op1: III.OpNoForForwarding, Op2: III.ImmOpNo);
5112
5113	// If the special R0/X0 register index are different for original instruction
5114	// and new instruction, we need to fix up the register class in new
5115	// instruction.
5116	if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
5117	if (III.ZeroIsSpecialNew) {
5118	// If operand at III.ZeroIsSpecialNew is physical reg(eg: ZERO/ZERO8), no
5119	// need to fix up register class.
5120	Register RegToModify = MI.getOperand(i: III.ZeroIsSpecialNew).getReg();
5121	if (RegToModify.isVirtual()) {
5122	const TargetRegisterClass *NewRC =
5123	MRI.getRegClass(Reg: RegToModify)->hasSuperClassEq(RC: &PPC::GPRCRegClass) ?
5124	&PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
5125	MRI.setRegClass(Reg: RegToModify, RC: NewRC);
5126	}
5127	}
5128	}
5129
5130	if (PostRA)
5131	recomputeLivenessFlags(MBB&: *MI.getParent());
5132
5133	LLVM_DEBUG(dbgs() << "With: ");
5134	LLVM_DEBUG(MI.dump());
5135	LLVM_DEBUG(dbgs() << "\n");
5136	return true;
5137	}
5138
5139	const TargetRegisterClass *
5140	PPCInstrInfo::updatedRC(const TargetRegisterClass RC) const* {
5141	if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
5142	return &PPC::VSRCRegClass;
5143	return RC;
5144	}
5145
5146	int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) {
5147	return PPC::getRecordFormOpcode(Opcode);
5148	}
5149
5150	static bool isOpZeroOfSubwordPreincLoad(int Opcode) {
5151	return (Opcode == PPC::LBZU \|\| Opcode == PPC::LBZUX \|\| Opcode == PPC::LBZU8 \|\|
5152	Opcode == PPC::LBZUX8 \|\| Opcode == PPC::LHZU \|\|
5153	Opcode == PPC::LHZUX \|\| Opcode == PPC::LHZU8 \|\|
5154	Opcode == PPC::LHZUX8);
5155	}
5156
5157	// This function checks for sign extension from 32 bits to 64 bits.
5158	static bool definedBySignExtendingOp(const unsigned Reg,
5159	const MachineRegisterInfo *MRI) {
5160	if (!Register::isVirtualRegister(Reg))
5161	return false;
5162
5163	MachineInstr *MI = MRI->getVRegDef(Reg);
5164	if (!MI)
5165	return false;
5166
5167	int Opcode = MI->getOpcode();
5168	const PPCInstrInfo *TII =
5169	MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5170	if (TII->isSExt32To64(Opcode))
5171	return true;
5172
5173	// The first def of LBZU/LHZU is sign extended.
5174	if (isOpZeroOfSubwordPreincLoad(Opcode) && MI->getOperand(i: `0`).getReg() == Reg)
5175	return true;
5176
5177	// RLDICL generates sign-extended output if it clears at least
5178	// 33 bits from the left (MSB).
5179	if (Opcode == PPC::RLDICL && MI->getOperand(i: `3`).getImm() >= `33`)
5180	return true;
5181
5182	// If at least one bit from left in a lower word is masked out,
5183	// all of 0 to 32-th bits of the output are cleared.
5184	// Hence the output is already sign extended.
5185	if ((Opcode == PPC::RLWINM \|\| Opcode == PPC::RLWINM_rec \|\|
5186	Opcode == PPC::RLWNM \|\| Opcode == PPC::RLWNM_rec) &&
5187	MI->getOperand(i: `3`).getImm() > `0` &&
5188	MI->getOperand(i: `3`).getImm() <= MI->getOperand(i: `4`).getImm())
5189	return true;
5190
5191	// If the most significant bit of immediate in ANDIS is zero,
5192	// all of 0 to 32-th bits are cleared.
5193	if (Opcode == PPC::ANDIS_rec \|\| Opcode == PPC::ANDIS8_rec) {
5194	uint16_t Imm = MI->getOperand(i: `2`).getImm();
5195	if ((Imm & `0x8000`) == `0`)
5196	return true;
5197	}
5198
5199	return false;
5200	}
5201
5202	// This function checks the machine instruction that defines the input register
5203	// Reg. If that machine instruction always outputs a value that has only zeros
5204	// in the higher 32 bits then this function will return true.
5205	static bool definedByZeroExtendingOp(const unsigned Reg,
5206	const MachineRegisterInfo *MRI) {
5207	if (!Register::isVirtualRegister(Reg))
5208	return false;
5209
5210	MachineInstr *MI = MRI->getVRegDef(Reg);
5211	if (!MI)
5212	return false;
5213
5214	int Opcode = MI->getOpcode();
5215	const PPCInstrInfo *TII =
5216	MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5217	if (TII->isZExt32To64(Opcode))
5218	return true;
5219
5220	// The first def of LBZU/LHZU/LWZU are zero extended.
5221	if ((isOpZeroOfSubwordPreincLoad(Opcode) \|\| Opcode == PPC::LWZU \|\|
5222	Opcode == PPC::LWZUX \|\| Opcode == PPC::LWZU8 \|\| Opcode == PPC::LWZUX8) &&
5223	MI->getOperand(i: `0`).getReg() == Reg)
5224	return true;
5225
5226	// The 16-bit immediate is sign-extended in li/lis.
5227	// If the most significant bit is zero, all higher bits are zero.
5228	if (Opcode == PPC::LI \|\| Opcode == PPC::LI8 \|\|
5229	Opcode == PPC::LIS \|\| Opcode == PPC::LIS8) {
5230	int64_t Imm = MI->getOperand(i: `1`).getImm();
5231	if (((uint64_t)Imm & ~`0x7FFFuLL`) == `0`)
5232	return true;
5233	}
5234
5235	// We have some variations of rotate-and-mask instructions
5236	// that clear higher 32-bits.
5237	if ((Opcode == PPC::RLDICL \|\| Opcode == PPC::RLDICL_rec \|\|
5238	Opcode == PPC::RLDCL \|\| Opcode == PPC::RLDCL_rec \|\|
5239	Opcode == PPC::RLDICL_32_64) &&
5240	MI->getOperand(i: `3`).getImm() >= `32`)
5241	return true;
5242
5243	if ((Opcode == PPC::RLDIC \|\| Opcode == PPC::RLDIC_rec) &&
5244	MI->getOperand(i: `3`).getImm() >= `32` &&
5245	MI->getOperand(i: `3`).getImm() <= `63` - MI->getOperand(i: `2`).getImm())
5246	return true;
5247
5248	if ((Opcode == PPC::RLWINM \|\| Opcode == PPC::RLWINM_rec \|\|
5249	Opcode == PPC::RLWNM \|\| Opcode == PPC::RLWNM_rec \|\|
5250	Opcode == PPC::RLWINM8 \|\| Opcode == PPC::RLWNM8) &&
5251	MI->getOperand(i: `3`).getImm() <= MI->getOperand(i: `4`).getImm())
5252	return true;
5253
5254	return false;
5255	}
5256
5257	// This function returns true if the input MachineInstr is a TOC save
5258	// instruction.
5259	bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
5260	if (!MI.getOperand(i: `1`).isImm() \|\| !MI.getOperand(i: `2`).isReg())
5261	return false;
5262	unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5263	unsigned StackOffset = MI.getOperand(i: `1`).getImm();
5264	Register StackReg = MI.getOperand(i: `2`).getReg();
5265	Register SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
5266	if (StackReg == SPReg && StackOffset == TOCSaveOffset)
5267	return true;
5268
5269	return false;
5270	}
5271
// Upper bound on how deep we follow the incoming values of PHIs or binary ops
// (e.g. AND) when tracking extension state; keeps the walk from becoming
// excessively expensive.
constexpr unsigned MAX_BINOP_DEPTH = 1;
5275
5276	// This function will promote the instruction which defines the register `Reg`
5277	// in the parameter from a 32-bit to a 64-bit instruction if needed. The logic
5278	// used to check whether an instruction needs to be promoted or not is similar
5279	// to the logic used to check whether or not a defined register is sign or zero
5280	// extended within the function PPCInstrInfo::isSignOrZeroExtended.
5281	// Additionally, the `promoteInstr32To64ForElimEXTSW` function is recursive.
5282	// BinOpDepth does not count all of the recursions. The parameter BinOpDepth is
5283	// incremented only when `promoteInstr32To64ForElimEXTSW` calls itself more
5284	// than once. This is done to prevent exponential recursion.
5285	void PPCInstrInfo::promoteInstr32To64ForElimEXTSW(const Register &Reg,
5286	MachineRegisterInfo *MRI,
5287	unsigned BinOpDepth,
5288	LiveVariables LV) const* {
5289	if (!Reg.isVirtual())
5290	return;
5291
5292	MachineInstr *MI = MRI->getVRegDef(Reg);
5293	if (!MI)
5294	return;
5295
5296	unsigned Opcode = MI->getOpcode();
5297
5298	switch (Opcode) {
5299	case PPC::OR:
5300	case PPC::ISEL:
5301	case PPC::OR8:
5302	case PPC::PHI: {
5303	if (BinOpDepth >= MAX_BINOP_DEPTH)
5304	break;
5305	unsigned OperandEnd = `3`, OperandStride = `1`;
5306	if (Opcode == PPC::PHI) {
5307	OperandEnd = MI->getNumOperands();
5308	OperandStride = `2`;
5309	}
5310
5311	for (unsigned I = `1`; I < OperandEnd; I += OperandStride) {
5312	assert(MI->getOperand(I).isReg() && "Operand must be register");
5313	promoteInstr32To64ForElimEXTSW(Reg: MI->getOperand(i: I).getReg(), MRI,
5314	BinOpDepth: BinOpDepth + `1`, LV);
5315	}
5316
5317	break;
5318	}
5319	case PPC::COPY: {
5320	// Refers to the logic of the `case PPC::COPY` statement in the function
5321	// PPCInstrInfo::isSignOrZeroExtended().
5322
5323	Register SrcReg = MI->getOperand(i: `1`).getReg();
5324	// In both ELFv1 and v2 ABI, method parameters and the return value
5325	// are sign- or zero-extended.
5326	const MachineFunction *MF = MI->getMF();
5327	if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5328	// If this is a copy from another register, we recursively promote the
5329	// source.
5330	promoteInstr32To64ForElimEXTSW(Reg: SrcReg, MRI, BinOpDepth, LV);
5331	return;
5332	}
5333
5334	// From here on everything is SVR4ABI. COPY will be eliminated in the other
5335	// pass, we do not need promote the COPY pseudo opcode.
5336
5337	if (SrcReg != PPC::X3)
5338	// If this is a copy from another register, we recursively promote the
5339	// source.
5340	promoteInstr32To64ForElimEXTSW(Reg: SrcReg, MRI, BinOpDepth, LV);
5341	return;
5342	}
5343	case PPC::ORI:
5344	case PPC::XORI:
5345	case PPC::ORIS:
5346	case PPC::XORIS:
5347	case PPC::ORI8:
5348	case PPC::XORI8:
5349	case PPC::ORIS8:
5350	case PPC::XORIS8:
5351	promoteInstr32To64ForElimEXTSW(Reg: MI->getOperand(i: `1`).getReg(), MRI, BinOpDepth,
5352	LV);
5353	break;
5354	case PPC::AND:
5355	case PPC::AND8:
5356	if (BinOpDepth >= MAX_BINOP_DEPTH)
5357	break;
5358
5359	promoteInstr32To64ForElimEXTSW(Reg: MI->getOperand(i: `1`).getReg(), MRI,
5360	BinOpDepth: BinOpDepth + `1`, LV);
5361	promoteInstr32To64ForElimEXTSW(Reg: MI->getOperand(i: `2`).getReg(), MRI,
5362	BinOpDepth: BinOpDepth + `1`, LV);
5363	break;
5364	}
5365
5366	const TargetRegisterClass *RC = MRI->getRegClass(Reg);
5367	if (RC == &PPC::G8RCRegClass \|\| RC == &PPC::G8RC_and_G8RC_NOX0RegClass)
5368	return;
5369
5370	const PPCInstrInfo *TII =
5371	MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
5372
5373	// Map the 32bit to 64bit opcodes for instructions that are not signed or zero
5374	// extended themselves, but may have operands who's destination registers of
5375	// signed or zero extended instructions.
5376	std::unordered_map<unsigned, unsigned> OpcodeMap = {
5377	{PPC::OR, PPC::OR8}, {PPC::ISEL, PPC::ISEL8},
5378	{PPC::ORI, PPC::ORI8}, {PPC::XORI, PPC::XORI8},
5379	{PPC::ORIS, PPC::ORIS8}, {PPC::XORIS, PPC::XORIS8},
5380	{PPC::AND, PPC::AND8}};
5381
5382	int NewOpcode = -`1`;
5383	auto It = OpcodeMap.find(x: Opcode);
5384	if (It != OpcodeMap.end()) {
5385	// Set the new opcode to the mapped 64-bit version.
5386	NewOpcode = It ->second;
5387	} else {
5388	if (!TII->isSExt32To64(Opcode))
5389	return;
5390
5391	// The TableGen function `get64BitInstrFromSignedExt32BitInstr` is used to
5392	// map the 32-bit instruction with the `SExt32To64` flag to the 64-bit
5393	// instruction with the same opcode.
5394	NewOpcode = PPC::get64BitInstrFromSignedExt32BitInstr(Opcode);
5395	}
5396
5397	assert(NewOpcode != -`1` &&
5398	"Must have a 64-bit opcode to map the 32-bit opcode!");
5399
5400	const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
5401	const MCInstrDesc &MCID = TII->get(Opcode: NewOpcode);
5402	const TargetRegisterClass *NewRC =
5403	TRI->getRegClass(i: MCID.operands()[`0`].RegClass);
5404
5405	Register SrcReg = MI->getOperand(i: `0`).getReg();
5406	const TargetRegisterClass *SrcRC = MRI->getRegClass(Reg: SrcReg);
5407
5408	// If the register class of the defined register in the 32-bit instruction
5409	// is the same as the register class of the defined register in the promoted
5410	// 64-bit instruction, we do not need to promote the instruction.
5411	if (NewRC == SrcRC)
5412	return;
5413
5414	DebugLoc DL = MI->getDebugLoc();
5415	auto MBB = MI->getParent();
5416
5417	// Since the pseudo-opcode of the instruction is promoted from 32-bit to
5418	// 64-bit, if the source reg class of the original instruction belongs to
5419	// PPC::GRCRegClass or PPC::GPRC_and_GPRC_NOR0RegClass, we need to promote
5420	// the operand to PPC::G8CRegClass or PPC::G8RC_and_G8RC_NOR0RegClass,
5421	// respectively.
5422	DenseMap<unsigned, Register> PromoteRegs;
5423	for (unsigned i = `1`; i < MI->getNumOperands(); i++) {
5424	MachineOperand &Operand = MI->getOperand(i);
5425	if (!Operand.isReg())
5426	continue;
5427
5428	Register OperandReg = Operand.getReg();
5429	if (!OperandReg.isVirtual())
5430	continue;
5431
5432	const TargetRegisterClass *NewUsedRegRC =
5433	TRI->getRegClass(i: MCID.operands()[i].RegClass);
5434	const TargetRegisterClass *OrgRC = MRI->getRegClass(Reg: OperandReg);
5435	if (NewUsedRegRC != OrgRC && (OrgRC == &PPC::GPRCRegClass \|\|
5436	OrgRC == &PPC::GPRC_and_GPRC_NOR0RegClass)) {
5437	// Promote the used 32-bit register to 64-bit register.
5438	Register TmpReg = MRI->createVirtualRegister(RegClass: NewUsedRegRC);
5439	Register DstTmpReg = MRI->createVirtualRegister(RegClass: NewUsedRegRC);
5440	BuildMI(BB&: *MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::IMPLICIT_DEF), DestReg: TmpReg);
5441	BuildMI(BB&: *MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: PPC::INSERT_SUBREG), DestReg: DstTmpReg)
5442	.addReg(RegNo: TmpReg)
5443	.addReg(RegNo: OperandReg)
5444	.addImm(Val: PPC::sub_32);
5445	PromoteRegs [i] = DstTmpReg;
5446	}
5447	}
5448
5449	Register NewDefinedReg = MRI->createVirtualRegister(RegClass: NewRC);
5450
5451	BuildMI(BB&: *MBB, I: MI, MIMD: DL, MCID: TII->get(Opcode: NewOpcode), DestReg: NewDefinedReg);
5452	MachineBasicBlock::instr_iterator Iter(MI);
5453	--Iter;
5454	MachineInstrBuilder MIBuilder(*Iter ->getMF(), Iter);
5455	for (unsigned i = `1`; i < MI->getNumOperands(); i++) {
5456	if (auto It = PromoteRegs.find(Val: i); It != PromoteRegs.end())
5457	MIBuilder.addReg(RegNo: It ->second, flags: RegState::Kill);
5458	else
5459	Iter ->addOperand(Op: MI->getOperand(i));
5460	}
5461
5462	for (unsigned i = `1`; i < Iter ->getNumOperands(); i++) {
5463	MachineOperand &Operand = Iter ->getOperand(i);
5464	if (!Operand.isReg())
5465	continue;
5466	Register OperandReg = Operand.getReg();
5467	if (!OperandReg.isVirtual())
5468	continue;
5469	LV->recomputeForSingleDefVirtReg(Reg: OperandReg);
5470	}
5471
5472	MI->eraseFromParent();
5473
5474	// A defined register may be used by other instructions that are 32-bit.
5475	// After the defined register is promoted to 64-bit for the promoted
5476	// instruction, we need to demote the 64-bit defined register back to a
5477	// 32-bit register
5478	BuildMI(BB&: *MBB, I: ++Iter, MIMD: DL, MCID: TII->get(Opcode: PPC::COPY), DestReg: SrcReg)
5479	.addReg(RegNo: NewDefinedReg, flags: RegState::Kill, SubReg: PPC::sub_32);
5480	LV->recomputeForSingleDefVirtReg(Reg: NewDefinedReg);
5481	}
5482
5483	// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
5484	// does not count all of the recursions. The parameter BinOpDepth is incremented
5485	// only when isSignOrZeroExtended calls itself more than once. This is done to
5486	// prevent expontential recursion. There is no parameter to track linear
5487	// recursion.
5488	std::pair<bool, bool>
5489	PPCInstrInfo::isSignOrZeroExtended(const unsigned Reg,
5490	const unsigned BinOpDepth,
5491	const MachineRegisterInfo MRI) const* {
5492	if (!Register::isVirtualRegister(Reg))
5493	return std::pair<bool, bool>(false, false);
5494
5495	MachineInstr *MI = MRI->getVRegDef(Reg);
5496	if (!MI)
5497	return std::pair<bool, bool>(false, false);
5498
5499	bool IsSExt = definedBySignExtendingOp(Reg, MRI);
5500	bool IsZExt = definedByZeroExtendingOp(Reg, MRI);
5501
5502	// If we know the instruction always returns sign- and zero-extended result,
5503	// return here.
5504	if (IsSExt && IsZExt)
5505	return std::pair<bool, bool>(IsSExt, IsZExt);
5506
5507	switch (MI->getOpcode()) {
5508	case PPC::COPY: {
5509	Register SrcReg = MI->getOperand(i: `1`).getReg();
5510
5511	// In both ELFv1 and v2 ABI, method parameters and the return value
5512	// are sign- or zero-extended.
5513	const MachineFunction *MF = MI->getMF();
5514
5515	if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
5516	// If this is a copy from another register, we recursively check source.
5517	auto SrcExt = isSignOrZeroExtended(Reg: SrcReg, BinOpDepth, MRI);
5518	return std::pair<bool, bool>(SrcExt.first \|\| IsSExt,
5519	SrcExt.second \|\| IsZExt);
5520	}
5521
5522	// From here on everything is SVR4ABI
5523	const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
5524	// We check the ZExt/SExt flags for a method parameter.
5525	if (MI->getParent()->getBasicBlock() ==
5526	&MF->getFunction().getEntryBlock()) {
5527	Register VReg = MI->getOperand(i: `0`).getReg();
5528	if (MF->getRegInfo().isLiveIn(Reg: VReg)) {
5529	IsSExt \|= FuncInfo->isLiveInSExt(VReg);
5530	IsZExt \|= FuncInfo->isLiveInZExt(VReg);
5531	return std::pair<bool, bool>(IsSExt, IsZExt);
5532	}
5533	}
5534
5535	if (SrcReg != PPC::X3) {
5536	// If this is a copy from another register, we recursively check source.
5537	auto SrcExt = isSignOrZeroExtended(Reg: SrcReg, BinOpDepth, MRI);
5538	return std::pair<bool, bool>(SrcExt.first \|\| IsSExt,
5539	SrcExt.second \|\| IsZExt);
5540	}
5541
5542	// For a method return value, we check the ZExt/SExt flags in attribute.
5543	// We assume the following code sequence for method call.
5544	// ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1
5545	// BL8_NOP @func,...
5546	// ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1
5547	// %5 = COPY %x3; G8RC:%5
5548	const MachineBasicBlock *MBB = MI->getParent();
5549	std::pair<bool, bool> IsExtendPair = std::pair<bool, bool>(IsSExt, IsZExt);
5550	MachineBasicBlock::const_instr_iterator II =
5551	MachineBasicBlock::const_instr_iterator (MI);
5552	if (II == MBB->instr_begin() \|\| (--II)->getOpcode() != PPC::ADJCALLSTACKUP)
5553	return IsExtendPair;
5554
5555	const MachineInstr &CallMI = *(--II);
5556	if (!CallMI.isCall() \|\| !CallMI.getOperand(i: `0`).isGlobal())
5557	return IsExtendPair;
5558
5559	const Function *CalleeFn =
5560	dyn_cast_if_present<Function>(Val: CallMI.getOperand(i: `0`).getGlobal());
5561	if (!CalleeFn)
5562	return IsExtendPair;
5563	const IntegerType *IntTy = dyn_cast<IntegerType>(Val: CalleeFn->getReturnType());
5564	if (IntTy && IntTy->getBitWidth() <= `32`) {
5565	const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
5566	IsSExt \|= Attrs.hasAttribute(Kind: Attribute::SExt);
5567	IsZExt \|= Attrs.hasAttribute(Kind: Attribute::ZExt);
5568	return std::pair<bool, bool>(IsSExt, IsZExt);
5569	}
5570
5571	return IsExtendPair;
5572	}
5573
5574	// OR, XOR with 16-bit immediate does not change the upper 48 bits.
5575	// So, we track the operand register as we do for register copy.
5576	case PPC::ORI:
5577	case PPC::XORI:
5578	case PPC::ORI8:
5579	case PPC::XORI8: {
5580	Register SrcReg = MI->getOperand(i: `1`).getReg();
5581	auto SrcExt = isSignOrZeroExtended(Reg: SrcReg, BinOpDepth, MRI);
5582	return std::pair<bool, bool>(SrcExt.first \|\| IsSExt,
5583	SrcExt.second \|\| IsZExt);
5584	}
5585
5586	// OR, XOR with shifted 16-bit immediate does not change the upper
5587	// 32 bits. So, we track the operand register for zero extension.
5588	// For sign extension when the MSB of the immediate is zero, we also
5589	// track the operand register since the upper 33 bits are unchanged.
5590	case PPC::ORIS:
5591	case PPC::XORIS:
5592	case PPC::ORIS8:
5593	case PPC::XORIS8: {
5594	Register SrcReg = MI->getOperand(i: `1`).getReg();
5595	auto SrcExt = isSignOrZeroExtended(Reg: SrcReg, BinOpDepth, MRI);
5596	uint16_t Imm = MI->getOperand(i: `2`).getImm();
5597	if (Imm & `0x8000`)
5598	return std::pair<bool, bool>(false, SrcExt.second \|\| IsZExt);
5599	else
5600	return std::pair<bool, bool>(SrcExt.first \|\| IsSExt,
5601	SrcExt.second \|\| IsZExt);
5602	}
5603
5604	// If all incoming values are sign-/zero-extended,
5605	// the output of OR, ISEL or PHI is also sign-/zero-extended.
5606	case PPC::OR:
5607	case PPC::OR8:
5608	case PPC::ISEL:
5609	case PPC::PHI: {
5610	if (BinOpDepth >= MAX_BINOP_DEPTH)
5611	return std::pair<bool, bool>(false, false);
5612
5613	// The input registers for PHI are operand 1, 3, ...
5614	// The input registers for others are operand 1 and 2.
5615	unsigned OperandEnd = `3`, OperandStride = `1`;
5616	if (MI->getOpcode() == PPC::PHI) {
5617	OperandEnd = MI->getNumOperands();
5618	OperandStride = `2`;
5619	}
5620
5621	IsSExt = true;
5622	IsZExt = true;
5623	for (unsigned I = `1`; I != OperandEnd; I += OperandStride) {
5624	if (!MI->getOperand(i: I).isReg())
5625	return std::pair<bool, bool>(false, false);
5626
5627	Register SrcReg = MI->getOperand(i: I).getReg();
5628	auto SrcExt = isSignOrZeroExtended(Reg: SrcReg, BinOpDepth: BinOpDepth + `1`, MRI);
5629	IsSExt &= SrcExt.first;
5630	IsZExt &= SrcExt.second;
5631	}
5632	return std::pair<bool, bool>(IsSExt, IsZExt);
5633	}
5634
5635	// If at least one of the incoming values of an AND is zero extended
5636	// then the output is also zero-extended. If both of the incoming values
5637	// are sign-extended then the output is also sign extended.
5638	case PPC::AND:
5639	case PPC::AND8: {
5640	if (BinOpDepth >= MAX_BINOP_DEPTH)
5641	return std::pair<bool, bool>(false, false);
5642
5643	Register SrcReg1 = MI->getOperand(i: `1`).getReg();
5644	Register SrcReg2 = MI->getOperand(i: `2`).getReg();
5645	auto Src1Ext = isSignOrZeroExtended(Reg: SrcReg1, BinOpDepth: BinOpDepth + `1`, MRI);
5646	auto Src2Ext = isSignOrZeroExtended(Reg: SrcReg2, BinOpDepth: BinOpDepth + `1`, MRI);
5647	return std::pair<bool, bool>(Src1Ext.first && Src2Ext.first,
5648	Src1Ext.second \|\| Src2Ext.second);
5649	}
5650
5651	default:
5652	break;
5653	}
5654	return std::pair<bool, bool>(IsSExt, IsZExt);
5655	}
5656
5657	bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
5658	return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
5659	}
5660
5661	namespace {
5662	class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
5663	MachineInstr Loop, EndLoop, *LoopCount;
5664	MachineFunction *MF;
5665	const TargetInstrInfo *TII;
5666	int64_t TripCount;
5667
5668	public:
5669	PPCPipelinerLoopInfo(MachineInstr Loop, MachineInstr EndLoop,
5670	MachineInstr *LoopCount)
5671	: Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
5672	MF(Loop->getParent()->getParent()),
5673	TII(MF->getSubtarget().getInstrInfo()) {
5674	// Inspect the Loop instruction up-front, as it may be deleted when we call
5675	// createTripCountGreaterCondition.
5676	if (LoopCount->getOpcode() == PPC::LI8 \|\| LoopCount->getOpcode() == PPC::LI)
5677	TripCount = LoopCount->getOperand(i: `1`).getImm();
5678	else
5679	TripCount = -`1`;
5680	}
5681
5682	bool shouldIgnoreForPipelining(const MachineInstr MI) const* override {
5683	// Only ignore the terminator.
5684	return MI == EndLoop;
5685	}
5686
5687	std::optional<bool> createTripCountGreaterCondition(
5688	int TC, MachineBasicBlock &MBB,
5689	SmallVectorImpl<MachineOperand> &Cond) override {
5690	if (TripCount == -`1`) {
5691	// Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5692	// so we don't need to generate any thing here.
5693	Cond.push_back(Elt: MachineOperand::CreateImm(Val: `0`));
5694	Cond.push_back(Elt: MachineOperand::CreateReg(
5695	Reg: MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
5696	isDef: true));
5697	return {};
5698	}
5699
5700	return TripCount > TC;
5701	}
5702
5703	void setPreheader(MachineBasicBlock *NewPreheader) override {
5704	// Do nothing. We want the LOOP setup instruction to stay in the old
5705	// preheader, so we can use BDZ in the prologs to adapt the loop trip count.
5706	}
5707
5708	void adjustTripCount(int TripCountAdjust) override {
5709	// If the loop trip count is a compile-time value, then just change the
5710	// value.
5711	if (LoopCount->getOpcode() == PPC::LI8 \|\|
5712	LoopCount->getOpcode() == PPC::LI) {
5713	int64_t TripCount = LoopCount->getOperand(i: `1`).getImm() + TripCountAdjust;
5714	LoopCount->getOperand(i: `1`).setImm(TripCount);
5715	return;
5716	}
5717
5718	// Since BDZ/BDZ8 that we will insert will also decrease the ctr by 1,
5719	// so we don't need to generate any thing here.
5720	}
5721
5722	void disposed(LiveIntervals *LIS) override {
5723	if (LIS) {
5724	LIS->RemoveMachineInstrFromMaps(MI&: *Loop);
5725	LIS->RemoveMachineInstrFromMaps(MI&: *LoopCount);
5726	}
5727	Loop->eraseFromParent();
5728	// Ensure the loop setup instruction is deleted too.
5729	LoopCount->eraseFromParent();
5730	}
5731	};
5732	} // namespace
5733
5734	std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
5735	PPCInstrInfo::analyzeLoopForPipelining(MachineBasicBlock LoopBB) const* {
5736	// We really "analyze" only hardware loops right now.
5737	MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
5738	MachineBasicBlock Preheader = LoopBB->pred_begin();
5739	if (Preheader == LoopBB)
5740	Preheader = *std::next(x: LoopBB->pred_begin());
5741	MachineFunction *MF = Preheader->getParent();
5742
5743	if (I != LoopBB->end() && isBDNZ(Opcode: I ->getOpcode())) {
5744	SmallPtrSet<MachineBasicBlock *, `8`> Visited;
5745	if (MachineInstr LoopInst = findLoopInstr(PreHeader&: Preheader, Visited)) {
5746	Register LoopCountReg = LoopInst->getOperand(i: `0`).getReg();
5747	MachineRegisterInfo &MRI = MF->getRegInfo();
5748	MachineInstr *LoopCount = MRI.getUniqueVRegDef(Reg: LoopCountReg);
5749	return std::make_unique<PPCPipelinerLoopInfo>(args&: LoopInst, args: &*I, args&: LoopCount);
5750	}
5751	}
5752	return nullptr;
5753	}
5754
5755	MachineInstr *PPCInstrInfo::findLoopInstr(
5756	MachineBasicBlock &PreHeader,
5757	SmallPtrSet<MachineBasicBlock , `8`> &Visited) const* {
5758
5759	unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);
5760
5761	// The loop set-up instruction should be in preheader
5762	for (auto &I : PreHeader.instrs())
5763	if (I.getOpcode() == LOOPi)
5764	return &I;
5765	return nullptr;
5766	}
5767
5768	// Return true if get the base operand, byte offset of an instruction and the
5769	// memory width. Width is the size of memory that is being loaded/stored.
5770	bool PPCInstrInfo::getMemOperandWithOffsetWidth(
5771	const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
5772	LocationSize &Width, const TargetRegisterInfo TRI) const* {
5773	if (!LdSt.mayLoadOrStore() \|\| LdSt.getNumExplicitOperands() != `3`)
5774	return false;
5775
5776	// Handle only loads/stores with base register followed by immediate offset.
5777	if (!LdSt.getOperand(i: `1`).isImm() \|\|
5778	(!LdSt.getOperand(i: `2`).isReg() && !LdSt.getOperand(i: `2`).isFI()))
5779	return false;
5780	if (!LdSt.getOperand(i: `1`).isImm() \|\|
5781	(!LdSt.getOperand(i: `2`).isReg() && !LdSt.getOperand(i: `2`).isFI()))
5782	return false;
5783
5784	if (!LdSt.hasOneMemOperand())
5785	return false;
5786
5787	Width = (*LdSt.memoperands_begin())->getSize();
5788	Offset = LdSt.getOperand(i: `1`).getImm();
5789	BaseReg = &LdSt.getOperand(i: `2`);
5790	return true;
5791	}
5792
5793	bool PPCInstrInfo::areMemAccessesTriviallyDisjoint(
5794	const MachineInstr &MIa, const MachineInstr &MIb) const {
5795	assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
5796	assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
5797
5798	if (MIa.hasUnmodeledSideEffects() \|\| MIb.hasUnmodeledSideEffects() \|\|
5799	MIa.hasOrderedMemoryRef() \|\| MIb.hasOrderedMemoryRef())
5800	return false;
5801
5802	// Retrieve the base register, offset from the base register and width. Width
5803	// is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
5804	// base registers are identical, and the offset of a lower memory access +
5805	// the width doesn't overlap the offset of a higher memory access,
5806	// then the memory accesses are different.
5807	const TargetRegisterInfo *TRI = &getRegisterInfo();
5808	const MachineOperand BaseOpA = nullptr, BaseOpB = nullptr;
5809	int64_t OffsetA = `0`, OffsetB = `0`;
5810	LocationSize WidthA = LocationSize::precise(Value: `0`),
5811	WidthB = LocationSize::precise(Value: `0`);
5812	if (getMemOperandWithOffsetWidth(LdSt: MIa, BaseReg&: BaseOpA, Offset&: OffsetA, Width&: WidthA, TRI) &&
5813	getMemOperandWithOffsetWidth(LdSt: MIb, BaseReg&: BaseOpB, Offset&: OffsetB, Width&: WidthB, TRI)) {
5814	if (BaseOpA->isIdenticalTo(Other: *BaseOpB)) {
5815	int LowOffset = std::min(a: OffsetA, b: OffsetB);
5816	int HighOffset = std::max(a: OffsetA, b: OffsetB);
5817	LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
5818	if (LowWidth.hasValue() &&
5819	LowOffset + (int)LowWidth.getValue() <= HighOffset)
5820	return true;
5821	}
5822	}
5823	return false;
5824	}
5825

Browse the source code of llvm_projects/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp