1//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the PowerPC-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// PPCGenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "MCTargetDesc/PPCPredicates.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCISelLowering.h"
19#include "PPCMachineFunctionInfo.h"
20#include "PPCSubtarget.h"
21#include "llvm/CodeGen/CallingConvLower.h"
22#include "llvm/CodeGen/FastISel.h"
23#include "llvm/CodeGen/FunctionLoweringInfo.h"
24#include "llvm/CodeGen/MachineConstantPool.h"
25#include "llvm/CodeGen/MachineFrameInfo.h"
26#include "llvm/CodeGen/MachineInstrBuilder.h"
27#include "llvm/CodeGen/MachineRegisterInfo.h"
28#include "llvm/CodeGen/TargetLowering.h"
29#include "llvm/IR/CallingConv.h"
30#include "llvm/IR/GetElementPtrTypeIterator.h"
31#include "llvm/IR/GlobalVariable.h"
32#include "llvm/IR/Operator.h"
33#include "llvm/Target/TargetMachine.h"
34
35//===----------------------------------------------------------------------===//
36//
37// TBD:
38// fastLowerArguments: Handle simple cases.
39// PPCMaterializeGV: Handle TLS.
40// SelectCall: Handle function pointers.
41// SelectCall: Handle multi-register return values.
42// SelectCall: Optimize away nops for local calls.
43// processCallArgs: Handle bit-converted arguments.
44// finishCall: Handle multi-register return values.
45// PPCComputeAddress: Handle parameter references as FrameIndex's.
46// PPCEmitCmp: Handle immediate as operand 1.
47// SelectCall: Handle small byval arguments.
48// SelectIntrinsicCall: Implement.
49// SelectSelect: Implement.
50// Consider factoring isTypeLegal into the base class.
51// Implement switches and jump tables.
52//
53//===----------------------------------------------------------------------===//
54using namespace llvm;
55
56#define DEBUG_TYPE "ppcfastisel"
57
58namespace {
59
// A memory operand for fast-isel loads and stores: either a base register
// or a frame index, plus a signed 64-bit displacement.
struct Address {
  // Which flavor of base the union below holds.
  enum { RegBase, FrameIndexBase } BaseType = RegBase;

  union {
    unsigned Reg; // Valid when BaseType == RegBase.
    int FI;       // Valid when BaseType == FrameIndexBase.
  } Base;

  int64_t Offset = 0;

  // Start out as register base zero with no displacement.
  Address() { Base.Reg = 0; }
};
79
// Fast instruction selector for PowerPC. Handles the common, simple IR
// constructs directly; anything it rejects falls back to the normal
// SelectionDAG instruction selector.
class PPCFastISel final : public FastISel {

  const TargetMachine &TM;       // Owning target machine.
  const PPCSubtarget *Subtarget; // Subtarget feature queries (SPE, VSX, ...).
  PPCFunctionInfo *PPCFuncInfo;  // PPC-specific per-function state.
  const TargetInstrInfo &TII;    // Instruction descriptions for BuildMI.
  const TargetLowering &TLI;     // Type-legality and lowering queries.
  LLVMContext *Context;          // Context of the function being selected.

  public:
    explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
                         const TargetLibraryInfo *LibInfo)
        : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
          Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
          PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
          TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
          Context(&FuncInfo.Fn->getContext()) {}

  // Backend specific FastISel code.
  private:
    bool fastSelectInstruction(const Instruction *I) override;
    Register fastMaterializeConstant(const Constant *C) override;
    Register fastMaterializeAlloca(const AllocaInst *AI) override;
    bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                             const LoadInst *LI) override;
    bool fastLowerArguments() override;
    Register fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
    Register fastEmitInst_ri(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, Register Op0,
                             uint64_t Imm);
    Register fastEmitInst_r(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC, Register Op0);
    Register fastEmitInst_rr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, Register Op0,
                             Register Op1);

    bool fastLowerCall(CallLoweringInfo &CLI) override;

  // Instruction selection routines.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectIToFP(const Instruction *I, bool IsSigned);
    bool SelectFPToI(const Instruction *I, bool IsSigned);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);

  // Utility routines.
  private:
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool isValueAvailable(const Value *V) const;
    // True if RC is the VSX scalar double-precision register class.
    bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
      return RC->getID() == PPC::VSFRCRegClassID;
    }
    // True if RC is the VSX scalar single-precision register class.
    bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
      return RC->getID() == PPC::VSSRCRegClassID;
    }
    // Emit a COPY of SrcReg (optionally through SubReg, with operand flags
    // Flag) into a fresh virtual register of class ToRC and return it.
    Register copyRegToRegClass(const TargetRegisterClass *ToRC, Register SrcReg,
                               unsigned Flag = 0, unsigned SubReg = 0) {
      Register TmpReg = createResultReg(RC: ToRC);
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
              MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: TmpReg).addReg(RegNo: SrcReg, flags: Flag, SubReg);
      return TmpReg;
    }
    bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt,
                    Register DestReg, const PPC::Predicate Pred);
    bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                     const TargetRegisterClass *RC, bool IsZExt = true,
                     unsigned FP64LoadOpc = PPC::LFD);
    bool PPCEmitStore(MVT VT, Register SrcReg, Address &Addr);
    bool PPCComputeAddress(const Value *Obj, Address &Addr);
    void PPCSimplifyAddress(Address &Addr, bool &UseOffset, Register &IndexReg);
    bool PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, Register DestReg,
                       bool IsZExt);
    Register PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
    Register PPCMaterializeGV(const GlobalValue *GV, MVT VT);
    Register PPCMaterializeInt(const ConstantInt *CI, MVT VT,
                               bool UseSExt = true);
    Register PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
    Register PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC);
    Register PPCMoveToIntReg(const Instruction *I, MVT VT, Register SrcReg,
                             bool IsSigned);
    Register PPCMoveToFPReg(MVT VT, Register SrcReg, bool IsSigned);

  // Call handling routines.
  private:
    bool processCallArgs(SmallVectorImpl<Value *> &Args,
                         SmallVectorImpl<Register> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
                         unsigned &NumBytes, bool IsVarArg);
    bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);

  private:
  // Tablegen-generated fast-isel patterns (PPCGenFastISel.inc).
  #include "PPCGenFastISel.inc"

};
185
186} // end anonymous namespace
187
188static std::optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
189 switch (Pred) {
190 // These are not representable with any single compare.
191 case CmpInst::FCMP_FALSE:
192 case CmpInst::FCMP_TRUE:
193 // Major concern about the following 6 cases is NaN result. The comparison
194 // result consists of 4 bits, indicating lt, eq, gt and un (unordered),
195 // only one of which will be set. The result is generated by fcmpu
196 // instruction. However, bc instruction only inspects one of the first 3
197 // bits, so when un is set, bc instruction may jump to an undesired
198 // place.
199 //
200 // More specifically, if we expect an unordered comparison and un is set, we
201 // expect to always go to true branch; in such case UEQ, UGT and ULT still
202 // give false, which are undesired; but UNE, UGE, ULE happen to give true,
203 // since they are tested by inspecting !eq, !lt, !gt, respectively.
204 //
205 // Similarly, for ordered comparison, when un is set, we always expect the
206 // result to be false. In such case OGT, OLT and OEQ is good, since they are
207 // actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
208 // and ONE are tested through !lt, !gt and !eq, and these are true.
209 case CmpInst::FCMP_UEQ:
210 case CmpInst::FCMP_UGT:
211 case CmpInst::FCMP_ULT:
212 case CmpInst::FCMP_OGE:
213 case CmpInst::FCMP_OLE:
214 case CmpInst::FCMP_ONE:
215 default:
216 return std::nullopt;
217
218 case CmpInst::FCMP_OEQ:
219 case CmpInst::ICMP_EQ:
220 return PPC::PRED_EQ;
221
222 case CmpInst::FCMP_OGT:
223 case CmpInst::ICMP_UGT:
224 case CmpInst::ICMP_SGT:
225 return PPC::PRED_GT;
226
227 case CmpInst::FCMP_UGE:
228 case CmpInst::ICMP_UGE:
229 case CmpInst::ICMP_SGE:
230 return PPC::PRED_GE;
231
232 case CmpInst::FCMP_OLT:
233 case CmpInst::ICMP_ULT:
234 case CmpInst::ICMP_SLT:
235 return PPC::PRED_LT;
236
237 case CmpInst::FCMP_ULE:
238 case CmpInst::ICMP_ULE:
239 case CmpInst::ICMP_SLE:
240 return PPC::PRED_LE;
241
242 case CmpInst::FCMP_UNE:
243 case CmpInst::ICMP_NE:
244 return PPC::PRED_NE;
245
246 case CmpInst::FCMP_ORD:
247 return PPC::PRED_NU;
248
249 case CmpInst::FCMP_UNO:
250 return PPC::PRED_UN;
251 }
252}
253
254// Determine whether the type Ty is simple enough to be handled by
255// fast-isel, and return its equivalent machine type in VT.
256// FIXME: Copied directly from ARM -- factor into base class?
257bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
258 EVT Evt = TLI.getValueType(DL, Ty, AllowUnknown: true);
259
260 // Only handle simple types.
261 if (Evt == MVT::Other || !Evt.isSimple()) return false;
262 VT = Evt.getSimpleVT();
263
264 // Handle all legal types, i.e. a register that will directly hold this
265 // value.
266 return TLI.isTypeLegal(VT);
267}
268
269// Determine whether the type Ty is simple enough to be handled by
270// fast-isel as a load target, and return its equivalent machine type in VT.
271bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
272 if (isTypeLegal(Ty, VT)) return true;
273
274 // If this is a type than can be sign or zero-extended to a basic operation
275 // go ahead and accept it now.
276 if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
277 return true;
278 }
279
280 return false;
281}
282
283bool PPCFastISel::isValueAvailable(const Value *V) const {
284 if (!isa<Instruction>(Val: V))
285 return true;
286
287 const auto *I = cast<Instruction>(Val: V);
288 return FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB;
289}
290
291// Given a value Obj, create an Address object Addr that represents its
292// address. Return false if we can't handle it.
// Given a value Obj, create an Address object Addr that represents its
// address. Return false if we can't handle it. On success Addr holds either
// a frame index or a base register (constrained away from X0), plus any
// constant displacement folded out of the addressing computation.
bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Val: Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(Val: static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Val: Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  // Fold the operation that feeds the pointer into the addressing mode
  // where possible.
  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast:
    // Look through bitcasts.
    return PPCComputeAddress(Obj: U->getOperand(i: 0), Addr);
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, Ty: U->getOperand(i: 0)->getType()) ==
        TLI.getPointerTy(DL))
      return PPCComputeAddress(Obj: U->getOperand(i: 0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL))
      return PPCComputeAddress(Obj: U->getOperand(i: 0), Addr);
    break;
  case Instruction::GetElementPtr: {
    // Save the current state so we can undo a partially-folded GEP.
    Address SavedAddr = Addr;
    int64_t TmpOffset = Addr.Offset;

    // Iterate through the GEP folding the constants into offsets where
    // we can.
    gep_type_iterator GTI = gep_type_begin(GEP: U);
    for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
         II != IE; ++II, ++GTI) {
      const Value *Op = *II;
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // Struct indices are always constant; add the field's byte offset.
        const StructLayout *SL = DL.getStructLayout(Ty: STy);
        unsigned Idx = cast<ConstantInt>(Val: Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        // Array/sequential index: scale by the element stride.
        uint64_t S = GTI.getSequentialElementStride(DL);
        for (;;) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(GEP: U, Add: Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
              cast<ConstantInt>(Val: cast<AddOperator>(Val: Op)->getOperand(i_nocapture: 1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Val: Op)->getOperand(i_nocapture: 0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.Offset = TmpOffset;
    if (PPCComputeAddress(Obj: U->getOperand(i: 0), Addr)) return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

    unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    // A static alloca becomes a frame-index base directly.
    const AllocaInst *AI = cast<AllocaInst>(Val: Obj);
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(Val: AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.BaseType = Address::FrameIndexBase;
      Addr.Base.FI = SI->second;
      return true;
    }
    break;
  }
  }

  // FIXME: References to parameters fall through to the behavior
  // below. They should be able to reference a frame index since
  // they are stored to the stack, so we can get "ld rx, offset(r1)"
  // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
  // just contain the parameter. Try to handle this with a FI.

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0)
    Addr.Base.Reg = getRegForValue(V: Obj);

  // Prevent assignment of base register to X0, which is inappropriate
  // for loads and stores alike.
  if (Addr.Base.Reg != 0)
    MRI.setRegClass(Reg: Addr.Base.Reg, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);

  return Addr.Base.Reg != 0;
}
403
404// Fix up some addresses that can't be used directly. For example, if
405// an offset won't fit in an instruction field, we may need to move it
406// into an index register.
407void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
408 Register &IndexReg) {
409
410 // Check whether the offset fits in the instruction field.
411 if (!isInt<16>(x: Addr.Offset))
412 UseOffset = false;
413
414 // If this is a stack pointer and the offset needs to be simplified then
415 // put the alloca address into a register, set the base type back to
416 // register and continue. This should almost never happen.
417 if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
418 Register ResultReg = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
419 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDI8),
420 DestReg: ResultReg).addFrameIndex(Idx: Addr.Base.FI).addImm(Val: 0);
421 Addr.Base.Reg = ResultReg;
422 Addr.BaseType = Address::RegBase;
423 }
424
425 if (!UseOffset) {
426 IntegerType *OffsetTy = Type::getInt64Ty(C&: *Context);
427 const ConstantInt *Offset = ConstantInt::getSigned(Ty: OffsetTy, V: Addr.Offset);
428 IndexReg = PPCMaterializeInt(CI: Offset, VT: MVT::i64);
429 assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
430 }
431}
432
433// Emit a load instruction if possible, returning true if we succeeded,
434// otherwise false. See commentary below for how the register class of
435// the load is determined.
// Emit a load instruction if possible, returning true if we succeeded,
// otherwise false. See commentary below for how the register class of
// the load is determined. On success ResultReg holds the loaded value
// (it is created here if the caller passed it in as zero).
bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                              const TargetRegisterClass *RC,
                              bool IsZExt, unsigned FP64LoadOpc) {
  unsigned Opc;
  bool UseOffset = true;
  bool HasSPE = Subtarget->hasSPE();

  // If ResultReg is given, it determines the register class of the load.
  // Otherwise, RC is the register class to use. If the result of the
  // load isn't anticipated in this block, both may be zero, in which
  // case we must make a conservative guess. In particular, don't assign
  // R0 or X0 to the result register, as the result may be used in a load,
  // store, add-immediate, or isel that won't permit this. (Though
  // perhaps the spill and reload of live-exit values would handle this?)
  const TargetRegisterClass *UseRC =
    (ResultReg ? MRI.getRegClass(Reg: ResultReg) :
     (RC ? RC :
      (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
       (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
        (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
         &PPC::GPRC_and_GPRC_NOR0RegClass)))));

  // A 32-bit register class selects the 32-bit flavor of each opcode below.
  bool Is32BitInt = UseRC->hasSuperClassEq(RC: &PPC::GPRCRegClass);

  // Pick the D-form (reg + 16-bit displacement) opcode for the value type.
  switch (VT.SimpleTy) {
  default: // e.g., vector types not handled
    return false;
  case MVT::i8:
    Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
    break;
  case MVT::i16:
    // Zero-extend with lhz, sign-extend with lha.
    Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
                  : (Is32BitInt ? PPC::LHA : PPC::LHA8));
    break;
  case MVT::i32:
    Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
                  : (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
    // lwa is a DS-form instruction: its displacement must be a multiple
    // of 4, so fall back to the indexed form otherwise.
    if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
      UseOffset = false;
    break;
  case MVT::i64:
    Opc = PPC::LD;
    assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
           "64-bit load with 32-bit target??");
    // ld is DS-form as well: displacement must be 4-byte aligned.
    UseOffset = ((Addr.Offset & 3) == 0);
    break;
  case MVT::f32:
    Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
    break;
  case MVT::f64:
    // Caller-selected f64 opcode (LFD by default, EVLDD for SPE).
    Opc = FP64LoadOpc;
    break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  Register IndexReg;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX load with an offset of 0, a VSX indexed load can
  // be used.
  bool IsVSSRC = isVSSRCRegClass(RC: UseRC);
  bool IsVSFRC = isVSFRCRegClass(RC: UseRC);
  bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
  bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
  if ((Is32VSXLoad || Is64VSXLoad) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  if (!ResultReg)
    ResultReg = createResultReg(RC: UseRC);

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI: Addr.Base.FI,
                                          Offset: Addr.Offset),
        F: MachineMemOperand::MOLoad, Size: MFI.getObjectSize(ObjectIdx: Addr.Base.FI),
        BaseAlignment: MFI.getObjectAlign(ObjectIdx: Addr.Base.FI));

    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
      .addImm(Val: Addr.Offset).addFrameIndex(Idx: Addr.Base.FI).addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
      .addImm(Val: Addr.Offset).addReg(RegNo: Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::LBZ: Opc = PPC::LBZX; break;
      case PPC::LBZ8: Opc = PPC::LBZX8; break;
      case PPC::LHZ: Opc = PPC::LHZX; break;
      case PPC::LHZ8: Opc = PPC::LHZX8; break;
      case PPC::LHA: Opc = PPC::LHAX; break;
      case PPC::LHA8: Opc = PPC::LHAX8; break;
      case PPC::LWZ: Opc = PPC::LWZX; break;
      case PPC::LWZ8: Opc = PPC::LWZX8; break;
      case PPC::LWA: Opc = PPC::LWAX; break;
      case PPC::LWA_32: Opc = PPC::LWAX_32; break;
      case PPC::LD: Opc = PPC::LDX; break;
      case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
      case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
      case PPC::EVLDD: Opc = PPC::EVLDDX; break;
      case PPC::SPELWZ: Opc = PPC::SPELWZX; break;
    }

    auto MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc),
                       DestReg: ResultReg);

    // If we have an index register defined we use it in the load inst,
    // otherwise we use X0 as base, since it makes the vector instructions
    // compute the effective address as if the base contribution were zero,
    // regardless of the register's actual content.
    if (IndexReg)
      MIB.addReg(RegNo: Addr.Base.Reg).addReg(RegNo: IndexReg);
    else
      MIB.addReg(RegNo: PPC::ZERO8).addReg(RegNo: Addr.Base.Reg);
  }

  return true;
}
573
574// Attempt to fast-select a load instruction.
575bool PPCFastISel::SelectLoad(const Instruction *I) {
576 // FIXME: No atomic loads are supported.
577 if (cast<LoadInst>(Val: I)->isAtomic())
578 return false;
579
580 // Verify we have a legal type before going any further.
581 MVT VT;
582 if (!isLoadTypeLegal(Ty: I->getType(), VT))
583 return false;
584
585 // See if we can handle this address.
586 Address Addr;
587 if (!PPCComputeAddress(Obj: I->getOperand(i: 0), Addr))
588 return false;
589
590 // Look at the currently assigned register for this instruction
591 // to determine the required register class. This is necessary
592 // to constrain RA from using R0/X0 when this is not legal.
593 Register AssignedReg = FuncInfo.ValueMap[I];
594 const TargetRegisterClass *RC =
595 AssignedReg ? MRI.getRegClass(Reg: AssignedReg) : nullptr;
596
597 Register ResultReg = 0;
598 if (!PPCEmitLoad(VT, ResultReg, Addr, RC, IsZExt: true,
599 FP64LoadOpc: Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
600 return false;
601 updateValueMap(I, Reg: ResultReg);
602 return true;
603}
604
605// Emit a store instruction to store SrcReg at Addr.
// Emit a store instruction to store SrcReg at Addr. Returns true on
// success. The opcode is chosen from the value type and the register
// class of SrcReg; mirrors the structure of PPCEmitLoad above.
bool PPCFastISel::PPCEmitStore(MVT VT, Register SrcReg, Address &Addr) {
  assert(SrcReg && "Nothing to store!");
  unsigned Opc;
  bool UseOffset = true;

  // A 32-bit source register selects the 32-bit flavor of each opcode.
  const TargetRegisterClass *RC = MRI.getRegClass(Reg: SrcReg);
  bool Is32BitInt = RC->hasSuperClassEq(RC: &PPC::GPRCRegClass);

  // Pick the D-form (reg + 16-bit displacement) opcode for the value type.
  switch (VT.SimpleTy) {
  default: // e.g., vector types not handled
    return false;
  case MVT::i8:
    Opc = Is32BitInt ? PPC::STB : PPC::STB8;
    break;
  case MVT::i16:
    Opc = Is32BitInt ? PPC::STH : PPC::STH8;
    break;
  case MVT::i32:
    assert(Is32BitInt && "Not GPRC for i32??");
    Opc = PPC::STW;
    break;
  case MVT::i64:
    Opc = PPC::STD;
    // std is a DS-form instruction: its displacement must be a multiple
    // of 4, so fall back to the indexed form otherwise.
    UseOffset = ((Addr.Offset & 3) == 0);
    break;
  case MVT::f32:
    Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
    break;
  case MVT::f64:
    Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
    break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  Register IndexReg;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX store with an offset of 0, a VSX indexed store
  // can be used.
  bool IsVSSRC = isVSSRCRegClass(RC);
  bool IsVSFRC = isVSFRCRegClass(RC);
  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
  if ((Is32VSXStore || Is64VSXStore) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI: Addr.Base.FI,
                                          Offset: Addr.Offset),
        F: MachineMemOperand::MOStore, Size: MFI.getObjectSize(ObjectIdx: Addr.Base.FI),
        BaseAlignment: MFI.getObjectAlign(ObjectIdx: Addr.Base.FI));

    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
        .addReg(RegNo: SrcReg)
        .addImm(Val: Addr.Offset)
        .addFrameIndex(Idx: Addr.Base.FI)
        .addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore)
      return false;

    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
      .addReg(RegNo: SrcReg).addImm(Val: Addr.Offset).addReg(RegNo: Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
      default: llvm_unreachable("Unexpected opcode!");
      case PPC::STB: Opc = PPC::STBX; break;
      case PPC::STH : Opc = PPC::STHX; break;
      case PPC::STW : Opc = PPC::STWX; break;
      case PPC::STB8: Opc = PPC::STBX8; break;
      case PPC::STH8: Opc = PPC::STHX8; break;
      case PPC::STW8: Opc = PPC::STWX8; break;
      case PPC::STD: Opc = PPC::STDX; break;
      case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
      case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
      case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
      case PPC::SPESTW: Opc = PPC::SPESTWX; break;
    }

    auto MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
        .addReg(RegNo: SrcReg);

    // If we have an index register defined we use it in the store inst,
    // otherwise we use X0 as base, since it makes the vector instructions
    // compute the effective address as if the base contribution were zero,
    // regardless of the register's actual content.
    if (IndexReg)
      MIB.addReg(RegNo: Addr.Base.Reg).addReg(RegNo: IndexReg);
    else
      MIB.addReg(RegNo: PPC::ZERO8).addReg(RegNo: Addr.Base.Reg);
  }

  return true;
}
719
720// Attempt to fast-select a store instruction.
721bool PPCFastISel::SelectStore(const Instruction *I) {
722 Value *Op0 = I->getOperand(i: 0);
723 Register SrcReg;
724
725 // FIXME: No atomics loads are supported.
726 if (cast<StoreInst>(Val: I)->isAtomic())
727 return false;
728
729 // Verify we have a legal type before going any further.
730 MVT VT;
731 if (!isLoadTypeLegal(Ty: Op0->getType(), VT))
732 return false;
733
734 // Get the value to be stored into a register.
735 SrcReg = getRegForValue(V: Op0);
736 if (!SrcReg)
737 return false;
738
739 // See if we can handle this address.
740 Address Addr;
741 if (!PPCComputeAddress(Obj: I->getOperand(i: 1), Addr))
742 return false;
743
744 if (!PPCEmitStore(VT, SrcReg, Addr))
745 return false;
746
747 return true;
748}
749
750// Attempt to fast-select a branch instruction.
// Attempt to fast-select a branch instruction. Handles a conditional
// branch fed by a compare in the same block, and a branch on a constant
// condition; anything else is left to SelectionDAG.
bool PPCFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(Val: I);
  MachineBasicBlock *BrBB = FuncInfo.MBB;
  MachineBasicBlock *TBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: 0));
  MachineBasicBlock *FBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: 1));

  // For now, just try the simplest case where it's fed by a compare.
  if (const CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition())) {
    if (isValueAvailable(V: CI)) {
      // Only predicates representable by a single compare/branch pair are
      // handled; see getComparePred.
      std::optional<PPC::Predicate> OptPPCPred =
          getComparePred(Pred: CI->getPredicate());
      if (!OptPPCPred)
        return false;

      PPC::Predicate PPCPred = *OptPPCPred;

      // Take advantage of fall-through opportunities: if the true block
      // immediately follows, branch on the inverted condition to the false
      // block instead.
      if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
        std::swap(a&: TBB, b&: FBB);
        PPCPred = PPC::InvertPredicate(Opcode: PPCPred);
      }

      Register CondReg = createResultReg(RC: &PPC::CRRCRegClass);

      if (!PPCEmitCmp(Src1Value: CI->getOperand(i_nocapture: 0), Src2Value: CI->getOperand(i_nocapture: 1), isZExt: CI->isUnsigned(),
                      DestReg: CondReg, Pred: PPCPred))
        return false;

      // With SPE the condition is baked into the compare opcode selected by
      // PPCEmitCmp, so the branch itself uses the generic PRED_SPE predicate.
      BuildMI(BB&: *BrBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::BCC))
          .addImm(Val: Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
          .addReg(RegNo: CondReg)
          .addMBB(MBB: TBB);
      finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
      return true;
    }
  } else if (const ConstantInt *CI =
                 dyn_cast<ConstantInt>(Val: BI->getCondition())) {
    // Constant condition: emit an unconditional branch to the taken side.
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    fastEmitBranch(MSucc: Target, DbgLoc: MIMD.getDL());
    return true;
  }

  // FIXME: ARM looks for a case where the block containing the compare
  // has been split from the block containing the branch. If this happens,
  // there is a vreg available containing the result of the compare. I'm
  // not sure we can do much, as we've lost the predicate information with
  // the compare instruction -- we have a 4-bit CR but don't know which bit
  // to test here.
  return false;
}
802
// Attempt to emit a compare of the two source values. Signed and unsigned
// comparisons are supported. Return false if we can't handle it.
// The result of the compare is written into DestReg (a CR field); Pred is
// only consulted for SPE, whose FP compares are predicate-specific.
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
                             bool IsZExt, Register DestReg,
                             const PPC::Predicate Pred) {
  Type *Ty = SrcValue1->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, AllowUnknown: true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  // When CR bits are in use, i1 values live in condition-register bits and
  // can't be fed to the integer compare instructions selected below.
  if (SrcVT == MVT::i1 && Subtarget->useCRBits())
    return false;

  // See if operand 2 is an immediate encodeable in the compare.
  // FIXME: Operands are not in canonical order at -O0, so an immediate
  // operand in position 1 is a lost opportunity for now. We are
  // similar to ARM in this regard.
  int64_t Imm = 0;
  bool UseImm = false;
  const bool HasSPE = Subtarget->hasSPE();

  // Only 16-bit integer constants can be represented in compares for
  // PowerPC. Others will be materialized into a register.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: SrcValue2)) {
    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      // Extend the constant with the signedness of the comparison so the
      // range check below matches the immediate field of the chosen opcode.
      Imm = (IsZExt) ? (int64_t)CIVal.getZExtValue() :
                       (int64_t)CIVal.getSExtValue();
      if ((IsZExt && isUInt<16>(x: Imm)) || (!IsZExt && isInt<16>(x: Imm)))
        UseImm = true;
    }
  }

  Register SrcReg1 = getRegForValue(V: SrcValue1);
  if (!SrcReg1)
    return false;

  // Only materialize the RHS into a register when it can't be encoded
  // as an immediate.
  Register SrcReg2;
  if (!UseImm) {
    SrcReg2 = getRegForValue(V: SrcValue2);
    if (!SrcReg2)
      return false;
  }

  unsigned CmpOpc;
  bool NeedsExt = false;

  // Register classes decide between VSX and classic FP compares below.
  // RC2 is null in the immediate case.
  auto RC1 = MRI.getRegClass(Reg: SrcReg1);
  auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(Reg: SrcReg2) : nullptr;

  switch (SrcVT.SimpleTy) {
    default: return false;
    case MVT::f32:
      if (HasSPE) {
        // SPE compares are predicate-specific; only EQ/LT/GT are available.
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFSCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFSCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFSCMPGT;
            break;
        }
      } else {
        CmpOpc = PPC::FCMPUS;
        // FCMPUS takes F4RC operands; copy out of the VSX class if needed.
        if (isVSSRCRegClass(RC: RC1))
          SrcReg1 = copyRegToRegClass(ToRC: &PPC::F4RCRegClass, SrcReg: SrcReg1);
        if (RC2 && isVSSRCRegClass(RC: RC2))
          SrcReg2 = copyRegToRegClass(ToRC: &PPC::F4RCRegClass, SrcReg: SrcReg2);
      }
      break;
    case MVT::f64:
      if (HasSPE) {
        switch (Pred) {
          default: return false;
          case PPC::PRED_EQ:
            CmpOpc = PPC::EFDCMPEQ;
            break;
          case PPC::PRED_LT:
            CmpOpc = PPC::EFDCMPLT;
            break;
          case PPC::PRED_GT:
            CmpOpc = PPC::EFDCMPGT;
            break;
        }
      } else if (isVSFRCRegClass(RC: RC1) || (RC2 && isVSFRCRegClass(RC: RC2))) {
        // Use the VSX compare when either operand is already in a VSX class.
        CmpOpc = PPC::XSCMPUDP;
      } else {
        CmpOpc = PPC::FCMPUD;
      }
      break;
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
      // Sub-word integers must be extended to 32 bits before comparing.
      NeedsExt = true;
      [[fallthrough]];
    case MVT::i32:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
      else
        CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
      break;
    case MVT::i64:
      if (!UseImm)
        CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
      else
        CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
      break;
  }

  if (NeedsExt) {
    Register ExtReg = createResultReg(RC: &PPC::GPRCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg: SrcReg1, DestVT: MVT::i32, DestReg: ExtReg, IsZExt))
      return false;
    SrcReg1 = ExtReg;

    if (!UseImm) {
      // Deliberately shadows the outer ExtReg: a fresh register for the RHS.
      Register ExtReg = createResultReg(RC: &PPC::GPRCRegClass);
      if (!PPCEmitIntExt(SrcVT, SrcReg: SrcReg2, DestVT: MVT::i32, DestReg: ExtReg, IsZExt))
        return false;
      SrcReg2 = ExtReg;
    }
  }

  if (!UseImm)
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: CmpOpc), DestReg)
      .addReg(RegNo: SrcReg1).addReg(RegNo: SrcReg2);
  else
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: CmpOpc), DestReg)
      .addReg(RegNo: SrcReg1).addImm(Val: Imm);

  return true;
}
941
942// Attempt to fast-select a floating-point extend instruction.
943bool PPCFastISel::SelectFPExt(const Instruction *I) {
944 Value *Src = I->getOperand(i: 0);
945 EVT SrcVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
946 EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
947
948 if (SrcVT != MVT::f32 || DestVT != MVT::f64)
949 return false;
950
951 Register SrcReg = getRegForValue(V: Src);
952 if (!SrcReg)
953 return false;
954
955 // No code is generated for a FP extend.
956 updateValueMap(I, Reg: SrcReg);
957 return true;
958}
959
960// Attempt to fast-select a floating-point truncate instruction.
961bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
962 Value *Src = I->getOperand(i: 0);
963 EVT SrcVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
964 EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
965
966 if (SrcVT != MVT::f64 || DestVT != MVT::f32)
967 return false;
968
969 Register SrcReg = getRegForValue(V: Src);
970 if (!SrcReg)
971 return false;
972
973 // Round the result to single precision.
974 Register DestReg;
975 auto RC = MRI.getRegClass(Reg: SrcReg);
976 if (Subtarget->hasSPE()) {
977 DestReg = createResultReg(RC: &PPC::GPRCRegClass);
978 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::EFSCFD),
979 DestReg)
980 .addReg(RegNo: SrcReg);
981 } else if (Subtarget->hasP8Vector() && isVSFRCRegClass(RC)) {
982 DestReg = createResultReg(RC: &PPC::VSSRCRegClass);
983 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::XSRSP),
984 DestReg)
985 .addReg(RegNo: SrcReg);
986 } else {
987 SrcReg = copyRegToRegClass(ToRC: &PPC::F8RCRegClass, SrcReg);
988 DestReg = createResultReg(RC: &PPC::F4RCRegClass);
989 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
990 MCID: TII.get(Opcode: PPC::FRSP), DestReg)
991 .addReg(RegNo: SrcReg);
992 }
993
994 updateValueMap(I, Reg: DestReg);
995 return true;
996}
997
// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// Returns the FPR holding the bits (or an invalid Register on failure).
// The move is done through an 8-byte stack slot (store from the GPR,
// reload into an FPR).
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
Register PPCFastISel::PPCMoveToFPReg(MVT SrcVT, Register SrcReg,
                                     bool IsSigned) {

  // If necessary, extend 32-bit int to 64-bit. Note that SrcVT is
  // deliberately left at MVT::i32 so the load-opcode selection below
  // still knows the original width.
  if (SrcVT == MVT::i32) {
    Register TmpReg = createResultReg(RC: &PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT: MVT::i32, SrcReg, DestVT: MVT::i64, DestReg: TmpReg, IsZExt: !IsSigned))
      return Register();
    SrcReg = TmpReg;
  }

  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);

  // Store the value from the GPR.
  if (!PPCEmitStore(VT: MVT::i64, SrcReg, Addr))
    return Register();

  // Load the integer value into an FPR. The kind of load used depends
  // on a number of conditions.
  unsigned LoadOpc = PPC::LFD;

  if (SrcVT == MVT::i32) {
    if (!IsSigned) {
      LoadOpc = PPC::LFIWZX;
      // On big-endian targets the low word of the stored doubleword is at
      // offset 4, so point the 4-byte integer load there.
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    } else if (Subtarget->hasLFIWAX()) {
      LoadOpc = PPC::LFIWAX;
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    }
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register ResultReg;
  if (!PPCEmitLoad(VT: MVT::f64, ResultReg, Addr, RC, IsZExt: !IsSigned, FP64LoadOpc: LoadOpc))
    return Register();

  return ResultReg;
}
1046
// Attempt to fast-select an integer-to-floating-point conversion.
// Handles i8/i16/i32/i64 sources and f32/f64 destinations; returns false
// to fall back to SelectionDAG for anything else.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
  MVT DstVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(Ty: DstTy, VT&: DstVT))
    return false;

  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return false;

  Value *Src = I->getOperand(i: 0);
  EVT SrcEVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
  if (!SrcEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();

  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  Register SrcReg = getRegForValue(V: Src);
  if (!SrcReg)
    return false;

  // Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs
  if (Subtarget->hasSPE()) {
    unsigned Opc;
    if (DstVT == MVT::f32)
      Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
    else
      Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;

    Register DestReg = createResultReg(RC: &PPC::SPERCRegClass);
    // Generate the convert.
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
      .addReg(RegNo: SrcReg);
    updateValueMap(I, Reg: DestReg);
    return true;
  }

  // We can only lower an unsigned convert if we have the newer
  // floating-point conversion operations.
  if (!IsSigned && !Subtarget->hasFPCVT())
    return false;

  // FIXME: For now we require the newer floating-point conversion operations
  // (which are present only on P7 and A2 server models) when converting
  // to single-precision float. Otherwise we have to generate a lot of
  // fiddly code to avoid double rounding. If necessary, the fiddly code
  // can be found in PPCTargetLowering::LowerINT_TO_FP().
  if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
    return false;

  // Extend the input if necessary. Sub-word sources are widened to i64 so
  // the 64-bit FCFID* forms below see a full doubleword.
  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
    Register TmpReg = createResultReg(RC: &PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT: MVT::i64, DestReg: TmpReg, IsZExt: !IsSigned))
      return false;
    SrcVT = MVT::i64;
    SrcReg = TmpReg;
  }

  // Move the integer value to an FPR (via a stack slot; see PPCMoveToFPReg).
  Register FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
  if (!FPReg)
    return false;

  // Determine the opcode for the conversion.
  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register DestReg = createResultReg(RC);
  unsigned Opc;

  if (DstVT == MVT::f32)
    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
  else
    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;

  // Generate the convert.
  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
    .addReg(RegNo: FPReg);

  updateValueMap(I, Reg: DestReg);
  return true;
}
1134
// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// The move goes through an 8-byte stack slot: store the FPR, reload
// into a GPR of the class expected by I's eventual result register.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
Register PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
                                      Register SrcReg, bool IsSigned) {
  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  // Note that if have STFIWX available, we could use a 4-byte stack
  // slot for i32, but this being fast-isel we'll just go with the
  // easiest code gen possible.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(Size: 8, Alignment: Align(8), isSpillSlot: false);

  // Store the value from the FPR.
  if (!PPCEmitStore(VT: MVT::f64, SrcReg, Addr))
    return Register();

  // Reload it into a GPR. If we want an i32 on big endian, modify the
  // address to have a 4-byte offset so we load from the right place.
  if (VT == MVT::i32)
    Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. A null RC lets the
  // load emitter pick a default class.
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(Reg: AssignedReg) : nullptr;

  Register ResultReg;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, IsZExt: !IsSigned))
    return Register();

  return ResultReg;
}
1171
// Attempt to fast-select a floating-point-to-integer conversion.
// Handles f32/f64 sources and i32/i64 destinations; the convert happens
// in FPRs/VSRs (or GPRs on SPE) and the result is then moved to a GPR.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
  MVT DstVT, SrcVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(Ty: DstTy, VT&: DstVT))
    return false;

  if (DstVT != MVT::i32 && DstVT != MVT::i64)
    return false;

  // If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
  if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
      !Subtarget->hasSPE())
    return false;

  Value *Src = I->getOperand(i: 0);
  Type *SrcTy = Src->getType();
  if (!isTypeLegal(Ty: SrcTy, VT&: SrcVT))
    return false;

  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return false;

  Register SrcReg = getRegForValue(V: Src);
  if (!SrcReg)
    return false;

  // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
  // meaningless copy to get the register class right.
  const TargetRegisterClass *InRC = MRI.getRegClass(Reg: SrcReg);
  if (InRC == &PPC::F4RCRegClass)
    SrcReg = copyRegToRegClass(ToRC: &PPC::F8RCRegClass, SrcReg);
  else if (InRC == &PPC::VSSRCRegClass)
    SrcReg = copyRegToRegClass(ToRC: &PPC::VSFRCRegClass, SrcReg);

  // Determine the opcode for the conversion, which takes place
  // entirely within FPRs or VSRs.
  Register DestReg;
  unsigned Opc;
  auto RC = MRI.getRegClass(Reg: SrcReg);

  if (Subtarget->hasSPE()) {
    // SPE converts straight to a GPR; no FPR-to-GPR move is needed later.
    DestReg = createResultReg(RC: &PPC::GPRCRegClass);
    if (IsSigned)
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
    else
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
  } else if (isVSFRCRegClass(RC)) {
    DestReg = createResultReg(RC: &PPC::VSFRCRegClass);
    if (DstVT == MVT::i32)
      Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
    else
      Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
  } else {
    DestReg = createResultReg(RC: &PPC::F8RCRegClass);
    if (DstVT == MVT::i32)
      if (IsSigned)
        Opc = PPC::FCTIWZ;
      else
        // Without FPCVT, fall back to the signed 64-bit truncating convert;
        // presumably its low 32 bits give the unsigned i32 result for
        // in-range inputs -- NOTE(review): confirm against ISA semantics.
        Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
    else
      Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
  }

  // Generate the convert.
  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
    .addReg(RegNo: SrcReg);

  // Now move the integer value from a float register to an integer register.
  Register IntReg = Subtarget->hasSPE()
                        ? DestReg
                        : PPCMoveToIntReg(I, VT: DstVT, SrcReg: DestReg, IsSigned);

  if (!IntReg)
    return false;

  updateValueMap(I, Reg: IntReg);
  return true;
}
1253
// Attempt to fast-select a binary integer operation that isn't already
// handled automatically. Only i8/i16 ADD/OR/SUB reach here (the legal
// types are handled by the tablegen'd selector).
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. If there is no register,
  // make a conservative choice (don't assign R0).
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(Reg: AssignedReg) :
     &PPC::GPRC_and_GPRC_NOR0RegClass);
  bool IsGPRC = RC->hasSuperClassEq(RC: &PPC::GPRCRegClass);

  // Pick the 32-bit or 64-bit form to match the result register class.
  unsigned Opc;
  switch (ISDOpcode) {
    default: return false;
    case ISD::ADD:
      Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
      break;
    case ISD::OR:
      Opc = IsGPRC ? PPC::OR : PPC::OR8;
      break;
    case ISD::SUB:
      Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
      break;
  }

  // NOTE(review): RC is always non-null here (the ternary above has a
  // non-null fallback), so the "RC ? RC :" guard looks dead -- confirm.
  Register ResultReg = createResultReg(RC: RC ? RC : &PPC::G8RCRegClass);
  Register SrcReg1 = getRegForValue(V: I->getOperand(i: 0));
  if (!SrcReg1)
    return false;

  // Handle case of small immediate operand.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: I->getOperand(i: 1))) {
    const APInt &CIVal = ConstInt->getValue();
    int Imm = (int)CIVal.getSExtValue();
    bool UseImm = true;
    if (isInt<16>(x: Imm)) {
      switch (Opc) {
        default:
          llvm_unreachable("Missing case!");
        case PPC::ADD4:
          Opc = PPC::ADDI;
          // ADDI treats R0 as zero, so keep the source out of R0.
          MRI.setRegClass(Reg: SrcReg1, RC: &PPC::GPRC_and_GPRC_NOR0RegClass);
          break;
        case PPC::ADD8:
          Opc = PPC::ADDI8;
          MRI.setRegClass(Reg: SrcReg1, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
          break;
        case PPC::OR:
          Opc = PPC::ORI;
          break;
        case PPC::OR8:
          Opc = PPC::ORI8;
          break;
        case PPC::SUBF:
          // sub x, imm becomes add x, -imm, except -(-32768) overflows i16.
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI;
            MRI.setRegClass(Reg: SrcReg1, RC: &PPC::GPRC_and_GPRC_NOR0RegClass);
            Imm = -Imm;
          }
          break;
        case PPC::SUBF8:
          if (Imm == -32768)
            UseImm = false;
          else {
            Opc = PPC::ADDI8;
            MRI.setRegClass(Reg: SrcReg1, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
            Imm = -Imm;
          }
          break;
      }

      if (UseImm) {
        BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc),
                DestReg: ResultReg)
            .addReg(RegNo: SrcReg1)
            .addImm(Val: Imm);
        updateValueMap(I, Reg: ResultReg);
        return true;
      }
    }
  }

  // Reg-reg case.
  Register SrcReg2 = getRegForValue(V: I->getOperand(i: 1));
  if (!SrcReg2)
    return false;

  // Reverse operands for subtract-from: SUBF computes RB - RA.
  if (ISDOpcode == ISD::SUB)
    std::swap(a&: SrcReg1, b&: SrcReg2);

  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
    .addReg(RegNo: SrcReg1).addReg(RegNo: SrcReg2);
  updateValueMap(I, Reg: ResultReg);
  return true;
}
1360
// Handle arguments to a call that we're attempting to fast-select.
// Return false if the arguments are too complex for us at the moment.
// On success, RegArgs holds the physical registers the arguments were
// copied into and NumBytes the stack space reserved (CALLSEQ_START has
// been emitted).
bool PPCFastISel::processCallArgs(SmallVectorImpl<Value *> &Args,
                                  SmallVectorImpl<Register> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<unsigned> &RegArgs,
                                  CallingConv::ID CC, unsigned &NumBytes,
                                  bool IsVarArg) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(Size: LinkageSize, Alignment: Align(8));

  CCInfo.AnalyzeCallOperands(ArgVTs, Flags&: ArgFlags, Fn: CC_PPC64_ELF_FIS);

  // Bail out if we can't handle any of the arguments.
  for (const CCValAssign &VA : ArgLocs) {
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Skip vector arguments for now, as well as long double and
    // uint128_t, and anything that isn't passed in a register.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
        !VA.isRegLoc() || VA.needsCustom())
      return false;

    // Skip bit-converted arguments for now.
    if (VA.getLocInfo() == CCValAssign::BCvt)
      return false;
  }

  // Get a count of how many bytes are to be pushed onto the stack.
  NumBytes = CCInfo.getStackSize();

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if its varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed. As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
  NumBytes = std::max(a: NumBytes, b: LinkageSize + 64);

  // Issue CALLSEQ_START.
  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
          MCID: TII.get(Opcode: TII.getCallFrameSetupOpcode()))
    .addImm(Val: NumBytes).addImm(Val: 0);

  // Prepare to assign register arguments. Every argument uses up a
  // GPR protocol register even if it's passed in a floating-point
  // register (unless we're using the fast calling convention).
  // Assumes PPC::X3..X10 and PPC::F1..F13 are numbered consecutively
  // in the register enum -- TODO confirm against PPCRegisterInfo.td.
  unsigned NextGPR = PPC::X3;
  unsigned NextFPR = PPC::F1;

  // Process arguments.
  for (const CCValAssign &VA : ArgLocs) {
    Register Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Handle argument promotion and bitcasts.
    switch (VA.getLocInfo()) {
      default:
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::SExt: {
        MVT DestVT = VA.getLocVT();
        const TargetRegisterClass *RC =
          (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
        Register TmpReg = createResultReg(RC);
        if (!PPCEmitIntExt(SrcVT: ArgVT, SrcReg: Arg, DestVT, DestReg: TmpReg, /*IsZExt*/false))
          llvm_unreachable("Failed to emit a sext!");
        ArgVT = DestVT;
        Arg = TmpReg;
        break;
      }
      case CCValAssign::AExt:
      case CCValAssign::ZExt: {
        MVT DestVT = VA.getLocVT();
        const TargetRegisterClass *RC =
          (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
        Register TmpReg = createResultReg(RC);
        if (!PPCEmitIntExt(SrcVT: ArgVT, SrcReg: Arg, DestVT, DestReg: TmpReg, /*IsZExt*/true))
          llvm_unreachable("Failed to emit a zext!");
        ArgVT = DestVT;
        Arg = TmpReg;
        break;
      }
      case CCValAssign::BCvt: {
        // FIXME: Not yet handled.
        llvm_unreachable("Should have bailed before getting here!");
        break;
      }
    }

    // Copy this argument to the appropriate register. FP arguments also
    // consume a GPR slot (except under the fast calling convention).
    unsigned ArgReg;
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
      ArgReg = NextFPR++;
      if (CC != CallingConv::Fast)
        ++NextGPR;
    } else
      ArgReg = NextGPR++;

    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ArgReg).addReg(RegNo: Arg);
    RegArgs.push_back(Elt: ArgReg);
  }

  return true;
}
1473
1474// For a call that we've determined we can fast-select, finish the
1475// call sequence and generate a copy to obtain the return value (if any).
1476bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
1477 CallingConv::ID CC = CLI.CallConv;
1478
1479 // Issue CallSEQ_END.
1480 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1481 MCID: TII.get(Opcode: TII.getCallFrameDestroyOpcode()))
1482 .addImm(Val: NumBytes).addImm(Val: 0);
1483
1484 // Next, generate a copy to obtain the return value.
1485 // FIXME: No multi-register return values yet, though I don't foresee
1486 // any real difficulties there.
1487 if (RetVT != MVT::isVoid) {
1488 SmallVector<CCValAssign, 16> RVLocs;
1489 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
1490 CCInfo.AnalyzeCallResult(VT: RetVT, Fn: RetCC_PPC64_ELF_FIS);
1491 CCValAssign &VA = RVLocs[0];
1492 assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
1493 assert(VA.isRegLoc() && "Can only return in registers!");
1494
1495 MVT DestVT = VA.getValVT();
1496 MVT CopyVT = DestVT;
1497
1498 // Ints smaller than a register still arrive in a full 64-bit
1499 // register, so make sure we recognize this.
1500 if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
1501 CopyVT = MVT::i64;
1502
1503 Register SourcePhysReg = VA.getLocReg();
1504 Register ResultReg;
1505
1506 if (RetVT == CopyVT) {
1507 const TargetRegisterClass *CpyRC = TLI.getRegClassFor(VT: CopyVT);
1508 ResultReg = copyRegToRegClass(ToRC: CpyRC, SrcReg: SourcePhysReg);
1509
1510 // If necessary, round the floating result to single precision.
1511 } else if (CopyVT == MVT::f64) {
1512 ResultReg = createResultReg(RC: TLI.getRegClassFor(VT: RetVT));
1513 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::FRSP),
1514 DestReg: ResultReg).addReg(RegNo: SourcePhysReg);
1515
1516 // If only the low half of a general register is needed, generate
1517 // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
1518 // used along the fast-isel path (not lowered), and downstream logic
1519 // also doesn't like a direct subreg copy on a physical reg.)
1520 } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
1521 // Convert physical register from G8RC to GPRC.
1522 SourcePhysReg = (SourcePhysReg - PPC::X0) + PPC::R0;
1523 ResultReg = copyRegToRegClass(ToRC: &PPC::GPRCRegClass, SrcReg: SourcePhysReg);
1524 }
1525
1526 assert(ResultReg && "ResultReg unset!");
1527 CLI.InRegs.push_back(Elt: SourcePhysReg);
1528 CLI.ResultReg = ResultReg;
1529 CLI.NumResultRegs = 1;
1530 }
1531
1532 return true;
1533}
1534
// Attempt to fast-select a call. Bails out (returning false, so SDISel
// takes over) for tail calls, long calls, varargs, PC-relative callees,
// function pointers (except patchpoints), unsupported return types, and
// anything with more than 8 register-passed arguments.
bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC = CLI.CallConv;
  bool IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  const MCSymbol *Symbol = CLI.Symbol;

  if (!Callee && !Symbol)
    return false;

  // Allow SelectionDAG isel to handle tail calls and long calls.
  if (IsTailCall || Subtarget->useLongCalls())
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  // If this is a PC-Rel function, let SDISel handle the call.
  if (Subtarget->isUsingPCRelativeCalls())
    return false;

  // Handle simple calls for now, with legal return types and
  // those that can be extended.
  Type *RetTy = CLI.RetTy;
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(Ty: RetTy, VT&: RetVT) && RetVT != MVT::i16 &&
           RetVT != MVT::i8)
    return false;
  else if (RetVT == MVT::i1 && Subtarget->useCRBits())
    // We can't handle boolean returns when CR bits are in use.
    return false;

  // FIXME: No multi-register return values yet. For any other return
  // type, run the convention analysis and reject multi-location results.
  if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
      RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
      RetVT != MVT::f64) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(VT: RetVT, Fn: RetCC_PPC64_ELF_FIS);
    if (RVLocs.size() > 1)
      return false;
  }

  // Bail early if more than 8 arguments, as we only currently
  // handle arguments passed in registers.
  unsigned NumArgs = CLI.OutVals.size();
  if (NumArgs > 8)
    return false;

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<Register, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;

  Args.reserve(N: NumArgs);
  ArgRegs.reserve(N: NumArgs);
  ArgVTs.reserve(N: NumArgs);
  ArgFlags.reserve(N: NumArgs);

  for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
    // Only handle easy calls for now. It would be reasonably easy
    // to handle <= 8-byte structures passed ByVal in registers, but we
    // have to ensure they are right-justified in the register.
    ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
    if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
      return false;

    Value *ArgValue = CLI.OutVals[i];
    Type *ArgTy = ArgValue->getType();
    MVT ArgVT;
    if (!isTypeLegal(Ty: ArgTy, VT&: ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
      return false;

    // FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP
    // types, which is passed through vector register. Skip these types and
    // fallback to default SelectionDAG based selection.
    if (ArgVT.isVector() || ArgVT == MVT::f128)
      return false;

    Register Arg = getRegForValue(V: ArgValue);
    if (!Arg)
      return false;

    Args.push_back(Elt: ArgValue);
    ArgRegs.push_back(Elt: Arg);
    ArgVTs.push_back(Elt: ArgVT);
    ArgFlags.push_back(Elt: Flags);
  }

  // Process the arguments: emits CALLSEQ_START and the copies into the
  // protocol registers, and computes the reserved stack size.
  SmallVector<unsigned, 8> RegArgs;
  unsigned NumBytes;

  if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, IsVarArg))
    return false;

  MachineInstrBuilder MIB;
  // FIXME: No handling for function pointers yet. This requires
  // implementing the function descriptor (OPD) setup.
  const GlobalValue *GV = dyn_cast<GlobalValue>(Val: Callee);
  if (!GV) {
    // patchpoints are a special case; they always dispatch to a pointer value.
    // However, we don't actually want to generate the indirect call sequence
    // here (that will be generated, as necessary, during asm printing), and
    // the call we generate here will be erased by FastISel::selectPatchpoint,
    // so don't try very hard...
    if (CLI.IsPatchPoint)
      MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::NOP));
    else
      return false;
  } else {
    // Build direct call with NOP for TOC restore.
    // FIXME: We can and should optimize away the NOP for local calls.
    MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                  MCID: TII.get(Opcode: PPC::BL8_NOP));
    // Add callee.
    MIB.addGlobalAddress(GV);
  }

  // Add implicit physical register uses to the call.
  for (unsigned Reg : RegArgs)
    MIB.addReg(RegNo: Reg, flags: RegState::Implicit);

  // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
  // into the call.
  PPCFuncInfo->setUsesTOCBasePtr();
  MIB.addReg(RegNo: PPC::X2, flags: RegState::Implicit);

  // Add a register mask with the call-preserved registers. Proper
  // defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return finishCall(RetVT, CLI, NumBytes);
}
1677
// Attempt to fast-select a return instruction. Copies the (single) return
// value into its convention-assigned physical register, then emits BLR8
// with the return registers as implicit uses.
bool PPCFastISel::SelectRet(const Instruction *I) {

  if (!FuncInfo.CanLowerReturn)
    return false;

  const ReturnInst *Ret = cast<ReturnInst>(Val: I);
  const Function &F = *I->getParent()->getParent();

  // Build a list of return value registers.
  SmallVector<Register, 4> RetRegs;
  CallingConv::ID CC = F.getCallingConv();

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, ReturnType: F.getReturnType(), attr: F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
    CCInfo.AnalyzeReturn(Outs, Fn: RetCC_PPC64_ELF_FIS);
    const Value *RV = Ret->getOperand(i_nocapture: 0);

    // FIXME: Only one output register for now.
    if (ValLocs.size() > 1)
      return false;

    // Special case for returning a constant integer of any size - materialize
    // the constant as an i64 and copy it to the return register.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: RV)) {
      CCValAssign &VA = ValLocs[0];

      Register RetReg = VA.getLocReg();
      // We still need to worry about properly extending the sign. For example,
      // we could have only a single bit or a constant that needs zero
      // extension rather than sign extension. Make sure we pass the return
      // value extension property to integer materialization.
      Register SrcReg =
        PPCMaterializeInt(CI, VT: MVT::i64, UseSExt: VA.getLocInfo() != CCValAssign::ZExt);

      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
              MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: RetReg).addReg(RegNo: SrcReg);

      RetRegs.push_back(Elt: RetReg);

    } else {
      Register Reg = getRegForValue(V: RV);

      if (!Reg)
        return false;

      // Copy the result values into the output registers.
      for (unsigned i = 0; i < ValLocs.size(); ++i) {

        CCValAssign &VA = ValLocs[i];
        assert(VA.isRegLoc() && "Can only return in registers!");
        RetRegs.push_back(Elt: VA.getLocReg());
        // Assumes multi-value results occupy consecutively numbered
        // virtual registers starting at Reg -- TODO confirm (only one
        // location is possible today given the size check above).
        Register SrcReg = Reg + VA.getValNo();

        EVT RVEVT = TLI.getValueType(DL, Ty: RV->getType());
        if (!RVEVT.isSimple())
          return false;
        MVT RVVT = RVEVT.getSimpleVT();
        MVT DestVT = VA.getLocVT();

        if (RVVT != DestVT && RVVT != MVT::i8 &&
            RVVT != MVT::i16 && RVVT != MVT::i32)
          return false;

        // Extend sub-word values to the full location width as the
        // convention requires.
        if (RVVT != DestVT) {
          switch (VA.getLocInfo()) {
            default:
              llvm_unreachable("Unknown loc info!");
            case CCValAssign::Full:
              llvm_unreachable("Full value assign but types don't match?");
            case CCValAssign::AExt:
            case CCValAssign::ZExt: {
              const TargetRegisterClass *RC =
                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
              Register TmpReg = createResultReg(RC);
              if (!PPCEmitIntExt(SrcVT: RVVT, SrcReg, DestVT, DestReg: TmpReg, IsZExt: true))
                return false;
              SrcReg = TmpReg;
              break;
            }
            case CCValAssign::SExt: {
              const TargetRegisterClass *RC =
                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
              Register TmpReg = createResultReg(RC);
              if (!PPCEmitIntExt(SrcVT: RVVT, SrcReg, DestVT, DestReg: TmpReg, IsZExt: false))
                return false;
              SrcReg = TmpReg;
              break;
            }
          }
        }

        BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: RetRegs[i])
            .addReg(RegNo: SrcReg);
      }
    }
  }

  MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                                    MCID: TII.get(Opcode: PPC::BLR8));

  for (Register Reg : RetRegs)
    MIB.addReg(RegNo: Reg, flags: RegState::Implicit);

  return true;
}
1790
// Attempt to emit an integer extend of SrcReg into DestReg. Both
// signed and zero extensions are supported. Return false if we
// can't handle it.
bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
                                Register DestReg, bool IsZExt) {
  // Only i32 and i64 destinations are supported.
  if (DestVT != MVT::i32 && DestVT != MVT::i64)
    return false;
  // Only i8, i16, and i32 sources are supported.
  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
    return false;

  // Signed extensions use EXTSB, EXTSH, EXTSW.
  if (!IsZExt) {
    unsigned Opc;
    if (SrcVT == MVT::i8)
      Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
    else if (SrcVT == MVT::i16)
      Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
    else {
      // i32 source with i32 destination would be a no-op, so it must be i64.
      assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
      Opc = PPC::EXTSW_32_64;
    }
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
      .addReg(RegNo: SrcReg);

  // Unsigned 32-bit extensions use RLWINM.
  } else if (DestVT == MVT::i32) {
    unsigned MB;
    if (SrcVT == MVT::i8)
      // Mask begins at bit 24 so only the low 8 bits survive.
      MB = 24;
    else {
      assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
      // Mask begins at bit 16 so only the low 16 bits survive.
      MB = 16;
    }
    // Rotate by 0 and mask bits MB..31, i.e. clear everything above SrcVT.
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::RLWINM),
            DestReg)
      .addReg(RegNo: SrcReg).addImm(/*SH=*/Val: 0).addImm(Val: MB).addImm(/*ME=*/Val: 31);

  // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
  } else {
    unsigned MB;
    if (SrcVT == MVT::i8)
      MB = 56;      // keep the low 8 of 64 bits
    else if (SrcVT == MVT::i16)
      MB = 48;      // keep the low 16 bits
    else
      MB = 32;      // keep the low 32 bits
    // Rotate by 0 and clear the high MB bits.
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: PPC::RLDICL_32_64), DestReg)
      .addReg(RegNo: SrcReg).addImm(/*SH=*/Val: 0).addImm(Val: MB);
  }

  return true;
}
1844
// Attempt to fast-select an indirect branch instruction.
bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
  // The branch target address must already be materialized in a register.
  Register AddrReg = getRegForValue(V: I->getOperand(i: 0));
  if (!AddrReg)
    return false;

  // Move the target address into the count register, then branch through it.
  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::MTCTR8))
    .addReg(RegNo: AddrReg);
  BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::BCTR8));

  // Record every possible destination as a CFG successor of this block.
  const IndirectBrInst *IB = cast<IndirectBrInst>(Val: I);
  for (const BasicBlock *SuccBB : IB->successors())
    FuncInfo.MBB->addSuccessor(Succ: FuncInfo.getMBB(BB: SuccBB));

  return true;
}
1861
// Attempt to fast-select an integer truncate instruction.
bool PPCFastISel::SelectTrunc(const Instruction *I) {
  Value *Src = I->getOperand(i: 0);
  EVT SrcVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
  EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);

  // Only handle truncates from i64/i32/i16...
  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
    return false;

  // ...down to i32/i16/i8.
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  Register SrcReg = getRegForValue(V: Src);
  if (!SrcReg)
    return false;

  // Truncation is a no-op on the register contents; the only interesting
  // case is when we need to switch register classes (i64 lives in G8RC,
  // narrower types in GPRC), handled via a sub_32 subregister copy.
  if (SrcVT == MVT::i64)
    SrcReg = copyRegToRegClass(ToRC: &PPC::GPRCRegClass, SrcReg, Flag: 0, SubReg: PPC::sub_32);

  // Reuse the (possibly copied) source register as the result.
  updateValueMap(I, Reg: SrcReg);
  return true;
}
1885
// Attempt to fast-select an integer extend instruction.
bool PPCFastISel::SelectIntExt(const Instruction *I) {
  Type *DestTy = I->getType();
  Value *Src = I->getOperand(i: 0);
  Type *SrcTy = Src->getType();

  // SExt vs. ZExt is determined by the IR instruction kind.
  bool IsZExt = isa<ZExtInst>(Val: I);
  Register SrcReg = getRegForValue(V: Src);
  if (!SrcReg) return false;

  // Both types must lower to simple MVTs for PPCEmitIntExt to handle them.
  EVT SrcEVT, DestEVT;
  SrcEVT = TLI.getValueType(DL, Ty: SrcTy, AllowUnknown: true);
  DestEVT = TLI.getValueType(DL, Ty: DestTy, AllowUnknown: true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();

  // If we know the register class needed for the result of this
  // instruction, use it. Otherwise pick the register class of the
  // correct size that does not contain X0/R0, since we don't know
  // whether downstream uses permit that assignment.
  Register AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(Reg: AssignedReg) :
     (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
      &PPC::GPRC_and_GPRC_NOR0RegClass));
  Register ResultReg = createResultReg(RC);

  // Emit the actual extension; bail if the type combination is unsupported.
  if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, DestReg: ResultReg, IsZExt))
    return false;

  updateValueMap(I, Reg: ResultReg);
  return true;
}
1924
// Attempt to fast-select an instruction that wasn't handled by
// the table-generated machinery. Returns true if the instruction
// was selected; false falls back to SelectionDAG.
bool PPCFastISel::fastSelectInstruction(const Instruction *I) {

  // Dispatch on IR opcode to the hand-written selectors below.
  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::IndirectBr:
      return SelectIndirectBr(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectIToFP(I, /*IsSigned*/ true);
    case Instruction::UIToFP:
      return SelectIToFP(I, /*IsSigned*/ false);
    case Instruction::FPToSI:
      return SelectFPToI(I, /*IsSigned*/ true);
    case Instruction::FPToUI:
      return SelectFPToI(I, /*IsSigned*/ false);
    case Instruction::Add:
      return SelectBinaryIntOp(I, ISDOpcode: ISD::ADD);
    case Instruction::Or:
      return SelectBinaryIntOp(I, ISDOpcode: ISD::OR);
    case Instruction::Sub:
      return SelectBinaryIntOp(I, ISDOpcode: ISD::SUB);
    case Instruction::Ret:
      return SelectRet(I);
    case Instruction::Trunc:
      return SelectTrunc(I);
    case Instruction::ZExt:
    case Instruction::SExt:
      return SelectIntExt(I);
    // Here add other flavors of Instruction::XXX that automated
    // cases don't catch. For example, switches are terminators
    // that aren't yet handled.
    default:
      break;
  }
  return false;
}
1971
// Materialize a floating-point constant into a register, and return
// the register number (or zero if we failed to handle it).
Register PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
  // If this is a PC-Rel function, let SDISel handle constant pool.
  if (Subtarget->isUsingPCRelativeCalls())
    return Register();

  // No plans to handle long double here.
  if (VT != MVT::f32 && VT != MVT::f64)
    return Register();

  // All FP constants are loaded from the constant pool.
  Align Alignment = DL.getPrefTypeAlign(Ty: CFP->getType());
  unsigned Idx = MCP.getConstantPoolIndex(C: cast<Constant>(Val: CFP), Alignment);
  // SPE targets keep FP values in GPR/SPE register classes rather than FPRs.
  const bool HasSPE = Subtarget->hasSPE();
  const TargetRegisterClass *RC;
  if (HasSPE)
    RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
  else
    RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);

  Register DestReg = createResultReg(RC);
  CodeModel::Model CModel = TM.getCodeModel();

  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
      PtrInfo: MachinePointerInfo::getConstantPool(MF&: *FuncInfo.MF),
      F: MachineMemOperand::MOLoad, Size: (VT == MVT::f32) ? 4 : 8, BaseAlignment: Alignment);

  // Pick the load opcode for the value width (SPE vs. classic FP loads).
  unsigned Opc;

  if (HasSPE)
    Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
  else
    Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);

  // Scratch register for the TOC-relative address computation; must avoid
  // X0 because it reads as zero in the base-register position.
  Register TmpReg = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);

  PPCFuncInfo->setUsesTOCBasePtr();
  // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
  if (CModel == CodeModel::Small) {
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::LDtocCPT),
            DestReg: TmpReg)
      .addConstantPoolIndex(Idx).addReg(RegNo: PPC::X2);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
      .addImm(Val: 0).addReg(RegNo: TmpReg).addMemOperand(MMO);
  } else {
    // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDIStocHA8),
            DestReg: TmpReg).addReg(RegNo: PPC::X2).addConstantPoolIndex(Idx);
    // But for large code model, we must generate a LDtocL followed
    // by the LF[SD].
    if (CModel == CodeModel::Large) {
      Register TmpReg2 = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::LDtocL),
              DestReg: TmpReg2).addConstantPoolIndex(Idx).addReg(RegNo: TmpReg);
      // NOTE(review): unlike the other two paths, this load carries no
      // MachineMemOperand — presumably harmless (only pessimizes alias
      // analysis), but worth confirming upstream intent.
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
          .addImm(Val: 0)
          .addReg(RegNo: TmpReg2);
    } else
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
          .addConstantPoolIndex(Idx, Offset: 0, TargetFlags: PPCII::MO_TOC_LO)
          .addReg(RegNo: TmpReg)
          .addMemOperand(MMO);
  }

  return DestReg;
}
2039
// Materialize the address of a global value into a register, and return
// the register number (or zero if we failed to handle it).
Register PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
  // If this is a PC-Rel function, let SDISel handle GV materialization.
  if (Subtarget->isUsingPCRelativeCalls())
    return Register();

  // Addresses are always 64-bit here (this FastISel is 64-bit only).
  assert(VT == MVT::i64 && "Non-address!");
  const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
  Register DestReg = createResultReg(RC);

  // Global values may be plain old object addresses, TLS object
  // addresses, constant pool entries, or jump tables. How we generate
  // code for these may depend on small, medium, or large code model.
  CodeModel::Model CModel = TM.getCodeModel();

  // FIXME: Jump tables are not yet required because fast-isel doesn't
  // handle switches; if that changes, we need them as well. For now,
  // what follows assumes everything's a generic (or TLS) global address.

  // FIXME: We don't yet handle the complexity of TLS.
  if (GV->isThreadLocal())
    return Register();

  PPCFuncInfo->setUsesTOCBasePtr();
  // AIX "toc-data" globals live directly in the TOC and can be addressed
  // with a simple add from X2 instead of a TOC load.
  bool IsAIXTocData = TM.getTargetTriple().isOSAIX() &&
                      isa<GlobalVariable>(Val: GV) &&
                      cast<GlobalVariable>(Val: GV)->hasAttribute(Kind: "toc-data");

  // For small code model, generate a simple TOC load.
  if (CModel == CodeModel::Small) {
    auto MIB = BuildMI(
        BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
        MCID: IsAIXTocData ? TII.get(Opcode: PPC::ADDItoc8) : TII.get(Opcode: PPC::LDtoc), DestReg);
    // ADDItoc8 and LDtoc take their operands in opposite order.
    if (IsAIXTocData)
      MIB.addReg(RegNo: PPC::X2).addGlobalAddress(GV);
    else
      MIB.addGlobalAddress(GV).addReg(RegNo: PPC::X2);
  } else {
    // If the address is an externally defined symbol, a symbol with common
    // or externally available linkage, a non-local function address, or a
    // jump table address (not yet needed), or if we are generating code
    // for large code model, we generate:
    //       LDtocL(GV, ADDIStocHA8(%x2, GV))
    // Otherwise we generate:
    //       ADDItocL8(ADDIStocHA8(%x2, GV), GV)
    // Either way, start with the ADDIStocHA8:
    Register HighPartReg = createResultReg(RC);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDIStocHA8),
            DestReg: HighPartReg).addReg(RegNo: PPC::X2).addGlobalAddress(GV);

    if (Subtarget->isGVIndirectSymbol(GV)) {
      assert(!IsAIXTocData && "TOC data should always be direct.");
      // Indirect: load the address from the TOC entry.
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::LDtocL),
              DestReg).addGlobalAddress(GV).addReg(RegNo: HighPartReg);
    } else {
      // Otherwise generate the ADDItocL8.
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDItocL8),
              DestReg)
          .addReg(RegNo: HighPartReg)
          .addGlobalAddress(GV);
    }
  }

  return DestReg;
}
2106
// Materialize a 32-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
Register PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  // Split the constant into 16-bit halves.
  unsigned Lo = Imm & 0xFFFF;
  unsigned Hi = (Imm >> 16) & 0xFFFF;

  Register ResultReg = createResultReg(RC);
  // GPRC means 32-bit opcodes (LI/LIS/ORI); otherwise use the 64-bit forms.
  bool IsGPRC = RC->hasSuperClassEq(RC: &PPC::GPRCRegClass);

  // A constant that fits the signed 16-bit immediate needs only one LI.
  if (isInt<16>(x: Imm))
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: IsGPRC ? PPC::LI : PPC::LI8), DestReg: ResultReg)
      .addImm(Val: Imm);
  else if (Lo) {
    // Both Lo and Hi have nonzero bits: LIS for the high half, then ORI
    // (which zero-extends its immediate) to merge in the low half.
    Register TmpReg = createResultReg(RC);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: IsGPRC ? PPC::LIS : PPC::LIS8), DestReg: TmpReg)
      .addImm(Val: Hi);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: IsGPRC ? PPC::ORI : PPC::ORI8), DestReg: ResultReg)
      .addReg(RegNo: TmpReg).addImm(Val: Lo);
  } else
    // Just Hi bits.
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: IsGPRC ? PPC::LIS : PPC::LIS8), DestReg: ResultReg)
      .addImm(Val: Hi);

  return ResultReg;
}
2138
// Materialize a 64-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
//
// Strategy: get the constant (or its significant top part) into 32 bits,
// materialize that with PPCMaterialize32BitInt, shift it into place with
// RLDICR if needed, then OR in any remaining low bits with ORIS8/ORI8.
Register PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  // Remainder holds low bits not covered by the shifted 32-bit part;
  // Shift is how far the materialized part must be shifted left.
  unsigned Remainder = 0;
  unsigned Shift = 0;

  // If the value doesn't fit in 32 bits, see if we can shift it
  // so that it fits in 32 bits.
  if (!isInt<32>(x: Imm)) {
    // Strip trailing zeros first; if the shifted value fits, no low
    // bits remain to be OR'd back.
    Shift = llvm::countr_zero<uint64_t>(Val: Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    if (isInt<32>(x: ImmSh))
      Imm = ImmSh;
    else {
      // Otherwise materialize the high 32 bits and remember the low 32
      // (Remainder) to OR in afterwards.
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }

  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
  // (if not shifted).
  Register TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
  if (!Shift)
    return TmpReg1;

  // If upper 32 bits were not zero, we've built them and need to shift
  // them into place.
  Register TmpReg2;
  if (Imm) {
    TmpReg2 = createResultReg(RC);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::RLDICR),
            DestReg: TmpReg2).addReg(RegNo: TmpReg1).addImm(Val: Shift).addImm(Val: 63 - Shift);
  } else
    TmpReg2 = TmpReg1;

  // OR in the remaining low bits, 16 at a time (ORIS8 = bits 16-31,
  // ORI8 = bits 0-15), skipping halves that are zero.
  Register TmpReg3;
  unsigned Hi, Lo;
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    TmpReg3 = createResultReg(RC);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ORIS8),
            DestReg: TmpReg3).addReg(RegNo: TmpReg2).addImm(Val: Hi);
  } else
    TmpReg3 = TmpReg2;

  if ((Lo = Remainder & 0xFFFF)) {
    Register ResultReg = createResultReg(RC);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ORI8),
            DestReg: ResultReg).addReg(RegNo: TmpReg3).addImm(Val: Lo);
    return ResultReg;
  }

  return TmpReg3;
}
2195
// Materialize an integer constant into a register, and return
// the register number (or zero if we failed to handle it).
// UseSExt selects sign extension vs. zero extension of the constant
// to the register width.
Register PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
                                        bool UseSExt) {
  // If we're using CR bit registers for i1 values, handle that as a special
  // case first.
  if (VT == MVT::i1 && Subtarget->useCRBits()) {
    Register ImmReg = createResultReg(RC: &PPC::CRBITRCRegClass);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: CI->isZero() ? PPC::CRUNSET : PPC::CRSET), DestReg: ImmReg);
    return ImmReg;
  }

  // Only integer types up to 64 bits are supported.
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
      VT != MVT::i1)
    return Register();

  const TargetRegisterClass *RC =
      ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
  int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();

  // If the constant is in range, use a load-immediate.
  // Since LI will sign extend the constant we need to make sure that for
  // our zeroext constants that the sign extended constant fits into 16-bits -
  // a range of 0..0x7fff.
  if (isInt<16>(x: Imm)) {
    unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
    Register ImmReg = createResultReg(RC);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ImmReg)
      .addImm(Val: Imm);
    return ImmReg;
  }

  // Construct the constant piecewise.
  if (VT == MVT::i64)
    return PPCMaterialize64BitInt(Imm, RC);
  else if (VT == MVT::i32)
    return PPCMaterialize32BitInt(Imm, RC);

  // i16/i8/i1 constants always fit in 16 bits, so this is unreachable
  // in practice; kept as a conservative fallback.
  return Register();
}
2237
// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it). FastISel entry point
// that dispatches on the constant's kind (FP / global / integer).
Register PPCFastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, Ty: C->getType(), AllowUnknown: true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return Register();
  MVT VT = CEVT.getSimpleVT();

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C))
    return PPCMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C))
    return PPCMaterializeGV(GV, VT);
  else if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: C))
    // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
    // assumes that constant PHI operands will be zero extended, and failure to
    // match that assumption will cause problems if we sign extend here but
    // some user of a PHI is in a block for which we fall back to full SDAG
    // instruction selection.
    return PPCMaterializeInt(CI, VT, UseSExt: false);

  // Other constant kinds (aggregates, expressions, ...) are not handled.
  return Register();
}
2262
2263// Materialize the address created by an alloca into a register, and
2264// return the register number (or zero if we failed to handle it).
2265Register PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2266 DenseMap<const AllocaInst *, int>::iterator SI =
2267 FuncInfo.StaticAllocaMap.find(Val: AI);
2268
2269 // Don't handle dynamic allocas.
2270 if (SI == FuncInfo.StaticAllocaMap.end())
2271 return Register();
2272
2273 MVT VT;
2274 if (!isLoadTypeLegal(Ty: AI->getType(), VT))
2275 return Register();
2276
2277 if (SI != FuncInfo.StaticAllocaMap.end()) {
2278 Register ResultReg = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
2279 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDI8),
2280 DestReg: ResultReg).addFrameIndex(Idx: SI->second).addImm(Val: 0);
2281 return ResultReg;
2282 }
2283
2284 return Register();
2285}
2286
// Fold loads into extends when possible.
// FIXME: We can have multiple redundant extend/trunc instructions
// following a load. The folding only picks up one. Extend this
// to check subsequent instructions for the same pattern and remove
// them. Thus ResultReg should be the def reg for the last redundant
// instruction in a chain, and all intervening instructions can be
// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
// to add ELF64-NOT: rldicl to the appropriate tests when this works.
bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(Ty: LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend: the extend MI is only
  // foldable if its effect is subsumed by an extending load of type VT.
  bool IsZExt = false;
  switch(MI->getOpcode()) {
    default:
      return false;

    case PPC::RLDICL:
    case PPC::RLDICL_32_64: {
      IsZExt = true;
      // The rotate-and-clear-left keeps bits MB..63; a zero-extending
      // load of VT already clears at least that many high bits.
      unsigned MB = MI->getOperand(i: 3).getImm();
      if ((VT == MVT::i8 && MB <= 56) ||
          (VT == MVT::i16 && MB <= 48) ||
          (VT == MVT::i32 && MB <= 32))
        break;
      return false;
    }

    case PPC::RLWINM:
    case PPC::RLWINM8: {
      IsZExt = true;
      // Same idea for the 32-bit rotate-and-mask (bits MB..31 kept).
      unsigned MB = MI->getOperand(i: 3).getImm();
      if ((VT == MVT::i8 && MB <= 24) ||
          (VT == MVT::i16 && MB <= 16))
        break;
      return false;
    }

    case PPC::EXTSB:
    case PPC::EXTSB8:
    case PPC::EXTSB8_32_64:
      /* There is no sign-extending load-byte instruction. */
      return false;

    case PPC::EXTSH:
    case PPC::EXTSH8:
    case PPC::EXTSH8_32_64: {
      // LHA[X] sign-extends halfwords; also subsumes a byte-sized load.
      if (VT != MVT::i16 && VT != MVT::i8)
        return false;
      break;
    }

    case PPC::EXTSW:
    case PPC::EXTSW_32:
    case PPC::EXTSW_32_64: {
      // LWA[X] sign-extends words; also subsumes narrower loads.
      if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
        return false;
      break;
    }
  }

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(Obj: LI->getOperand(i_nocapture: 0), Addr))
    return false;

  // Re-emit the load directly into the extend's result register, then
  // delete the now-redundant extend instruction.
  Register ResultReg = MI->getOperand(i: 0).getReg();

  if (!PPCEmitLoad(VT, ResultReg, Addr, RC: nullptr, IsZExt,
                   FP64LoadOpc: Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
    return false;

  MachineBasicBlock::iterator I(MI);
  removeDeadCode(I, E: std::next(x: I));
  return true;
}
2367
2368// Attempt to lower call arguments in a faster way than done by
2369// the selection DAG code.
2370bool PPCFastISel::fastLowerArguments() {
2371 // Defer to normal argument lowering for now. It's reasonably
2372 // efficient. Consider doing something like ARM to handle the
2373 // case where all args fit in registers, no varargs, no float
2374 // or vector args.
2375 return false;
2376}
2377
// Handle materializing integer constants into a register. This is not
// automatically generated for PowerPC, so must be explicitly created here.
// Returns the register holding Imm, or zero on failure.
Register PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {

  // Only ISD::Constant is handled by this hook.
  if (Opc != ISD::Constant)
    return Register();

  // If we're using CR bit registers for i1 values, handle that as a special
  // case first.
  if (VT == MVT::i1 && Subtarget->useCRBits()) {
    Register ImmReg = createResultReg(RC: &PPC::CRBITRCRegClass);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: Imm == 0 ? PPC::CRUNSET : PPC::CRSET), DestReg: ImmReg);
    return ImmReg;
  }

  // Only integer types up to 64 bits are supported.
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
      VT != MVT::i1)
    return Register();

  // 64-bit values live in G8RC, everything narrower in GPRC.
  const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
                                   &PPC::GPRCRegClass);
  if (VT == MVT::i64)
    return PPCMaterialize64BitInt(Imm, RC);
  else
    return PPCMaterialize32BitInt(Imm, RC);
}
2405
2406// Override for ADDI and ADDI8 to set the correct register class
2407// on RHS operand 0. The automatic infrastructure naively assumes
2408// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2409// for these cases. At the moment, none of the other automatically
2410// generated RI instructions require special treatment. However, once
2411// SelectSelect is implemented, "isel" requires similar handling.
2412//
2413// Also be conservative about the output register class. Avoid
2414// assigning R0 or X0 to the output register for GPRC and G8RC
2415// register classes, as any such result could be used in ADDI, etc.,
2416// where those regs have another meaning.
2417Register PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2418 const TargetRegisterClass *RC,
2419 Register Op0, uint64_t Imm) {
2420 if (MachineInstOpcode == PPC::ADDI)
2421 MRI.setRegClass(Reg: Op0, RC: &PPC::GPRC_and_GPRC_NOR0RegClass);
2422 else if (MachineInstOpcode == PPC::ADDI8)
2423 MRI.setRegClass(Reg: Op0, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
2424
2425 const TargetRegisterClass *UseRC =
2426 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2427 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2428
2429 return FastISel::fastEmitInst_ri(MachineInstOpcode, RC: UseRC, Op0, Imm);
2430}
2431
2432// Override for instructions with one register operand to avoid use of
2433// R0/X0. The automatic infrastructure isn't aware of the context so
2434// we must be conservative.
2435Register PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2436 const TargetRegisterClass *RC,
2437 Register Op0) {
2438 const TargetRegisterClass *UseRC =
2439 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2440 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2441
2442 return FastISel::fastEmitInst_r(MachineInstOpcode, RC: UseRC, Op0);
2443}
2444
2445// Override for instructions with two register operands to avoid use
2446// of R0/X0. The automatic infrastructure isn't aware of the context
2447// so we must be conservative.
2448Register PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2449 const TargetRegisterClass *RC,
2450 Register Op0, Register Op1) {
2451 const TargetRegisterClass *UseRC =
2452 (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2453 (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2454
2455 return FastISel::fastEmitInst_rr(MachineInstOpcode, RC: UseRC, Op0, Op1);
2456}
2457
2458namespace llvm {
2459 // Create the fast instruction selector for PowerPC64 ELF.
2460 FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
2461 const TargetLibraryInfo *LibInfo) {
2462 // Only available on 64-bit for now.
2463 const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2464 if (Subtarget.isPPC64())
2465 return new PPCFastISel(FuncInfo, LibInfo);
2466 return nullptr;
2467 }
2468}
2469