PPCFastISel.cpp source code [llvm_projects/llvm/lib/Target/PowerPC/PPCFastISel.cpp]

1	//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file defines the PowerPC-specific support for the FastISel class. Some
10	// of the target-specific code is generated by tablegen in the file
11	// PPCGenFastISel.inc, which is #included here.
12	//
13	//===----------------------------------------------------------------------===//
14
15	#include "MCTargetDesc/PPCPredicates.h"
16	#include "PPC.h"
17	#include "PPCCCState.h"
18	#include "PPCCallingConv.h"
19	#include "PPCISelLowering.h"
20	#include "PPCMachineFunctionInfo.h"
21	#include "PPCSubtarget.h"
22	#include "PPCTargetMachine.h"
23	#include "llvm/CodeGen/CallingConvLower.h"
24	#include "llvm/CodeGen/FastISel.h"
25	#include "llvm/CodeGen/FunctionLoweringInfo.h"
26	#include "llvm/CodeGen/MachineConstantPool.h"
27	#include "llvm/CodeGen/MachineFrameInfo.h"
28	#include "llvm/CodeGen/MachineInstrBuilder.h"
29	#include "llvm/CodeGen/MachineRegisterInfo.h"
30	#include "llvm/CodeGen/TargetLowering.h"
31	#include "llvm/IR/CallingConv.h"
32	#include "llvm/IR/GetElementPtrTypeIterator.h"
33	#include "llvm/IR/GlobalAlias.h"
34	#include "llvm/IR/GlobalVariable.h"
35	#include "llvm/IR/IntrinsicInst.h"
36	#include "llvm/IR/Operator.h"
37	#include "llvm/Support/Debug.h"
38	#include "llvm/Target/TargetMachine.h"
39
40	//===----------------------------------------------------------------------===//
41	//
42	// TBD:
43	// fastLowerArguments: Handle simple cases.
44	// PPCMaterializeGV: Handle TLS.
45	// SelectCall: Handle function pointers.
46	// SelectCall: Handle multi-register return values.
47	// SelectCall: Optimize away nops for local calls.
48	// processCallArgs: Handle bit-converted arguments.
49	// finishCall: Handle multi-register return values.
50	// PPCComputeAddress: Handle parameter references as FrameIndex's.
51	// PPCEmitCmp: Handle immediate as operand 1.
52	// SelectCall: Handle small byval arguments.
53	// SelectIntrinsicCall: Implement.
54	// SelectSelect: Implement.
55	// Consider factoring isTypeLegal into the base class.
56	// Implement switches and jump tables.
57	//
58	//===----------------------------------------------------------------------===//
59	using namespace llvm;
60
61	#define DEBUG_TYPE "ppcfastisel"
62
63	namespace {
64
// Describes an address being built up for a load or store: a base, which is
// either a register number or a stack frame index, plus a signed offset.
struct Address {
  // Selects which member of Base is meaningful.
  enum {
    RegBase,
    FrameIndexBase
  } BaseType;

  // The base of the address, interpreted according to BaseType.
  union {
    unsigned Reg;
    int FI;
  } Base;

  // Constant displacement to apply on top of the base.
  int64_t Offset;

  // Innocuous defaults for our address: a register base with register
  // number 0 (treated by users of this struct as "no register yet") and a
  // zero offset.
  Address() : BaseType(RegBase), Offset(0) { Base.Reg = 0; }
};
84
85	class PPCFastISel final : public FastISel {
86
87	const TargetMachine &TM;
88	const PPCSubtarget *Subtarget;
89	PPCFunctionInfo *PPCFuncInfo;
90	const TargetInstrInfo &TII;
91	const TargetLowering &TLI;
92	LLVMContext *Context;
93
94	public:
95	explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
96	const TargetLibraryInfo *LibInfo)
97	: FastISel (FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
98	Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
99	PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
100	TII(Subtarget->getInstrInfo()), TLI(Subtarget->getTargetLowering()),
101	Context(&FuncInfo.Fn->getContext()) {}
102
103	// Backend specific FastISel code.
104	private:
105	bool fastSelectInstruction(const Instruction *I) override;
106	unsigned fastMaterializeConstant(const Constant *C) override;
107	unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
108	bool tryToFoldLoadIntoMI(MachineInstr MI, unsigned* OpNo,
109	const LoadInst *LI) override;
110	bool fastLowerArguments() override;
111	unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
112	unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
113	const TargetRegisterClass *RC,
114	unsigned Op0, uint64_t Imm);
115	unsigned fastEmitInst_r(unsigned MachineInstOpcode,
116	const TargetRegisterClass RC, unsigned* Op0);
117	unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
118	const TargetRegisterClass *RC,
119	unsigned Op0, unsigned Op1);
120
121	bool fastLowerCall(CallLoweringInfo &CLI) override;
122
123	// Instruction selection routines.
124	private:
125	bool SelectLoad(const Instruction *I);
126	bool SelectStore(const Instruction *I);
127	bool SelectBranch(const Instruction *I);
128	bool SelectIndirectBr(const Instruction *I);
129	bool SelectFPExt(const Instruction *I);
130	bool SelectFPTrunc(const Instruction *I);
131	bool SelectIToFP(const Instruction I, bool* IsSigned);
132	bool SelectFPToI(const Instruction I, bool* IsSigned);
133	bool SelectBinaryIntOp(const Instruction I, unsigned* ISDOpcode);
134	bool SelectRet(const Instruction *I);
135	bool SelectTrunc(const Instruction *I);
136	bool SelectIntExt(const Instruction *I);
137
138	// Utility routines.
139	private:
140	bool isTypeLegal(Type *Ty, MVT &VT);
141	bool isLoadTypeLegal(Type *Ty, MVT &VT);
142	bool isValueAvailable(const Value V) const*;
143	bool isVSFRCRegClass(const TargetRegisterClass RC) const* {
144	return RC->getID() == PPC::VSFRCRegClassID;
145	}
146	bool isVSSRCRegClass(const TargetRegisterClass RC) const* {
147	return RC->getID() == PPC::VSSRCRegClassID;
148	}
149	unsigned copyRegToRegClass(const TargetRegisterClass *ToRC,
150	unsigned SrcReg, unsigned Flag = `0`,
151	unsigned SubReg = `0`) {
152	Register TmpReg = createResultReg(RC: ToRC);
153	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
154	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: TmpReg).addReg(RegNo: SrcReg, flags: Flag, SubReg);
155	return TmpReg;
156	}
157	bool PPCEmitCmp(const Value Src1Value, const* Value *Src2Value,
158	bool isZExt, unsigned DestReg,
159	const PPC::Predicate Pred);
160	bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
161	const TargetRegisterClass RC, bool* IsZExt = true,
162	unsigned FP64LoadOpc = PPC::LFD);
163	bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
164	bool PPCComputeAddress(const Value *Obj, Address &Addr);
165	void PPCSimplifyAddress(Address &Addr, bool &UseOffset,
166	unsigned &IndexReg);
167	bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
168	unsigned DestReg, bool IsZExt);
169	unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
170	unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
171	unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT,
172	bool UseSExt = true);
173	unsigned PPCMaterialize32BitInt(int64_t Imm,
174	const TargetRegisterClass *RC);
175	unsigned PPCMaterialize64BitInt(int64_t Imm,
176	const TargetRegisterClass *RC);
177	unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
178	unsigned SrcReg, bool IsSigned);
179	unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
180
181	// Call handling routines.
182	private:
183	bool processCallArgs(SmallVectorImpl<Value*> &Args,
184	SmallVectorImpl<unsigned> &ArgRegs,
185	SmallVectorImpl<MVT> &ArgVTs,
186	SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
187	SmallVectorImpl<unsigned> &RegArgs,
188	CallingConv::ID CC,
189	unsigned &NumBytes,
190	bool IsVarArg);
191	bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
192
193	private:
194	#include "PPCGenFastISel.inc"
195
196	};
197
198	} // end anonymous namespace
199
200	static std::optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
201	switch (Pred) {
202	// These are not representable with any single compare.
203	case CmpInst::FCMP_FALSE:
204	case CmpInst::FCMP_TRUE:
205	// Major concern about the following 6 cases is NaN result. The comparison
206	// result consists of 4 bits, indicating lt, eq, gt and un (unordered),
207	// only one of which will be set. The result is generated by fcmpu
208	// instruction. However, bc instruction only inspects one of the first 3
209	// bits, so when un is set, bc instruction may jump to an undesired
210	// place.
211	//
212	// More specifically, if we expect an unordered comparison and un is set, we
213	// expect to always go to true branch; in such case UEQ, UGT and ULT still
214	// give false, which are undesired; but UNE, UGE, ULE happen to give true,
215	// since they are tested by inspecting !eq, !lt, !gt, respectively.
216	//
217	// Similarly, for ordered comparison, when un is set, we always expect the
218	// result to be false. In such case OGT, OLT and OEQ is good, since they are
219	// actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
220	// and ONE are tested through !lt, !gt and !eq, and these are true.
221	case CmpInst::FCMP_UEQ:
222	case CmpInst::FCMP_UGT:
223	case CmpInst::FCMP_ULT:
224	case CmpInst::FCMP_OGE:
225	case CmpInst::FCMP_OLE:
226	case CmpInst::FCMP_ONE:
227	default:
228	return std::nullopt;
229
230	case CmpInst::FCMP_OEQ:
231	case CmpInst::ICMP_EQ:
232	return PPC::PRED_EQ;
233
234	case CmpInst::FCMP_OGT:
235	case CmpInst::ICMP_UGT:
236	case CmpInst::ICMP_SGT:
237	return PPC::PRED_GT;
238
239	case CmpInst::FCMP_UGE:
240	case CmpInst::ICMP_UGE:
241	case CmpInst::ICMP_SGE:
242	return PPC::PRED_GE;
243
244	case CmpInst::FCMP_OLT:
245	case CmpInst::ICMP_ULT:
246	case CmpInst::ICMP_SLT:
247	return PPC::PRED_LT;
248
249	case CmpInst::FCMP_ULE:
250	case CmpInst::ICMP_ULE:
251	case CmpInst::ICMP_SLE:
252	return PPC::PRED_LE;
253
254	case CmpInst::FCMP_UNE:
255	case CmpInst::ICMP_NE:
256	return PPC::PRED_NE;
257
258	case CmpInst::FCMP_ORD:
259	return PPC::PRED_NU;
260
261	case CmpInst::FCMP_UNO:
262	return PPC::PRED_UN;
263	}
264	}
265
266	// Determine whether the type Ty is simple enough to be handled by
267	// fast-isel, and return its equivalent machine type in VT.
268	// FIXME: Copied directly from ARM -- factor into base class?
269	bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
270	EVT Evt = TLI.getValueType(DL, Ty, AllowUnknown: true);
271
272	// Only handle simple types.
273	if (Evt == MVT::Other \|\| !Evt.isSimple()) return false;
274	VT = Evt.getSimpleVT();
275
276	// Handle all legal types, i.e. a register that will directly hold this
277	// value.
278	return TLI.isTypeLegal(VT);
279	}
280
281	// Determine whether the type Ty is simple enough to be handled by
282	// fast-isel as a load target, and return its equivalent machine type in VT.
283	bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
284	if (isTypeLegal(Ty, VT)) return true;
285
286	// If this is a type than can be sign or zero-extended to a basic operation
287	// go ahead and accept it now.
288	if (VT == MVT::i8 \|\| VT == MVT::i16 \|\| VT == MVT::i32) {
289	return true;
290	}
291
292	return false;
293	}
294
295	bool PPCFastISel::isValueAvailable(const Value V) const* {
296	if (!isa<Instruction>(Val: V))
297	return true;
298
299	const auto *I = cast<Instruction>(Val: V);
300	return FuncInfo.MBBMap [I->getParent()] == FuncInfo.MBB;
301	}
302
303	// Given a value Obj, create an Address object Addr that represents its
304	// address. Return false if we can't handle it.
305	bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
306	const User U = nullptr*;
307	unsigned Opcode = Instruction::UserOp1;
308	if (const Instruction *I = dyn_cast<Instruction>(Val: Obj)) {
309	// Don't walk into other basic blocks unless the object is an alloca from
310	// another block, otherwise it may not have a virtual register assigned.
311	if (FuncInfo.StaticAllocaMap.count(Val: static_cast<const AllocaInst *>(Obj)) \|\|
312	FuncInfo.MBBMap [I->getParent()] == FuncInfo.MBB) {
313	Opcode = I->getOpcode();
314	U = I;
315	}
316	} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Val: Obj)) {
317	Opcode = C->getOpcode();
318	U = C;
319	}
320
321	switch (Opcode) {
322	default:
323	break;
324	case Instruction::BitCast:
325	// Look through bitcasts.
326	return PPCComputeAddress(Obj: U->getOperand(i: `0`), Addr);
327	case Instruction::IntToPtr:
328	// Look past no-op inttoptrs.
329	if (TLI.getValueType(DL, Ty: U->getOperand(i: `0`)->getType()) ==
330	TLI.getPointerTy(DL))
331	return PPCComputeAddress(Obj: U->getOperand(i: `0`), Addr);
332	break;
333	case Instruction::PtrToInt:
334	// Look past no-op ptrtoints.
335	if (TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL))
336	return PPCComputeAddress(Obj: U->getOperand(i: `0`), Addr);
337	break;
338	case Instruction::GetElementPtr: {
339	Address SavedAddr = Addr;
340	int64_t TmpOffset = Addr.Offset;
341
342	// Iterate through the GEP folding the constants into offsets where
343	// we can.
344	gep_type_iterator GTI = gep_type_begin(GEP: U);
345	for (User::const_op_iterator II = U->op_begin() + `1`, IE = U->op_end();
346	II != IE; ++II, ++GTI) {
347	const Value Op = II;
348	if (StructType *STy = GTI.getStructTypeOrNull()) {
349	const StructLayout *SL = DL.getStructLayout(Ty: STy);
350	unsigned Idx = cast<ConstantInt>(Val: Op)->getZExtValue();
351	TmpOffset += SL->getElementOffset(Idx);
352	} else {
353	uint64_t S = GTI.getSequentialElementStride(DL);
354	for (;;) {
355	if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Op)) {
356	// Constant-offset addressing.
357	TmpOffset += CI->getSExtValue() * S;
358	break;
359	}
360	if (canFoldAddIntoGEP(GEP: U, Add: Op)) {
361	// A compatible add with a constant operand. Fold the constant.
362	ConstantInt *CI =
363	cast<ConstantInt>(Val: cast<AddOperator>(Val: Op)->getOperand(i_nocapture: `1`));
364	TmpOffset += CI->getSExtValue() * S;
365	// Iterate on the other operand.
366	Op = cast<AddOperator>(Val: Op)->getOperand(i_nocapture: `0`);
367	continue;
368	}
369	// Unsupported
370	goto unsupported_gep;
371	}
372	}
373	}
374
375	// Try to grab the base operand now.
376	Addr.Offset = TmpOffset;
377	if (PPCComputeAddress(Obj: U->getOperand(i: `0`), Addr)) return true;
378
379	// We failed, restore everything and try the other options.
380	Addr = SavedAddr;
381
382	unsupported_gep:
383	break;
384	}
385	case Instruction::Alloca: {
386	const AllocaInst *AI = cast<AllocaInst>(Val: Obj);
387	DenseMap<const AllocaInst, int*>::iterator SI =
388	FuncInfo.StaticAllocaMap.find(Val: AI);
389	if (SI != FuncInfo.StaticAllocaMap.end()) {
390	Addr.BaseType = Address::FrameIndexBase;
391	Addr.Base.FI = SI ->second;
392	return true;
393	}
394	break;
395	}
396	}
397
398	// FIXME: References to parameters fall through to the behavior
399	// below. They should be able to reference a frame index since
400	// they are stored to the stack, so we can get "ld rx, offset(r1)"
401	// instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
402	// just contain the parameter. Try to handle this with a FI.
403
404	// Try to get this in a register if nothing else has worked.
405	if (Addr.Base.Reg == `0`)
406	Addr.Base.Reg = getRegForValue(V: Obj);
407
408	// Prevent assignment of base register to X0, which is inappropriate
409	// for loads and stores alike.
410	if (Addr.Base.Reg != `0`)
411	MRI.setRegClass(Reg: Addr.Base.Reg, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
412
413	return Addr.Base.Reg != `0`;
414	}
415
416	// Fix up some addresses that can't be used directly. For example, if
417	// an offset won't fit in an instruction field, we may need to move it
418	// into an index register.
419	void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
420	unsigned &IndexReg) {
421
422	// Check whether the offset fits in the instruction field.
423	if (!isInt<`16`>(x: Addr.Offset))
424	UseOffset = false;
425
426	// If this is a stack pointer and the offset needs to be simplified then
427	// put the alloca address into a register, set the base type back to
428	// register and continue. This should almost never happen.
429	if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
430	Register ResultReg = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
431	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDI8),
432	DestReg: ResultReg).addFrameIndex(Idx: Addr.Base.FI).addImm(Val: `0`);
433	Addr.Base.Reg = ResultReg;
434	Addr.BaseType = Address::RegBase;
435	}
436
437	if (!UseOffset) {
438	IntegerType OffsetTy = Type::getInt64Ty(C&: Context);
439	const ConstantInt *Offset = ConstantInt::getSigned(Ty: OffsetTy, V: Addr.Offset);
440	IndexReg = PPCMaterializeInt(CI: Offset, VT: MVT::i64);
441	assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
442	}
443	}
444
445	// Emit a load instruction if possible, returning true if we succeeded,
446	// otherwise false. See commentary below for how the register class of
447	// the load is determined.
448	bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
449	const TargetRegisterClass *RC,
450	bool IsZExt, unsigned FP64LoadOpc) {
451	unsigned Opc;
452	bool UseOffset = true;
453	bool HasSPE = Subtarget->hasSPE();
454
455	// If ResultReg is given, it determines the register class of the load.
456	// Otherwise, RC is the register class to use. If the result of the
457	// load isn't anticipated in this block, both may be zero, in which
458	// case we must make a conservative guess. In particular, don't assign
459	// R0 or X0 to the result register, as the result may be used in a load,
460	// store, add-immediate, or isel that won't permit this. (Though
461	// perhaps the spill and reload of live-exit values would handle this?)
462	const TargetRegisterClass *UseRC =
463	(ResultReg ? MRI.getRegClass(Reg: ResultReg) :
464	(RC ? RC :
465	(VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
466	(VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
467	(VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
468	&PPC::GPRC_and_GPRC_NOR0RegClass)))));
469
470	bool Is32BitInt = UseRC->hasSuperClassEq(RC: &PPC::GPRCRegClass);
471
472	switch (VT.SimpleTy) {
473	default: // e.g., vector types not handled
474	return false;
475	case MVT::i8:
476	Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
477	break;
478	case MVT::i16:
479	Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
480	: (Is32BitInt ? PPC::LHA : PPC::LHA8));
481	break;
482	case MVT::i32:
483	Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
484	: (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
485	if ((Opc == PPC::LWA \|\| Opc == PPC::LWA_32) && ((Addr.Offset & `3`) != `0`))
486	UseOffset = false;
487	break;
488	case MVT::i64:
489	Opc = PPC::LD;
490	assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
491	"64-bit load with 32-bit target??");
492	UseOffset = ((Addr.Offset & `3`) == `0`);
493	break;
494	case MVT::f32:
495	Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
496	break;
497	case MVT::f64:
498	Opc = FP64LoadOpc;
499	break;
500	}
501
502	// If necessary, materialize the offset into a register and use
503	// the indexed form. Also handle stack pointers with special needs.
504	unsigned IndexReg = `0`;
505	PPCSimplifyAddress(Addr, UseOffset, IndexReg);
506
507	// If this is a potential VSX load with an offset of 0, a VSX indexed load can
508	// be used.
509	bool IsVSSRC = isVSSRCRegClass(RC: UseRC);
510	bool IsVSFRC = isVSFRCRegClass(RC: UseRC);
511	bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
512	bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
513	if ((Is32VSXLoad \|\| Is64VSXLoad) &&
514	(Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
515	(Addr.Offset == `0`)) {
516	UseOffset = false;
517	}
518
519	if (ResultReg == `0`)
520	ResultReg = createResultReg(RC: UseRC);
521
522	// Note: If we still have a frame index here, we know the offset is
523	// in range, as otherwise PPCSimplifyAddress would have converted it
524	// into a RegBase.
525	if (Addr.BaseType == Address::FrameIndexBase) {
526	// VSX only provides an indexed load.
527	if (Is32VSXLoad \|\| Is64VSXLoad) return false;
528
529	MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
530	PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI: Addr.Base.FI,
531	Offset: Addr.Offset),
532	F: MachineMemOperand::MOLoad, Size: MFI.getObjectSize(ObjectIdx: Addr.Base.FI),
533	BaseAlignment: MFI.getObjectAlign(ObjectIdx: Addr.Base.FI));
534
535	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
536	.addImm(Val: Addr.Offset).addFrameIndex(Idx: Addr.Base.FI).addMemOperand(MMO);
537
538	// Base reg with offset in range.
539	} else if (UseOffset) {
540	// VSX only provides an indexed load.
541	if (Is32VSXLoad \|\| Is64VSXLoad) return false;
542
543	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
544	.addImm(Val: Addr.Offset).addReg(RegNo: Addr.Base.Reg);
545
546	// Indexed form.
547	} else {
548	// Get the RR opcode corresponding to the RI one. FIXME: It would be
549	// preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
550	// is hard to get at.
551	switch (Opc) {
552	default: llvm_unreachable("Unexpected opcode!");
553	case PPC::LBZ: Opc = PPC::LBZX; break;
554	case PPC::LBZ8: Opc = PPC::LBZX8; break;
555	case PPC::LHZ: Opc = PPC::LHZX; break;
556	case PPC::LHZ8: Opc = PPC::LHZX8; break;
557	case PPC::LHA: Opc = PPC::LHAX; break;
558	case PPC::LHA8: Opc = PPC::LHAX8; break;
559	case PPC::LWZ: Opc = PPC::LWZX; break;
560	case PPC::LWZ8: Opc = PPC::LWZX8; break;
561	case PPC::LWA: Opc = PPC::LWAX; break;
562	case PPC::LWA_32: Opc = PPC::LWAX_32; break;
563	case PPC::LD: Opc = PPC::LDX; break;
564	case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
565	case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
566	case PPC::EVLDD: Opc = PPC::EVLDDX; break;
567	case PPC::SPELWZ: Opc = PPC::SPELWZX; break;
568	}
569
570	auto MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc),
571	DestReg: ResultReg);
572
573	// If we have an index register defined we use it in the store inst,
574	// otherwise we use X0 as base as it makes the vector instructions to
575	// use zero in the computation of the effective address regardless the
576	// content of the register.
577	if (IndexReg)
578	MIB.addReg(RegNo: Addr.Base.Reg).addReg(RegNo: IndexReg);
579	else
580	MIB.addReg(RegNo: PPC::ZERO8).addReg(RegNo: Addr.Base.Reg);
581	}
582
583	return true;
584	}
585
586	// Attempt to fast-select a load instruction.
587	bool PPCFastISel::SelectLoad(const Instruction *I) {
588	// FIXME: No atomic loads are supported.
589	if (cast<LoadInst>(Val: I)->isAtomic())
590	return false;
591
592	// Verify we have a legal type before going any further.
593	MVT VT;
594	if (!isLoadTypeLegal(Ty: I->getType(), VT))
595	return false;
596
597	// See if we can handle this address.
598	Address Addr;
599	if (!PPCComputeAddress(Obj: I->getOperand(i: `0`), Addr))
600	return false;
601
602	// Look at the currently assigned register for this instruction
603	// to determine the required register class. This is necessary
604	// to constrain RA from using R0/X0 when this is not legal.
605	Register AssignedReg = FuncInfo.ValueMap [I];
606	const TargetRegisterClass *RC =
607	AssignedReg ? MRI.getRegClass(Reg: AssignedReg) : nullptr;
608
609	Register ResultReg = `0`;
610	if (!PPCEmitLoad(VT, ResultReg, Addr, RC, IsZExt: true,
611	FP64LoadOpc: Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
612	return false;
613	updateValueMap(I, Reg: ResultReg);
614	return true;
615	}
616
617	// Emit a store instruction to store SrcReg at Addr.
618	bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
619	assert(SrcReg && "Nothing to store!");
620	unsigned Opc;
621	bool UseOffset = true;
622
623	const TargetRegisterClass *RC = MRI.getRegClass(Reg: SrcReg);
624	bool Is32BitInt = RC->hasSuperClassEq(RC: &PPC::GPRCRegClass);
625
626	switch (VT.SimpleTy) {
627	default: // e.g., vector types not handled
628	return false;
629	case MVT::i8:
630	Opc = Is32BitInt ? PPC::STB : PPC::STB8;
631	break;
632	case MVT::i16:
633	Opc = Is32BitInt ? PPC::STH : PPC::STH8;
634	break;
635	case MVT::i32:
636	assert(Is32BitInt && "Not GPRC for i32??");
637	Opc = PPC::STW;
638	break;
639	case MVT::i64:
640	Opc = PPC::STD;
641	UseOffset = ((Addr.Offset & `3`) == `0`);
642	break;
643	case MVT::f32:
644	Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
645	break;
646	case MVT::f64:
647	Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
648	break;
649	}
650
651	// If necessary, materialize the offset into a register and use
652	// the indexed form. Also handle stack pointers with special needs.
653	unsigned IndexReg = `0`;
654	PPCSimplifyAddress(Addr, UseOffset, IndexReg);
655
656	// If this is a potential VSX store with an offset of 0, a VSX indexed store
657	// can be used.
658	bool IsVSSRC = isVSSRCRegClass(RC);
659	bool IsVSFRC = isVSFRCRegClass(RC);
660	bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
661	bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
662	if ((Is32VSXStore \|\| Is64VSXStore) &&
663	(Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
664	(Addr.Offset == `0`)) {
665	UseOffset = false;
666	}
667
668	// Note: If we still have a frame index here, we know the offset is
669	// in range, as otherwise PPCSimplifyAddress would have converted it
670	// into a RegBase.
671	if (Addr.BaseType == Address::FrameIndexBase) {
672	// VSX only provides an indexed store.
673	if (Is32VSXStore \|\| Is64VSXStore) return false;
674
675	MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
676	PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI: Addr.Base.FI,
677	Offset: Addr.Offset),
678	F: MachineMemOperand::MOStore, Size: MFI.getObjectSize(ObjectIdx: Addr.Base.FI),
679	BaseAlignment: MFI.getObjectAlign(ObjectIdx: Addr.Base.FI));
680
681	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
682	.addReg(RegNo: SrcReg)
683	.addImm(Val: Addr.Offset)
684	.addFrameIndex(Idx: Addr.Base.FI)
685	.addMemOperand(MMO);
686
687	// Base reg with offset in range.
688	} else if (UseOffset) {
689	// VSX only provides an indexed store.
690	if (Is32VSXStore \|\| Is64VSXStore)
691	return false;
692
693	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
694	.addReg(RegNo: SrcReg).addImm(Val: Addr.Offset).addReg(RegNo: Addr.Base.Reg);
695
696	// Indexed form.
697	} else {
698	// Get the RR opcode corresponding to the RI one. FIXME: It would be
699	// preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
700	// is hard to get at.
701	switch (Opc) {
702	default: llvm_unreachable("Unexpected opcode!");
703	case PPC::STB: Opc = PPC::STBX; break;
704	case PPC::STH : Opc = PPC::STHX; break;
705	case PPC::STW : Opc = PPC::STWX; break;
706	case PPC::STB8: Opc = PPC::STBX8; break;
707	case PPC::STH8: Opc = PPC::STHX8; break;
708	case PPC::STW8: Opc = PPC::STWX8; break;
709	case PPC::STD: Opc = PPC::STDX; break;
710	case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
711	case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
712	case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
713	case PPC::SPESTW: Opc = PPC::SPESTWX; break;
714	}
715
716	auto MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
717	.addReg(RegNo: SrcReg);
718
719	// If we have an index register defined we use it in the store inst,
720	// otherwise we use X0 as base as it makes the vector instructions to
721	// use zero in the computation of the effective address regardless the
722	// content of the register.
723	if (IndexReg)
724	MIB.addReg(RegNo: Addr.Base.Reg).addReg(RegNo: IndexReg);
725	else
726	MIB.addReg(RegNo: PPC::ZERO8).addReg(RegNo: Addr.Base.Reg);
727	}
728
729	return true;
730	}
731
732	// Attempt to fast-select a store instruction.
733	bool PPCFastISel::SelectStore(const Instruction *I) {
734	Value *Op0 = I->getOperand(i: `0`);
735	unsigned SrcReg = `0`;
736
737	// FIXME: No atomics loads are supported.
738	if (cast<StoreInst>(Val: I)->isAtomic())
739	return false;
740
741	// Verify we have a legal type before going any further.
742	MVT VT;
743	if (!isLoadTypeLegal(Ty: Op0->getType(), VT))
744	return false;
745
746	// Get the value to be stored into a register.
747	SrcReg = getRegForValue(V: Op0);
748	if (SrcReg == `0`)
749	return false;
750
751	// See if we can handle this address.
752	Address Addr;
753	if (!PPCComputeAddress(Obj: I->getOperand(i: `1`), Addr))
754	return false;
755
756	if (!PPCEmitStore(VT, SrcReg, Addr))
757	return false;
758
759	return true;
760	}
761
762	// Attempt to fast-select a branch instruction.
763	bool PPCFastISel::SelectBranch(const Instruction *I) {
764	const BranchInst *BI = cast<BranchInst>(Val: I);
765	MachineBasicBlock *BrBB = FuncInfo.MBB;
766	MachineBasicBlock *TBB = FuncInfo.MBBMap [BI->getSuccessor(i: `0`)];
767	MachineBasicBlock *FBB = FuncInfo.MBBMap [BI->getSuccessor(i: `1`)];
768
769	// For now, just try the simplest case where it's fed by a compare.
770	if (const CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition())) {
771	if (isValueAvailable(V: CI)) {
772	std::optional<PPC::Predicate> OptPPCPred =
773	getComparePred(Pred: CI->getPredicate());
774	if (!OptPPCPred)
775	return false;
776
777	PPC::Predicate PPCPred = *OptPPCPred;
778
779	// Take advantage of fall-through opportunities.
780	if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
781	std::swap(a&: TBB, b&: FBB);
782	PPCPred = PPC::InvertPredicate(Opcode: PPCPred);
783	}
784
785	Register CondReg = createResultReg(RC: &PPC::CRRCRegClass);
786
787	if (!PPCEmitCmp(Src1Value: CI->getOperand(i_nocapture: `0`), Src2Value: CI->getOperand(i_nocapture: `1`), isZExt: CI->isUnsigned(),
788	DestReg: CondReg, Pred: PPCPred))
789	return false;
790
791	BuildMI(BB&: *BrBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::BCC))
792	.addImm(Val: Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
793	.addReg(RegNo: CondReg)
794	.addMBB(MBB: TBB);
795	finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
796	return true;
797	}
798	} else if (const ConstantInt *CI =
799	dyn_cast<ConstantInt>(Val: BI->getCondition())) {
800	uint64_t Imm = CI->getZExtValue();
801	MachineBasicBlock *Target = (Imm == `0`) ? FBB : TBB;
802	fastEmitBranch(MSucc: Target, DbgLoc: MIMD.getDL());
803	return true;
804	}
805
806	// FIXME: ARM looks for a case where the block containing the compare
807	// has been split from the block containing the branch. If this happens,
808	// there is a vreg available containing the result of the compare. I'm
809	// not sure we can do much, as we've lost the predicate information with
810	// the compare instruction -- we have a 4-bit CR but don't know which bit
811	// to test here.
812	return false;
813	}
814
815	// Attempt to emit a compare of the two source values. Signed and unsigned
816	// comparisons are supported. Return false if we can't handle it.
817	bool PPCFastISel::PPCEmitCmp(const Value SrcValue1, const* Value *SrcValue2,
818	bool IsZExt, unsigned DestReg,
819	const PPC::Predicate Pred) {
820	Type *Ty = SrcValue1->getType();
821	EVT SrcEVT = TLI.getValueType(DL, Ty, AllowUnknown: true);
822	if (!SrcEVT.isSimple())
823	return false;
824	MVT SrcVT = SrcEVT.getSimpleVT();
825
826	if (SrcVT == MVT::i1 && Subtarget->useCRBits())
827	return false;
828
829	// See if operand 2 is an immediate encodeable in the compare.
830	// FIXME: Operands are not in canonical order at -O0, so an immediate
831	// operand in position 1 is a lost opportunity for now. We are
832	// similar to ARM in this regard.
833	int64_t Imm = `0`;
834	bool UseImm = false;
835	const bool HasSPE = Subtarget->hasSPE();
836
837	// Only 16-bit integer constants can be represented in compares for
838	// PowerPC. Others will be materialized into a register.
839	if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: SrcValue2)) {
840	if (SrcVT == MVT::i64 \|\| SrcVT == MVT::i32 \|\| SrcVT == MVT::i16 \|\|
841	SrcVT == MVT::i8 \|\| SrcVT == MVT::i1) {
842	const APInt &CIVal = ConstInt->getValue();
843	Imm = (IsZExt) ? (int64_t)CIVal.getZExtValue() :
844	(int64_t)CIVal.getSExtValue();
845	if ((IsZExt && isUInt<`16`>(x: Imm)) \|\| (!IsZExt && isInt<`16`>(x: Imm)))
846	UseImm = true;
847	}
848	}
849
850	Register SrcReg1 = getRegForValue(V: SrcValue1);
851	if (SrcReg1 == `0`)
852	return false;
853
854	unsigned SrcReg2 = `0`;
855	if (!UseImm) {
856	SrcReg2 = getRegForValue(V: SrcValue2);
857	if (SrcReg2 == `0`)
858	return false;
859	}
860
861	unsigned CmpOpc;
862	bool NeedsExt = false;
863
864	auto RC1 = MRI.getRegClass(Reg: SrcReg1);
865	auto RC2 = SrcReg2 != `0` ? MRI.getRegClass(Reg: SrcReg2) : nullptr;
866
867	switch (SrcVT.SimpleTy) {
868	default: return false;
869	case MVT::f32:
870	if (HasSPE) {
871	switch (Pred) {
872	default: return false;
873	case PPC::PRED_EQ:
874	CmpOpc = PPC::EFSCMPEQ;
875	break;
876	case PPC::PRED_LT:
877	CmpOpc = PPC::EFSCMPLT;
878	break;
879	case PPC::PRED_GT:
880	CmpOpc = PPC::EFSCMPGT;
881	break;
882	}
883	} else {
884	CmpOpc = PPC::FCMPUS;
885	if (isVSSRCRegClass(RC: RC1))
886	SrcReg1 = copyRegToRegClass(ToRC: &PPC::F4RCRegClass, SrcReg: SrcReg1);
887	if (RC2 && isVSSRCRegClass(RC: RC2))
888	SrcReg2 = copyRegToRegClass(ToRC: &PPC::F4RCRegClass, SrcReg: SrcReg2);
889	}
890	break;
891	case MVT::f64:
892	if (HasSPE) {
893	switch (Pred) {
894	default: return false;
895	case PPC::PRED_EQ:
896	CmpOpc = PPC::EFDCMPEQ;
897	break;
898	case PPC::PRED_LT:
899	CmpOpc = PPC::EFDCMPLT;
900	break;
901	case PPC::PRED_GT:
902	CmpOpc = PPC::EFDCMPGT;
903	break;
904	}
905	} else if (isVSFRCRegClass(RC: RC1) \|\| (RC2 && isVSFRCRegClass(RC: RC2))) {
906	CmpOpc = PPC::XSCMPUDP;
907	} else {
908	CmpOpc = PPC::FCMPUD;
909	}
910	break;
911	case MVT::i1:
912	case MVT::i8:
913	case MVT::i16:
914	NeedsExt = true;
915	[[fallthrough]];
916	case MVT::i32:
917	if (!UseImm)
918	CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
919	else
920	CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
921	break;
922	case MVT::i64:
923	if (!UseImm)
924	CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
925	else
926	CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
927	break;
928	}
929
930	if (NeedsExt) {
931	Register ExtReg = createResultReg(RC: &PPC::GPRCRegClass);
932	if (!PPCEmitIntExt(SrcVT, SrcReg: SrcReg1, DestVT: MVT::i32, DestReg: ExtReg, IsZExt))
933	return false;
934	SrcReg1 = ExtReg;
935
936	if (!UseImm) {
937	Register ExtReg = createResultReg(RC: &PPC::GPRCRegClass);
938	if (!PPCEmitIntExt(SrcVT, SrcReg: SrcReg2, DestVT: MVT::i32, DestReg: ExtReg, IsZExt))
939	return false;
940	SrcReg2 = ExtReg;
941	}
942	}
943
944	if (!UseImm)
945	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: CmpOpc), DestReg)
946	.addReg(RegNo: SrcReg1).addReg(RegNo: SrcReg2);
947	else
948	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: CmpOpc), DestReg)
949	.addReg(RegNo: SrcReg1).addImm(Val: Imm);
950
951	return true;
952	}
953
954	// Attempt to fast-select a floating-point extend instruction.
955	bool PPCFastISel::SelectFPExt(const Instruction *I) {
956	Value *Src = I->getOperand(i: `0`);
957	EVT SrcVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
958	EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
959
960	if (SrcVT != MVT::f32 \|\| DestVT != MVT::f64)
961	return false;
962
963	Register SrcReg = getRegForValue(V: Src);
964	if (!SrcReg)
965	return false;
966
967	// No code is generated for a FP extend.
968	updateValueMap(I, Reg: SrcReg);
969	return true;
970	}
971
972	// Attempt to fast-select a floating-point truncate instruction.
973	bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
974	Value *Src = I->getOperand(i: `0`);
975	EVT SrcVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
976	EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
977
978	if (SrcVT != MVT::f64 \|\| DestVT != MVT::f32)
979	return false;
980
981	Register SrcReg = getRegForValue(V: Src);
982	if (!SrcReg)
983	return false;
984
985	// Round the result to single precision.
986	unsigned DestReg;
987	auto RC = MRI.getRegClass(Reg: SrcReg);
988	if (Subtarget->hasSPE()) {
989	DestReg = createResultReg(RC: &PPC::GPRCRegClass);
990	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::EFSCFD),
991	DestReg)
992	.addReg(RegNo: SrcReg);
993	} else if (Subtarget->hasP8Vector() && isVSFRCRegClass(RC)) {
994	DestReg = createResultReg(RC: &PPC::VSSRCRegClass);
995	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::XSRSP),
996	DestReg)
997	.addReg(RegNo: SrcReg);
998	} else {
999	SrcReg = copyRegToRegClass(ToRC: &PPC::F8RCRegClass, SrcReg);
1000	DestReg = createResultReg(RC: &PPC::F4RCRegClass);
1001	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1002	MCID: TII.get(Opcode: PPC::FRSP), DestReg)
1003	.addReg(RegNo: SrcReg);
1004	}
1005
1006	updateValueMap(I, Reg: DestReg);
1007	return true;
1008	}
1009
1010	// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
1011	// FIXME: When direct register moves are implemented (see PowerISA 2.07),
1012	// those should be used instead of moving via a stack slot when the
1013	// subtarget permits.
1014	// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
1015	// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
1016	// case to 8 bytes which produces tighter code but wastes stack space.
1017	unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
1018	bool IsSigned) {
1019
1020	// If necessary, extend 32-bit int to 64-bit.
1021	if (SrcVT == MVT::i32) {
1022	Register TmpReg = createResultReg(RC: &PPC::G8RCRegClass);
1023	if (!PPCEmitIntExt(SrcVT: MVT::i32, SrcReg, DestVT: MVT::i64, DestReg: TmpReg, IsZExt: !IsSigned))
1024	return `0`;
1025	SrcReg = TmpReg;
1026	}
1027
1028	// Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
1029	Address Addr;
1030	Addr.BaseType = Address::FrameIndexBase;
1031	Addr.Base.FI = MFI.CreateStackObject(Size: `8`, Alignment: Align (`8`), isSpillSlot: false);
1032
1033	// Store the value from the GPR.
1034	if (!PPCEmitStore(VT: MVT::i64, SrcReg, Addr))
1035	return `0`;
1036
1037	// Load the integer value into an FPR. The kind of load used depends
1038	// on a number of conditions.
1039	unsigned LoadOpc = PPC::LFD;
1040
1041	if (SrcVT == MVT::i32) {
1042	if (!IsSigned) {
1043	LoadOpc = PPC::LFIWZX;
1044	Addr.Offset = (Subtarget->isLittleEndian()) ? `0` : `4`;
1045	} else if (Subtarget->hasLFIWAX()) {
1046	LoadOpc = PPC::LFIWAX;
1047	Addr.Offset = (Subtarget->isLittleEndian()) ? `0` : `4`;
1048	}
1049	}
1050
1051	const TargetRegisterClass *RC = &PPC::F8RCRegClass;
1052	Register ResultReg = `0`;
1053	if (!PPCEmitLoad(VT: MVT::f64, ResultReg, Addr, RC, IsZExt: !IsSigned, FP64LoadOpc: LoadOpc))
1054	return `0`;
1055
1056	return ResultReg;
1057	}
1058
1059	// Attempt to fast-select an integer-to-floating-point conversion.
1060	// FIXME: Once fast-isel has better support for VSX, conversions using
1061	// direct moves should be implemented.
1062	bool PPCFastISel::SelectIToFP(const Instruction I, bool* IsSigned) {
1063	MVT DstVT;
1064	Type *DstTy = I->getType();
1065	if (!isTypeLegal(Ty: DstTy, VT&: DstVT))
1066	return false;
1067
1068	if (DstVT != MVT::f32 && DstVT != MVT::f64)
1069	return false;
1070
1071	Value *Src = I->getOperand(i: `0`);
1072	EVT SrcEVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
1073	if (!SrcEVT.isSimple())
1074	return false;
1075
1076	MVT SrcVT = SrcEVT.getSimpleVT();
1077
1078	if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
1079	SrcVT != MVT::i32 && SrcVT != MVT::i64)
1080	return false;
1081
1082	Register SrcReg = getRegForValue(V: Src);
1083	if (SrcReg == `0`)
1084	return false;
1085
1086	// Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs
1087	if (Subtarget->hasSPE()) {
1088	unsigned Opc;
1089	if (DstVT == MVT::f32)
1090	Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
1091	else
1092	Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;
1093
1094	Register DestReg = createResultReg(RC: &PPC::SPERCRegClass);
1095	// Generate the convert.
1096	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
1097	.addReg(RegNo: SrcReg);
1098	updateValueMap(I, Reg: DestReg);
1099	return true;
1100	}
1101
1102	// We can only lower an unsigned convert if we have the newer
1103	// floating-point conversion operations.
1104	if (!IsSigned && !Subtarget->hasFPCVT())
1105	return false;
1106
1107	// FIXME: For now we require the newer floating-point conversion operations
1108	// (which are present only on P7 and A2 server models) when converting
1109	// to single-precision float. Otherwise we have to generate a lot of
1110	// fiddly code to avoid double rounding. If necessary, the fiddly code
1111	// can be found in PPCTargetLowering::LowerINT_TO_FP().
1112	if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
1113	return false;
1114
1115	// Extend the input if necessary.
1116	if (SrcVT == MVT::i8 \|\| SrcVT == MVT::i16) {
1117	Register TmpReg = createResultReg(RC: &PPC::G8RCRegClass);
1118	if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT: MVT::i64, DestReg: TmpReg, IsZExt: !IsSigned))
1119	return false;
1120	SrcVT = MVT::i64;
1121	SrcReg = TmpReg;
1122	}
1123
1124	// Move the integer value to an FPR.
1125	unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
1126	if (FPReg == `0`)
1127	return false;
1128
1129	// Determine the opcode for the conversion.
1130	const TargetRegisterClass *RC = &PPC::F8RCRegClass;
1131	Register DestReg = createResultReg(RC);
1132	unsigned Opc;
1133
1134	if (DstVT == MVT::f32)
1135	Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
1136	else
1137	Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
1138
1139	// Generate the convert.
1140	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
1141	.addReg(RegNo: FPReg);
1142
1143	updateValueMap(I, Reg: DestReg);
1144	return true;
1145	}
1146
1147	// Move the floating-point value in SrcReg into an integer destination
1148	// register, and return the register (or zero if we can't handle it).
1149	// FIXME: When direct register moves are implemented (see PowerISA 2.07),
1150	// those should be used instead of moving via a stack slot when the
1151	// subtarget permits.
1152	unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
1153	unsigned SrcReg, bool IsSigned) {
1154	// Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
1155	// Note that if have STFIWX available, we could use a 4-byte stack
1156	// slot for i32, but this being fast-isel we'll just go with the
1157	// easiest code gen possible.
1158	Address Addr;
1159	Addr.BaseType = Address::FrameIndexBase;
1160	Addr.Base.FI = MFI.CreateStackObject(Size: `8`, Alignment: Align (`8`), isSpillSlot: false);
1161
1162	// Store the value from the FPR.
1163	if (!PPCEmitStore(VT: MVT::f64, SrcReg, Addr))
1164	return `0`;
1165
1166	// Reload it into a GPR. If we want an i32 on big endian, modify the
1167	// address to have a 4-byte offset so we load from the right place.
1168	if (VT == MVT::i32)
1169	Addr.Offset = (Subtarget->isLittleEndian()) ? `0` : `4`;
1170
1171	// Look at the currently assigned register for this instruction
1172	// to determine the required register class.
1173	Register AssignedReg = FuncInfo.ValueMap [I];
1174	const TargetRegisterClass *RC =
1175	AssignedReg ? MRI.getRegClass(Reg: AssignedReg) : nullptr;
1176
1177	Register ResultReg = `0`;
1178	if (!PPCEmitLoad(VT, ResultReg, Addr, RC, IsZExt: !IsSigned))
1179	return `0`;
1180
1181	return ResultReg;
1182	}
1183
1184	// Attempt to fast-select a floating-point-to-integer conversion.
1185	// FIXME: Once fast-isel has better support for VSX, conversions using
1186	// direct moves should be implemented.
1187	bool PPCFastISel::SelectFPToI(const Instruction I, bool* IsSigned) {
1188	MVT DstVT, SrcVT;
1189	Type *DstTy = I->getType();
1190	if (!isTypeLegal(Ty: DstTy, VT&: DstVT))
1191	return false;
1192
1193	if (DstVT != MVT::i32 && DstVT != MVT::i64)
1194	return false;
1195
1196	// If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
1197	if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
1198	!Subtarget->hasSPE())
1199	return false;
1200
1201	Value *Src = I->getOperand(i: `0`);
1202	Type *SrcTy = Src->getType();
1203	if (!isTypeLegal(Ty: SrcTy, VT&: SrcVT))
1204	return false;
1205
1206	if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
1207	return false;
1208
1209	Register SrcReg = getRegForValue(V: Src);
1210	if (SrcReg == `0`)
1211	return false;
1212
1213	// Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
1214	// meaningless copy to get the register class right.
1215	const TargetRegisterClass *InRC = MRI.getRegClass(Reg: SrcReg);
1216	if (InRC == &PPC::F4RCRegClass)
1217	SrcReg = copyRegToRegClass(ToRC: &PPC::F8RCRegClass, SrcReg);
1218	else if (InRC == &PPC::VSSRCRegClass)
1219	SrcReg = copyRegToRegClass(ToRC: &PPC::VSFRCRegClass, SrcReg);
1220
1221	// Determine the opcode for the conversion, which takes place
1222	// entirely within FPRs or VSRs.
1223	unsigned DestReg;
1224	unsigned Opc;
1225	auto RC = MRI.getRegClass(Reg: SrcReg);
1226
1227	if (Subtarget->hasSPE()) {
1228	DestReg = createResultReg(RC: &PPC::GPRCRegClass);
1229	if (IsSigned)
1230	Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
1231	else
1232	Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
1233	} else if (isVSFRCRegClass(RC)) {
1234	DestReg = createResultReg(RC: &PPC::VSFRCRegClass);
1235	if (DstVT == MVT::i32)
1236	Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
1237	else
1238	Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
1239	} else {
1240	DestReg = createResultReg(RC: &PPC::F8RCRegClass);
1241	if (DstVT == MVT::i32)
1242	if (IsSigned)
1243	Opc = PPC::FCTIWZ;
1244	else
1245	Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
1246	else
1247	Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
1248	}
1249
1250	// Generate the convert.
1251	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
1252	.addReg(RegNo: SrcReg);
1253
1254	// Now move the integer value from a float register to an integer register.
1255	unsigned IntReg = Subtarget->hasSPE()
1256	? DestReg
1257	: PPCMoveToIntReg(I, VT: DstVT, SrcReg: DestReg, IsSigned);
1258
1259	if (IntReg == `0`)
1260	return false;
1261
1262	updateValueMap(I, Reg: IntReg);
1263	return true;
1264	}
1265
1266	// Attempt to fast-select a binary integer operation that isn't already
1267	// handled automatically.
1268	bool PPCFastISel::SelectBinaryIntOp(const Instruction I, unsigned* ISDOpcode) {
1269	EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
1270
1271	// We can get here in the case when we have a binary operation on a non-legal
1272	// type and the target independent selector doesn't know how to handle it.
1273	if (DestVT != MVT::i16 && DestVT != MVT::i8)
1274	return false;
1275
1276	// Look at the currently assigned register for this instruction
1277	// to determine the required register class. If there is no register,
1278	// make a conservative choice (don't assign R0).
1279	Register AssignedReg = FuncInfo.ValueMap [I];
1280	const TargetRegisterClass *RC =
1281	(AssignedReg ? MRI.getRegClass(Reg: AssignedReg) :
1282	&PPC::GPRC_and_GPRC_NOR0RegClass);
1283	bool IsGPRC = RC->hasSuperClassEq(RC: &PPC::GPRCRegClass);
1284
1285	unsigned Opc;
1286	switch (ISDOpcode) {
1287	default: return false;
1288	case ISD::ADD:
1289	Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
1290	break;
1291	case ISD::OR:
1292	Opc = IsGPRC ? PPC::OR : PPC::OR8;
1293	break;
1294	case ISD::SUB:
1295	Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
1296	break;
1297	}
1298
1299	Register ResultReg = createResultReg(RC: RC ? RC : &PPC::G8RCRegClass);
1300	Register SrcReg1 = getRegForValue(V: I->getOperand(i: `0`));
1301	if (SrcReg1 == `0`) return false;
1302
1303	// Handle case of small immediate operand.
1304	if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: I->getOperand(i: `1`))) {
1305	const APInt &CIVal = ConstInt->getValue();
1306	int Imm = (int)CIVal.getSExtValue();
1307	bool UseImm = true;
1308	if (isInt<`16`>(x: Imm)) {
1309	switch (Opc) {
1310	default:
1311	llvm_unreachable("Missing case!");
1312	case PPC::ADD4:
1313	Opc = PPC::ADDI;
1314	MRI.setRegClass(Reg: SrcReg1, RC: &PPC::GPRC_and_GPRC_NOR0RegClass);
1315	break;
1316	case PPC::ADD8:
1317	Opc = PPC::ADDI8;
1318	MRI.setRegClass(Reg: SrcReg1, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
1319	break;
1320	case PPC::OR:
1321	Opc = PPC::ORI;
1322	break;
1323	case PPC::OR8:
1324	Opc = PPC::ORI8;
1325	break;
1326	case PPC::SUBF:
1327	if (Imm == -`32768`)
1328	UseImm = false;
1329	else {
1330	Opc = PPC::ADDI;
1331	MRI.setRegClass(Reg: SrcReg1, RC: &PPC::GPRC_and_GPRC_NOR0RegClass);
1332	Imm = -Imm;
1333	}
1334	break;
1335	case PPC::SUBF8:
1336	if (Imm == -`32768`)
1337	UseImm = false;
1338	else {
1339	Opc = PPC::ADDI8;
1340	MRI.setRegClass(Reg: SrcReg1, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
1341	Imm = -Imm;
1342	}
1343	break;
1344	}
1345
1346	if (UseImm) {
1347	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc),
1348	DestReg: ResultReg)
1349	.addReg(RegNo: SrcReg1)
1350	.addImm(Val: Imm);
1351	updateValueMap(I, Reg: ResultReg);
1352	return true;
1353	}
1354	}
1355	}
1356
1357	// Reg-reg case.
1358	Register SrcReg2 = getRegForValue(V: I->getOperand(i: `1`));
1359	if (SrcReg2 == `0`) return false;
1360
1361	// Reverse operands for subtract-from.
1362	if (ISDOpcode == ISD::SUB)
1363	std::swap(a&: SrcReg1, b&: SrcReg2);
1364
1365	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
1366	.addReg(RegNo: SrcReg1).addReg(RegNo: SrcReg2);
1367	updateValueMap(I, Reg: ResultReg);
1368	return true;
1369	}
1370
1371	// Handle arguments to a call that we're attempting to fast-select.
1372	// Return false if the arguments are too complex for us at the moment.
1373	bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
1374	SmallVectorImpl<unsigned> &ArgRegs,
1375	SmallVectorImpl<MVT> &ArgVTs,
1376	SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1377	SmallVectorImpl<unsigned> &RegArgs,
1378	CallingConv::ID CC,
1379	unsigned &NumBytes,
1380	bool IsVarArg) {
1381	SmallVector<CCValAssign, `16`> ArgLocs;
1382	CCState CCInfo(CC, IsVarArg, FuncInfo.MF, ArgLocs, Context);
1383
1384	// Reserve space for the linkage area on the stack.
1385	unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
1386	CCInfo.AllocateStack(Size: LinkageSize, Alignment: Align (`8`));
1387
1388	CCInfo.AnalyzeCallOperands(ArgVTs, Flags&: ArgFlags, Fn: CC_PPC64_ELF_FIS);
1389
1390	// Bail out if we can't handle any of the arguments.
1391	for (const CCValAssign &VA : ArgLocs) {
1392	MVT ArgVT = ArgVTs [VA.getValNo()];
1393
1394	// Skip vector arguments for now, as well as long double and
1395	// uint128_t, and anything that isn't passed in a register.
1396	if (ArgVT.isVector() \|\| ArgVT.getSizeInBits() > `64` \|\| ArgVT == MVT::i1 \|\|
1397	!VA.isRegLoc() \|\| VA.needsCustom())
1398	return false;
1399
1400	// Skip bit-converted arguments for now.
1401	if (VA.getLocInfo() == CCValAssign::BCvt)
1402	return false;
1403	}
1404
1405	// Get a count of how many bytes are to be pushed onto the stack.
1406	NumBytes = CCInfo.getStackSize();
1407
1408	// The prolog code of the callee may store up to 8 GPR argument registers to
1409	// the stack, allowing va_start to index over them in memory if its varargs.
1410	// Because we cannot tell if this is needed on the caller side, we have to
1411	// conservatively assume that it is needed. As such, make sure we have at
1412	// least enough stack space for the caller to store the 8 GPRs.
1413	// FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
1414	NumBytes = std::max(a: NumBytes, b: LinkageSize + `64`);
1415
1416	// Issue CALLSEQ_START.
1417	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1418	MCID: TII.get(Opcode: TII.getCallFrameSetupOpcode()))
1419	.addImm(Val: NumBytes).addImm(Val: `0`);
1420
1421	// Prepare to assign register arguments. Every argument uses up a
1422	// GPR protocol register even if it's passed in a floating-point
1423	// register (unless we're using the fast calling convention).
1424	unsigned NextGPR = PPC::X3;
1425	unsigned NextFPR = PPC::F1;
1426
1427	// Process arguments.
1428	for (const CCValAssign &VA : ArgLocs) {
1429	unsigned Arg = ArgRegs [VA.getValNo()];
1430	MVT ArgVT = ArgVTs [VA.getValNo()];
1431
1432	// Handle argument promotion and bitcasts.
1433	switch (VA.getLocInfo()) {
1434	default:
1435	llvm_unreachable("Unknown loc info!");
1436	case CCValAssign::Full:
1437	break;
1438	case CCValAssign::SExt: {
1439	MVT DestVT = VA.getLocVT();
1440	const TargetRegisterClass *RC =
1441	(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1442	Register TmpReg = createResultReg(RC);
1443	if (!PPCEmitIntExt(SrcVT: ArgVT, SrcReg: Arg, DestVT, DestReg: TmpReg, /IsZExt/false))
1444	llvm_unreachable("Failed to emit a sext!");
1445	ArgVT = DestVT;
1446	Arg = TmpReg;
1447	break;
1448	}
1449	case CCValAssign::AExt:
1450	case CCValAssign::ZExt: {
1451	MVT DestVT = VA.getLocVT();
1452	const TargetRegisterClass *RC =
1453	(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1454	Register TmpReg = createResultReg(RC);
1455	if (!PPCEmitIntExt(SrcVT: ArgVT, SrcReg: Arg, DestVT, DestReg: TmpReg, /IsZExt/true))
1456	llvm_unreachable("Failed to emit a zext!");
1457	ArgVT = DestVT;
1458	Arg = TmpReg;
1459	break;
1460	}
1461	case CCValAssign::BCvt: {
1462	// FIXME: Not yet handled.
1463	llvm_unreachable("Should have bailed before getting here!");
1464	break;
1465	}
1466	}
1467
1468	// Copy this argument to the appropriate register.
1469	unsigned ArgReg;
1470	if (ArgVT == MVT::f32 \|\| ArgVT == MVT::f64) {
1471	ArgReg = NextFPR++;
1472	if (CC != CallingConv::Fast)
1473	++NextGPR;
1474	} else
1475	ArgReg = NextGPR++;
1476
1477	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1478	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ArgReg).addReg(RegNo: Arg);
1479	RegArgs.push_back(Elt: ArgReg);
1480	}
1481
1482	return true;
1483	}
1484
1485	// For a call that we've determined we can fast-select, finish the
1486	// call sequence and generate a copy to obtain the return value (if any).
1487	bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
1488	CallingConv::ID CC = CLI.CallConv;
1489
1490	// Issue CallSEQ_END.
1491	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1492	MCID: TII.get(Opcode: TII.getCallFrameDestroyOpcode()))
1493	.addImm(Val: NumBytes).addImm(Val: `0`);
1494
1495	// Next, generate a copy to obtain the return value.
1496	// FIXME: No multi-register return values yet, though I don't foresee
1497	// any real difficulties there.
1498	if (RetVT != MVT::isVoid) {
1499	SmallVector<CCValAssign, `16`> RVLocs;
1500	CCState CCInfo(CC, false, FuncInfo.MF, RVLocs, Context);
1501	CCInfo.AnalyzeCallResult(VT: RetVT, Fn: RetCC_PPC64_ELF_FIS);
1502	CCValAssign &VA = RVLocs [`0`];
1503	assert(RVLocs.size() == `1` && "No support for multi-reg return values!");
1504	assert(VA.isRegLoc() && "Can only return in registers!");
1505
1506	MVT DestVT = VA.getValVT();
1507	MVT CopyVT = DestVT;
1508
1509	// Ints smaller than a register still arrive in a full 64-bit
1510	// register, so make sure we recognize this.
1511	if (RetVT == MVT::i8 \|\| RetVT == MVT::i16 \|\| RetVT == MVT::i32)
1512	CopyVT = MVT::i64;
1513
1514	unsigned SourcePhysReg = VA.getLocReg();
1515	unsigned ResultReg = `0`;
1516
1517	if (RetVT == CopyVT) {
1518	const TargetRegisterClass *CpyRC = TLI.getRegClassFor(VT: CopyVT);
1519	ResultReg = copyRegToRegClass(ToRC: CpyRC, SrcReg: SourcePhysReg);
1520
1521	// If necessary, round the floating result to single precision.
1522	} else if (CopyVT == MVT::f64) {
1523	ResultReg = createResultReg(RC: TLI.getRegClassFor(VT: RetVT));
1524	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::FRSP),
1525	DestReg: ResultReg).addReg(RegNo: SourcePhysReg);
1526
1527	// If only the low half of a general register is needed, generate
1528	// a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
1529	// used along the fast-isel path (not lowered), and downstream logic
1530	// also doesn't like a direct subreg copy on a physical reg.)
1531	} else if (RetVT == MVT::i8 \|\| RetVT == MVT::i16 \|\| RetVT == MVT::i32) {
1532	// Convert physical register from G8RC to GPRC.
1533	SourcePhysReg -= PPC::X0 - PPC::R0;
1534	ResultReg = copyRegToRegClass(ToRC: &PPC::GPRCRegClass, SrcReg: SourcePhysReg);
1535	}
1536
1537	assert(ResultReg && "ResultReg unset!");
1538	CLI.InRegs.push_back(Elt: SourcePhysReg);
1539	CLI.ResultReg = ResultReg;
1540	CLI.NumResultRegs = `1`;
1541	}
1542
1543	return true;
1544	}
1545
1546	bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
1547	CallingConv::ID CC = CLI.CallConv;
1548	bool IsTailCall = CLI.IsTailCall;
1549	bool IsVarArg = CLI.IsVarArg;
1550	const Value *Callee = CLI.Callee;
1551	const MCSymbol *Symbol = CLI.Symbol;
1552
1553	if (!Callee && !Symbol)
1554	return false;
1555
1556	// Allow SelectionDAG isel to handle tail calls and long calls.
1557	if (IsTailCall \|\| Subtarget->useLongCalls())
1558	return false;
1559
1560	// Let SDISel handle vararg functions.
1561	if (IsVarArg)
1562	return false;
1563
1564	// If this is a PC-Rel function, let SDISel handle the call.
1565	if (Subtarget->isUsingPCRelativeCalls())
1566	return false;
1567
1568	// Handle simple calls for now, with legal return types and
1569	// those that can be extended.
1570	Type *RetTy = CLI.RetTy;
1571	MVT RetVT;
1572	if (RetTy->isVoidTy())
1573	RetVT = MVT::isVoid;
1574	else if (!isTypeLegal(Ty: RetTy, VT&: RetVT) && RetVT != MVT::i16 &&
1575	RetVT != MVT::i8)
1576	return false;
1577	else if (RetVT == MVT::i1 && Subtarget->useCRBits())
1578	// We can't handle boolean returns when CR bits are in use.
1579	return false;
1580
1581	// FIXME: No multi-register return values yet.
1582	if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
1583	RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
1584	RetVT != MVT::f64) {
1585	SmallVector<CCValAssign, `16`> RVLocs;
1586	CCState CCInfo(CC, IsVarArg, FuncInfo.MF, RVLocs, Context);
1587	CCInfo.AnalyzeCallResult(VT: RetVT, Fn: RetCC_PPC64_ELF_FIS);
1588	if (RVLocs.size() > `1`)
1589	return false;
1590	}
1591
1592	// Bail early if more than 8 arguments, as we only currently
1593	// handle arguments passed in registers.
1594	unsigned NumArgs = CLI.OutVals.size();
1595	if (NumArgs > `8`)
1596	return false;
1597
1598	// Set up the argument vectors.
1599	SmallVector<Value*, `8`> Args;
1600	SmallVector<unsigned, `8`> ArgRegs;
1601	SmallVector<MVT, `8`> ArgVTs;
1602	SmallVector<ISD::ArgFlagsTy, `8`> ArgFlags;
1603
1604	Args.reserve(N: NumArgs);
1605	ArgRegs.reserve(N: NumArgs);
1606	ArgVTs.reserve(N: NumArgs);
1607	ArgFlags.reserve(N: NumArgs);
1608
1609	for (unsigned i = `0`, ie = NumArgs; i != ie; ++i) {
1610	// Only handle easy calls for now. It would be reasonably easy
1611	// to handle <= 8-byte structures passed ByVal in registers, but we
1612	// have to ensure they are right-justified in the register.
1613	ISD::ArgFlagsTy Flags = CLI.OutFlags [i];
1614	if (Flags.isInReg() \|\| Flags.isSRet() \|\| Flags.isNest() \|\| Flags.isByVal())
1615	return false;
1616
1617	Value *ArgValue = CLI.OutVals [i];
1618	Type *ArgTy = ArgValue->getType();
1619	MVT ArgVT;
1620	if (!isTypeLegal(Ty: ArgTy, VT&: ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
1621	return false;
1622
1623	// FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP
1624	// types, which is passed through vector register. Skip these types and
1625	// fallback to default SelectionDAG based selection.
1626	if (ArgVT.isVector() \|\| ArgVT == MVT::f128)
1627	return false;
1628
1629	Register Arg = getRegForValue(V: ArgValue);
1630	if (Arg == `0`)
1631	return false;
1632
1633	Args.push_back(Elt: ArgValue);
1634	ArgRegs.push_back(Elt: Arg);
1635	ArgVTs.push_back(Elt: ArgVT);
1636	ArgFlags.push_back(Elt: Flags);
1637	}
1638
1639	// Process the arguments.
1640	SmallVector<unsigned, `8`> RegArgs;
1641	unsigned NumBytes;
1642
1643	if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
1644	RegArgs, CC, NumBytes, IsVarArg))
1645	return false;
1646
1647	MachineInstrBuilder MIB;
1648	// FIXME: No handling for function pointers yet. This requires
1649	// implementing the function descriptor (OPD) setup.
1650	const GlobalValue *GV = dyn_cast<GlobalValue>(Val: Callee);
1651	if (!GV) {
1652	// patchpoints are a special case; they always dispatch to a pointer value.
1653	// However, we don't actually want to generate the indirect call sequence
1654	// here (that will be generated, as necessary, during asm printing), and
1655	// the call we generate here will be erased by FastISel::selectPatchpoint,
1656	// so don't try very hard...
1657	if (CLI.IsPatchPoint)
1658	MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::NOP));
1659	else
1660	return false;
1661	} else {
1662	// Build direct call with NOP for TOC restore.
1663	// FIXME: We can and should optimize away the NOP for local calls.
1664	MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1665	MCID: TII.get(Opcode: PPC::BL8_NOP));
1666	// Add callee.
1667	MIB.addGlobalAddress(GV);
1668	}
1669
1670	// Add implicit physical register uses to the call.
1671	for (unsigned Reg : RegArgs)
1672	MIB.addReg(RegNo: Reg, flags: RegState::Implicit);
1673
1674	// Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
1675	// into the call.
1676	PPCFuncInfo->setUsesTOCBasePtr();
1677	MIB.addReg(RegNo: PPC::X2, flags: RegState::Implicit);
1678
1679	// Add a register mask with the call-preserved registers. Proper
1680	// defs for return values will be added by setPhysRegsDeadExcept().
1681	MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC));
1682
1683	CLI.Call = MIB;
1684
1685	// Finish off the call including any return values.
1686	return finishCall(RetVT, CLI, NumBytes);
1687	}
1688
1689	// Attempt to fast-select a return instruction.
1690	bool PPCFastISel::SelectRet(const Instruction *I) {
1691
1692	if (!FuncInfo.CanLowerReturn)
1693	return false;
1694
1695	const ReturnInst *Ret = cast<ReturnInst>(Val: I);
1696	const Function &F = *I->getParent()->getParent();
1697
1698	// Build a list of return value registers.
1699	SmallVector<unsigned, `4`> RetRegs;
1700	CallingConv::ID CC = F.getCallingConv();
1701
1702	if (Ret->getNumOperands() > `0`) {
1703	SmallVector<ISD::OutputArg, `4`> Outs;
1704	GetReturnInfo(CC, ReturnType: F.getReturnType(), attr: F.getAttributes(), Outs, TLI, DL);
1705
1706	// Analyze operands of the call, assigning locations to each operand.
1707	SmallVector<CCValAssign, `16`> ValLocs;
1708	CCState CCInfo(CC, F.isVarArg(), FuncInfo.MF, ValLocs, Context);
1709	CCInfo.AnalyzeReturn(Outs, Fn: RetCC_PPC64_ELF_FIS);
1710	const Value *RV = Ret->getOperand(i_nocapture: `0`);
1711
1712	// FIXME: Only one output register for now.
1713	if (ValLocs.size() > `1`)
1714	return false;
1715
1716	// Special case for returning a constant integer of any size - materialize
1717	// the constant as an i64 and copy it to the return register.
1718	if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: RV)) {
1719	CCValAssign &VA = ValLocs [`0`];
1720
1721	Register RetReg = VA.getLocReg();
1722	// We still need to worry about properly extending the sign. For example,
1723	// we could have only a single bit or a constant that needs zero
1724	// extension rather than sign extension. Make sure we pass the return
1725	// value extension property to integer materialization.
1726	unsigned SrcReg =
1727	PPCMaterializeInt(CI, VT: MVT::i64, UseSExt: VA.getLocInfo() != CCValAssign::ZExt);
1728
1729	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1730	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: RetReg).addReg(RegNo: SrcReg);
1731
1732	RetRegs.push_back(Elt: RetReg);
1733
1734	} else {
1735	Register Reg = getRegForValue(V: RV);
1736
1737	if (Reg == `0`)
1738	return false;
1739
1740	// Copy the result values into the output registers.
1741	for (unsigned i = `0`; i < ValLocs.size(); ++i) {
1742
1743	CCValAssign &VA = ValLocs [i];
1744	assert(VA.isRegLoc() && "Can only return in registers!");
1745	RetRegs.push_back(Elt: VA.getLocReg());
1746	unsigned SrcReg = Reg + VA.getValNo();
1747
1748	EVT RVEVT = TLI.getValueType(DL, Ty: RV->getType());
1749	if (!RVEVT.isSimple())
1750	return false;
1751	MVT RVVT = RVEVT.getSimpleVT();
1752	MVT DestVT = VA.getLocVT();
1753
1754	if (RVVT != DestVT && RVVT != MVT::i8 &&
1755	RVVT != MVT::i16 && RVVT != MVT::i32)
1756	return false;
1757
1758	if (RVVT != DestVT) {
1759	switch (VA.getLocInfo()) {
1760	default:
1761	llvm_unreachable("Unknown loc info!");
1762	case CCValAssign::Full:
1763	llvm_unreachable("Full value assign but types don't match?");
1764	case CCValAssign::AExt:
1765	case CCValAssign::ZExt: {
1766	const TargetRegisterClass *RC =
1767	(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1768	Register TmpReg = createResultReg(RC);
1769	if (!PPCEmitIntExt(SrcVT: RVVT, SrcReg, DestVT, DestReg: TmpReg, IsZExt: true))
1770	return false;
1771	SrcReg = TmpReg;
1772	break;
1773	}
1774	case CCValAssign::SExt: {
1775	const TargetRegisterClass *RC =
1776	(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1777	Register TmpReg = createResultReg(RC);
1778	if (!PPCEmitIntExt(SrcVT: RVVT, SrcReg, DestVT, DestReg: TmpReg, IsZExt: false))
1779	return false;
1780	SrcReg = TmpReg;
1781	break;
1782	}
1783	}
1784	}
1785
1786	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1787	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: RetRegs [i])
1788	.addReg(RegNo: SrcReg);
1789	}
1790	}
1791	}
1792
1793	MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1794	MCID: TII.get(Opcode: PPC::BLR8));
1795
1796	for (unsigned Reg : RetRegs)
1797	MIB.addReg(RegNo: Reg, flags: RegState::Implicit);
1798
1799	return true;
1800	}
1801
1802	// Attempt to emit an integer extend of SrcReg into DestReg. Both
1803	// signed and zero extensions are supported. Return false if we
1804	// can't handle it.
1805	bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
1806	unsigned DestReg, bool IsZExt) {
1807	if (DestVT != MVT::i32 && DestVT != MVT::i64)
1808	return false;
1809	if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
1810	return false;
1811
1812	// Signed extensions use EXTSB, EXTSH, EXTSW.
1813	if (!IsZExt) {
1814	unsigned Opc;
1815	if (SrcVT == MVT::i8)
1816	Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
1817	else if (SrcVT == MVT::i16)
1818	Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
1819	else {
1820	assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
1821	Opc = PPC::EXTSW_32_64;
1822	}
1823	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
1824	.addReg(RegNo: SrcReg);
1825
1826	// Unsigned 32-bit extensions use RLWINM.
1827	} else if (DestVT == MVT::i32) {
1828	unsigned MB;
1829	if (SrcVT == MVT::i8)
1830	MB = `24`;
1831	else {
1832	assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
1833	MB = `16`;
1834	}
1835	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::RLWINM),
1836	DestReg)
1837	.addReg(RegNo: SrcReg).addImm(/SH=/Val: `0`).addImm(Val: MB).addImm(/ME=/Val: `31`);
1838
1839	// Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
1840	} else {
1841	unsigned MB;
1842	if (SrcVT == MVT::i8)
1843	MB = `56`;
1844	else if (SrcVT == MVT::i16)
1845	MB = `48`;
1846	else
1847	MB = `32`;
1848	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1849	MCID: TII.get(Opcode: PPC::RLDICL_32_64), DestReg)
1850	.addReg(RegNo: SrcReg).addImm(/SH=/Val: `0`).addImm(Val: MB);
1851	}
1852
1853	return true;
1854	}
1855
1856	// Attempt to fast-select an indirect branch instruction.
1857	bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
1858	Register AddrReg = getRegForValue(V: I->getOperand(i: `0`));
1859	if (AddrReg == `0`)
1860	return false;
1861
1862	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::MTCTR8))
1863	.addReg(RegNo: AddrReg);
1864	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::BCTR8));
1865
1866	const IndirectBrInst *IB = cast<IndirectBrInst>(Val: I);
1867	for (const BasicBlock *SuccBB : IB->successors())
1868	FuncInfo.MBB->addSuccessor(Succ: FuncInfo.MBBMap [SuccBB]);
1869
1870	return true;
1871	}
1872
1873	// Attempt to fast-select an integer truncate instruction.
1874	bool PPCFastISel::SelectTrunc(const Instruction *I) {
1875	Value *Src = I->getOperand(i: `0`);
1876	EVT SrcVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
1877	EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
1878
1879	if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
1880	return false;
1881
1882	if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
1883	return false;
1884
1885	Register SrcReg = getRegForValue(V: Src);
1886	if (!SrcReg)
1887	return false;
1888
1889	// The only interesting case is when we need to switch register classes.
1890	if (SrcVT == MVT::i64)
1891	SrcReg = copyRegToRegClass(ToRC: &PPC::GPRCRegClass, SrcReg, Flag: `0`, SubReg: PPC::sub_32);
1892
1893	updateValueMap(I, Reg: SrcReg);
1894	return true;
1895	}
1896
1897	// Attempt to fast-select an integer extend instruction.
1898	bool PPCFastISel::SelectIntExt(const Instruction *I) {
1899	Type *DestTy = I->getType();
1900	Value *Src = I->getOperand(i: `0`);
1901	Type *SrcTy = Src->getType();
1902
1903	bool IsZExt = isa<ZExtInst>(Val: I);
1904	Register SrcReg = getRegForValue(V: Src);
1905	if (!SrcReg) return false;
1906
1907	EVT SrcEVT, DestEVT;
1908	SrcEVT = TLI.getValueType(DL, Ty: SrcTy, AllowUnknown: true);
1909	DestEVT = TLI.getValueType(DL, Ty: DestTy, AllowUnknown: true);
1910	if (!SrcEVT.isSimple())
1911	return false;
1912	if (!DestEVT.isSimple())
1913	return false;
1914
1915	MVT SrcVT = SrcEVT.getSimpleVT();
1916	MVT DestVT = DestEVT.getSimpleVT();
1917
1918	// If we know the register class needed for the result of this
1919	// instruction, use it. Otherwise pick the register class of the
1920	// correct size that does not contain X0/R0, since we don't know
1921	// whether downstream uses permit that assignment.
1922	Register AssignedReg = FuncInfo.ValueMap [I];
1923	const TargetRegisterClass *RC =
1924	(AssignedReg ? MRI.getRegClass(Reg: AssignedReg) :
1925	(DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
1926	&PPC::GPRC_and_GPRC_NOR0RegClass));
1927	Register ResultReg = createResultReg(RC);
1928
1929	if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, DestReg: ResultReg, IsZExt))
1930	return false;
1931
1932	updateValueMap(I, Reg: ResultReg);
1933	return true;
1934	}
1935
1936	// Attempt to fast-select an instruction that wasn't handled by
1937	// the table-generated machinery.
1938	bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
1939
1940	switch (I->getOpcode()) {
1941	case Instruction::Load:
1942	return SelectLoad(I);
1943	case Instruction::Store:
1944	return SelectStore(I);
1945	case Instruction::Br:
1946	return SelectBranch(I);
1947	case Instruction::IndirectBr:
1948	return SelectIndirectBr(I);
1949	case Instruction::FPExt:
1950	return SelectFPExt(I);
1951	case Instruction::FPTrunc:
1952	return SelectFPTrunc(I);
1953	case Instruction::SIToFP:
1954	return SelectIToFP(I, /IsSigned/ true);
1955	case Instruction::UIToFP:
1956	return SelectIToFP(I, /IsSigned/ false);
1957	case Instruction::FPToSI:
1958	return SelectFPToI(I, /IsSigned/ true);
1959	case Instruction::FPToUI:
1960	return SelectFPToI(I, /IsSigned/ false);
1961	case Instruction::Add:
1962	return SelectBinaryIntOp(I, ISDOpcode: ISD::ADD);
1963	case Instruction::Or:
1964	return SelectBinaryIntOp(I, ISDOpcode: ISD::OR);
1965	case Instruction::Sub:
1966	return SelectBinaryIntOp(I, ISDOpcode: ISD::SUB);
1967	case Instruction::Ret:
1968	return SelectRet(I);
1969	case Instruction::Trunc:
1970	return SelectTrunc(I);
1971	case Instruction::ZExt:
1972	case Instruction::SExt:
1973	return SelectIntExt(I);
1974	// Here add other flavors of Instruction::XXX that automated
1975	// cases don't catch. For example, switches are terminators
1976	// that aren't yet handled.
1977	default:
1978	break;
1979	}
1980	return false;
1981	}
1982
1983	// Materialize a floating-point constant into a register, and return
1984	// the register number (or zero if we failed to handle it).
1985	unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
1986	// If this is a PC-Rel function, let SDISel handle constant pool.
1987	if (Subtarget->isUsingPCRelativeCalls())
1988	return false;
1989
1990	// No plans to handle long double here.
1991	if (VT != MVT::f32 && VT != MVT::f64)
1992	return `0`;
1993
1994	// All FP constants are loaded from the constant pool.
1995	Align Alignment = DL.getPrefTypeAlign(Ty: CFP->getType());
1996	unsigned Idx = MCP.getConstantPoolIndex(C: cast<Constant>(Val: CFP), Alignment);
1997	const bool HasSPE = Subtarget->hasSPE();
1998	const TargetRegisterClass *RC;
1999	if (HasSPE)
2000	RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
2001	else
2002	RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);
2003
2004	Register DestReg = createResultReg(RC);
2005	CodeModel::Model CModel = TM.getCodeModel();
2006
2007	MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2008	PtrInfo: MachinePointerInfo::getConstantPool(MF&: *FuncInfo.MF),
2009	F: MachineMemOperand::MOLoad, Size: (VT == MVT::f32) ? `4` : `8`, BaseAlignment: Alignment);
2010
2011	unsigned Opc;
2012
2013	if (HasSPE)
2014	Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
2015	else
2016	Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);
2017
2018	Register TmpReg = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
2019
2020	PPCFuncInfo->setUsesTOCBasePtr();
2021	// For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
2022	if (CModel == CodeModel::Small) {
2023	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::LDtocCPT),
2024	DestReg: TmpReg)
2025	.addConstantPoolIndex(Idx).addReg(RegNo: PPC::X2);
2026	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
2027	.addImm(Val: `0`).addReg(RegNo: TmpReg).addMemOperand(MMO);
2028	} else {
2029	// Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
2030	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDIStocHA8),
2031	DestReg: TmpReg).addReg(RegNo: PPC::X2).addConstantPoolIndex(Idx);
2032	// But for large code model, we must generate a LDtocL followed
2033	// by the LF[SD].
2034	if (CModel == CodeModel::Large) {
2035	Register TmpReg2 = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
2036	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::LDtocL),
2037	DestReg: TmpReg2).addConstantPoolIndex(Idx).addReg(RegNo: TmpReg);
2038	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
2039	.addImm(Val: `0`)
2040	.addReg(RegNo: TmpReg2);
2041	} else
2042	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
2043	.addConstantPoolIndex(Idx, Offset: `0`, TargetFlags: PPCII::MO_TOC_LO)
2044	.addReg(RegNo: TmpReg)
2045	.addMemOperand(MMO);
2046	}
2047
2048	return DestReg;
2049	}
2050
2051	// Materialize the address of a global value into a register, and return
2052	// the register number (or zero if we failed to handle it).
2053	unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
2054	// If this is a PC-Rel function, let SDISel handle GV materialization.
2055	if (Subtarget->isUsingPCRelativeCalls())
2056	return false;
2057
2058	assert(VT == MVT::i64 && "Non-address!");
2059	const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
2060	Register DestReg = createResultReg(RC);
2061
2062	// Global values may be plain old object addresses, TLS object
2063	// addresses, constant pool entries, or jump tables. How we generate
2064	// code for these may depend on small, medium, or large code model.
2065	CodeModel::Model CModel = TM.getCodeModel();
2066
2067	// FIXME: Jump tables are not yet required because fast-isel doesn't
2068	// handle switches; if that changes, we need them as well. For now,
2069	// what follows assumes everything's a generic (or TLS) global address.
2070
2071	// FIXME: We don't yet handle the complexity of TLS.
2072	if (GV->isThreadLocal())
2073	return `0`;
2074
2075	PPCFuncInfo->setUsesTOCBasePtr();
2076	bool IsAIXTocData = TM.getTargetTriple().isOSAIX() &&
2077	isa<GlobalVariable>(Val: GV) &&
2078	cast<GlobalVariable>(Val: GV)->hasAttribute(Kind: "toc-data");
2079
2080	// For small code model, generate a simple TOC load.
2081	if (CModel == CodeModel::Small) {
2082	auto MIB = BuildMI(
2083	BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2084	MCID: IsAIXTocData ? TII.get(Opcode: PPC::ADDItoc8) : TII.get(Opcode: PPC::LDtoc), DestReg);
2085	if (IsAIXTocData)
2086	MIB.addReg(RegNo: PPC::X2).addGlobalAddress(GV);
2087	else
2088	MIB.addGlobalAddress(GV).addReg(RegNo: PPC::X2);
2089	} else {
2090	// If the address is an externally defined symbol, a symbol with common
2091	// or externally available linkage, a non-local function address, or a
2092	// jump table address (not yet needed), or if we are generating code
2093	// for large code model, we generate:
2094	// LDtocL(GV, ADDIStocHA8(%x2, GV))
2095	// Otherwise we generate:
2096	// ADDItocL8(ADDIStocHA8(%x2, GV), GV)
2097	// Either way, start with the ADDIStocHA8:
2098	Register HighPartReg = createResultReg(RC);
2099	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDIStocHA8),
2100	DestReg: HighPartReg).addReg(RegNo: PPC::X2).addGlobalAddress(GV);
2101
2102	if (Subtarget->isGVIndirectSymbol(GV)) {
2103	assert(!IsAIXTocData && "TOC data should always be direct.");
2104	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::LDtocL),
2105	DestReg).addGlobalAddress(GV).addReg(RegNo: HighPartReg);
2106	} else {
2107	// Otherwise generate the ADDItocL8.
2108	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDItocL8),
2109	DestReg)
2110	.addReg(RegNo: HighPartReg)
2111	.addGlobalAddress(GV);
2112	}
2113	}
2114
2115	return DestReg;
2116	}
2117
2118	// Materialize a 32-bit integer constant into a register, and return
2119	// the register number (or zero if we failed to handle it).
2120	unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
2121	const TargetRegisterClass *RC) {
2122	unsigned Lo = Imm & `0xFFFF`;
2123	unsigned Hi = (Imm >> `16`) & `0xFFFF`;
2124
2125	Register ResultReg = createResultReg(RC);
2126	bool IsGPRC = RC->hasSuperClassEq(RC: &PPC::GPRCRegClass);
2127
2128	if (isInt<`16`>(x: Imm))
2129	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2130	MCID: TII.get(Opcode: IsGPRC ? PPC::LI : PPC::LI8), DestReg: ResultReg)
2131	.addImm(Val: Imm);
2132	else if (Lo) {
2133	// Both Lo and Hi have nonzero bits.
2134	Register TmpReg = createResultReg(RC);
2135	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2136	MCID: TII.get(Opcode: IsGPRC ? PPC::LIS : PPC::LIS8), DestReg: TmpReg)
2137	.addImm(Val: Hi);
2138	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2139	MCID: TII.get(Opcode: IsGPRC ? PPC::ORI : PPC::ORI8), DestReg: ResultReg)
2140	.addReg(RegNo: TmpReg).addImm(Val: Lo);
2141	} else
2142	// Just Hi bits.
2143	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2144	MCID: TII.get(Opcode: IsGPRC ? PPC::LIS : PPC::LIS8), DestReg: ResultReg)
2145	.addImm(Val: Hi);
2146
2147	return ResultReg;
2148	}
2149
2150	// Materialize a 64-bit integer constant into a register, and return
2151	// the register number (or zero if we failed to handle it).
2152	unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
2153	const TargetRegisterClass *RC) {
2154	unsigned Remainder = `0`;
2155	unsigned Shift = `0`;
2156
2157	// If the value doesn't fit in 32 bits, see if we can shift it
2158	// so that it fits in 32 bits.
2159	if (!isInt<`32`>(x: Imm)) {
2160	Shift = llvm::countr_zero<uint64_t>(Val: Imm);
2161	int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
2162
2163	if (isInt<`32`>(x: ImmSh))
2164	Imm = ImmSh;
2165	else {
2166	Remainder = Imm;
2167	Shift = `32`;
2168	Imm >>= `32`;
2169	}
2170	}
2171
2172	// Handle the high-order 32 bits (if shifted) or the whole 32 bits
2173	// (if not shifted).
2174	unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
2175	if (!Shift)
2176	return TmpReg1;
2177
2178	// If upper 32 bits were not zero, we've built them and need to shift
2179	// them into place.
2180	unsigned TmpReg2;
2181	if (Imm) {
2182	TmpReg2 = createResultReg(RC);
2183	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::RLDICR),
2184	DestReg: TmpReg2).addReg(RegNo: TmpReg1).addImm(Val: Shift).addImm(Val: `63` - Shift);
2185	} else
2186	TmpReg2 = TmpReg1;
2187
2188	unsigned TmpReg3, Hi, Lo;
2189	if ((Hi = (Remainder >> `16`) & `0xFFFF`)) {
2190	TmpReg3 = createResultReg(RC);
2191	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ORIS8),
2192	DestReg: TmpReg3).addReg(RegNo: TmpReg2).addImm(Val: Hi);
2193	} else
2194	TmpReg3 = TmpReg2;
2195
2196	if ((Lo = Remainder & `0xFFFF`)) {
2197	Register ResultReg = createResultReg(RC);
2198	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ORI8),
2199	DestReg: ResultReg).addReg(RegNo: TmpReg3).addImm(Val: Lo);
2200	return ResultReg;
2201	}
2202
2203	return TmpReg3;
2204	}
2205
2206	// Materialize an integer constant into a register, and return
2207	// the register number (or zero if we failed to handle it).
2208	unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
2209	bool UseSExt) {
2210	// If we're using CR bit registers for i1 values, handle that as a special
2211	// case first.
2212	if (VT == MVT::i1 && Subtarget->useCRBits()) {
2213	Register ImmReg = createResultReg(RC: &PPC::CRBITRCRegClass);
2214	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2215	MCID: TII.get(Opcode: CI->isZero() ? PPC::CRUNSET : PPC::CRSET), DestReg: ImmReg);
2216	return ImmReg;
2217	}
2218
2219	if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2220	VT != MVT::i1)
2221	return `0`;
2222
2223	const TargetRegisterClass *RC =
2224	((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
2225	int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
2226
2227	// If the constant is in range, use a load-immediate.
2228	// Since LI will sign extend the constant we need to make sure that for
2229	// our zeroext constants that the sign extended constant fits into 16-bits -
2230	// a range of 0..0x7fff.
2231	if (isInt<`16`>(x: Imm)) {
2232	unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
2233	Register ImmReg = createResultReg(RC);
2234	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ImmReg)
2235	.addImm(Val: Imm);
2236	return ImmReg;
2237	}
2238
2239	// Construct the constant piecewise.
2240	if (VT == MVT::i64)
2241	return PPCMaterialize64BitInt(Imm, RC);
2242	else if (VT == MVT::i32)
2243	return PPCMaterialize32BitInt(Imm, RC);
2244
2245	return `0`;
2246	}
2247
2248	// Materialize a constant into a register, and return the register
2249	// number (or zero if we failed to handle it).
2250	unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
2251	EVT CEVT = TLI.getValueType(DL, Ty: C->getType(), AllowUnknown: true);
2252
2253	// Only handle simple types.
2254	if (!CEVT.isSimple()) return `0`;
2255	MVT VT = CEVT.getSimpleVT();
2256
2257	if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C))
2258	return PPCMaterializeFP(CFP, VT);
2259	else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C))
2260	return PPCMaterializeGV(GV, VT);
2261	else if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: C))
2262	// Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
2263	// assumes that constant PHI operands will be zero extended, and failure to
2264	// match that assumption will cause problems if we sign extend here but
2265	// some user of a PHI is in a block for which we fall back to full SDAG
2266	// instruction selection.
2267	return PPCMaterializeInt(CI, VT, UseSExt: false);
2268
2269	return `0`;
2270	}
2271
2272	// Materialize the address created by an alloca into a register, and
2273	// return the register number (or zero if we failed to handle it).
2274	unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2275	// Don't handle dynamic allocas.
2276	if (!FuncInfo.StaticAllocaMap.count(Val: AI)) return `0`;
2277
2278	MVT VT;
2279	if (!isLoadTypeLegal(Ty: AI->getType(), VT)) return `0`;
2280
2281	DenseMap<const AllocaInst, int*>::iterator SI =
2282	FuncInfo.StaticAllocaMap.find(Val: AI);
2283
2284	if (SI != FuncInfo.StaticAllocaMap.end()) {
2285	Register ResultReg = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
2286	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDI8),
2287	DestReg: ResultReg).addFrameIndex(Idx: SI ->second).addImm(Val: `0`);
2288	return ResultReg;
2289	}
2290
2291	return `0`;
2292	}
2293
2294	// Fold loads into extends when possible.
2295	// FIXME: We can have multiple redundant extend/trunc instructions
2296	// following a load. The folding only picks up one. Extend this
2297	// to check subsequent instructions for the same pattern and remove
2298	// them. Thus ResultReg should be the def reg for the last redundant
2299	// instruction in a chain, and all intervening instructions can be
2300	// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
2301	// to add ELF64-NOT: rldicl to the appropriate tests when this works.
2302	bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr MI, unsigned* OpNo,
2303	const LoadInst *LI) {
2304	// Verify we have a legal type before going any further.
2305	MVT VT;
2306	if (!isLoadTypeLegal(Ty: LI->getType(), VT))
2307	return false;
2308
2309	// Combine load followed by zero- or sign-extend.
2310	bool IsZExt = false;
2311	switch(MI->getOpcode()) {
2312	default:
2313	return false;
2314
2315	case PPC::RLDICL:
2316	case PPC::RLDICL_32_64: {
2317	IsZExt = true;
2318	unsigned MB = MI->getOperand(i: `3`).getImm();
2319	if ((VT == MVT::i8 && MB <= `56`) \|\|
2320	(VT == MVT::i16 && MB <= `48`) \|\|
2321	(VT == MVT::i32 && MB <= `32`))
2322	break;
2323	return false;
2324	}
2325
2326	case PPC::RLWINM:
2327	case PPC::RLWINM8: {
2328	IsZExt = true;
2329	unsigned MB = MI->getOperand(i: `3`).getImm();
2330	if ((VT == MVT::i8 && MB <= `24`) \|\|
2331	(VT == MVT::i16 && MB <= `16`))
2332	break;
2333	return false;
2334	}
2335
2336	case PPC::EXTSB:
2337	case PPC::EXTSB8:
2338	case PPC::EXTSB8_32_64:
2339	/ There is no sign-extending load-byte instruction. /
2340	return false;
2341
2342	case PPC::EXTSH:
2343	case PPC::EXTSH8:
2344	case PPC::EXTSH8_32_64: {
2345	if (VT != MVT::i16 && VT != MVT::i8)
2346	return false;
2347	break;
2348	}
2349
2350	case PPC::EXTSW:
2351	case PPC::EXTSW_32:
2352	case PPC::EXTSW_32_64: {
2353	if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
2354	return false;
2355	break;
2356	}
2357	}
2358
2359	// See if we can handle this address.
2360	Address Addr;
2361	if (!PPCComputeAddress(Obj: LI->getOperand(i_nocapture: `0`), Addr))
2362	return false;
2363
2364	Register ResultReg = MI->getOperand(i: `0`).getReg();
2365
2366	if (!PPCEmitLoad(VT, ResultReg, Addr, RC: nullptr, IsZExt,
2367	FP64LoadOpc: Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
2368	return false;
2369
2370	MachineBasicBlock::iterator I(MI);
2371	removeDeadCode(I, E: std::next(x: I));
2372	return true;
2373	}
2374
2375	// Attempt to lower call arguments in a faster way than done by
2376	// the selection DAG code.
2377	bool PPCFastISel::fastLowerArguments() {
2378	// Defer to normal argument lowering for now. It's reasonably
2379	// efficient. Consider doing something like ARM to handle the
2380	// case where all args fit in registers, no varargs, no float
2381	// or vector args.
2382	return false;
2383	}
2384
2385	// Handle materializing integer constants into a register. This is not
2386	// automatically generated for PowerPC, so must be explicitly created here.
2387	unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
2388
2389	if (Opc != ISD::Constant)
2390	return `0`;
2391
2392	// If we're using CR bit registers for i1 values, handle that as a special
2393	// case first.
2394	if (VT == MVT::i1 && Subtarget->useCRBits()) {
2395	Register ImmReg = createResultReg(RC: &PPC::CRBITRCRegClass);
2396	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2397	MCID: TII.get(Opcode: Imm == `0` ? PPC::CRUNSET : PPC::CRSET), DestReg: ImmReg);
2398	return ImmReg;
2399	}
2400
2401	if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2402	VT != MVT::i1)
2403	return `0`;
2404
2405	const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2406	&PPC::GPRCRegClass);
2407	if (VT == MVT::i64)
2408	return PPCMaterialize64BitInt(Imm, RC);
2409	else
2410	return PPCMaterialize32BitInt(Imm, RC);
2411	}
2412
2413	// Override for ADDI and ADDI8 to set the correct register class
2414	// on RHS operand 0. The automatic infrastructure naively assumes
2415	// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2416	// for these cases. At the moment, none of the other automatically
2417	// generated RI instructions require special treatment. However, once
2418	// SelectSelect is implemented, "isel" requires similar handling.
2419	//
2420	// Also be conservative about the output register class. Avoid
2421	// assigning R0 or X0 to the output register for GPRC and G8RC
2422	// register classes, as any such result could be used in ADDI, etc.,
2423	// where those regs have another meaning.
2424	unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2425	const TargetRegisterClass *RC,
2426	unsigned Op0,
2427	uint64_t Imm) {
2428	if (MachineInstOpcode == PPC::ADDI)
2429	MRI.setRegClass(Reg: Op0, RC: &PPC::GPRC_and_GPRC_NOR0RegClass);
2430	else if (MachineInstOpcode == PPC::ADDI8)
2431	MRI.setRegClass(Reg: Op0, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
2432
2433	const TargetRegisterClass *UseRC =
2434	(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2435	(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2436
2437	return FastISel::fastEmitInst_ri(MachineInstOpcode, RC: UseRC, Op0, Imm);
2438	}
2439
2440	// Override for instructions with one register operand to avoid use of
2441	// R0/X0. The automatic infrastructure isn't aware of the context so
2442	// we must be conservative.
2443	unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2444	const TargetRegisterClass* RC,
2445	unsigned Op0) {
2446	const TargetRegisterClass *UseRC =
2447	(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2448	(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2449
2450	return FastISel::fastEmitInst_r(MachineInstOpcode, RC: UseRC, Op0);
2451	}
2452
2453	// Override for instructions with two register operands to avoid use
2454	// of R0/X0. The automatic infrastructure isn't aware of the context
2455	// so we must be conservative.
2456	unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2457	const TargetRegisterClass* RC,
2458	unsigned Op0, unsigned Op1) {
2459	const TargetRegisterClass *UseRC =
2460	(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2461	(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2462
2463	return FastISel::fastEmitInst_rr(MachineInstOpcode, RC: UseRC, Op0, Op1);
2464	}
2465
2466	namespace llvm {
2467	// Create the fast instruction selector for PowerPC64 ELF.
2468	FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
2469	const TargetLibraryInfo *LibInfo) {
2470	// Only available on 64-bit for now.
2471	const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2472	if (Subtarget.isPPC64())
2473	return new PPCFastISel (FuncInfo, LibInfo);
2474	return nullptr;
2475	}
2476	}
2477

Browse the source code of llvm_projects/llvm/lib/Target/PowerPC/PPCFastISel.cpp