PPCFastISel.cpp source code [llvm_projects/llvm/lib/Target/PowerPC/PPCFastISel.cpp]

1	//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file defines the PowerPC-specific support for the FastISel class. Some
10	// of the target-specific code is generated by tablegen in the file
11	// PPCGenFastISel.inc, which is #included here.
12	//
13	//===----------------------------------------------------------------------===//
14
15	#include "MCTargetDesc/PPCPredicates.h"
16	#include "PPC.h"
17	#include "PPCCallingConv.h"
18	#include "PPCISelLowering.h"
19	#include "PPCMachineFunctionInfo.h"
20	#include "PPCSelectionDAGInfo.h"
21	#include "PPCSubtarget.h"
22	#include "llvm/CodeGen/CallingConvLower.h"
23	#include "llvm/CodeGen/FastISel.h"
24	#include "llvm/CodeGen/FunctionLoweringInfo.h"
25	#include "llvm/CodeGen/MachineConstantPool.h"
26	#include "llvm/CodeGen/MachineFrameInfo.h"
27	#include "llvm/CodeGen/MachineInstrBuilder.h"
28	#include "llvm/CodeGen/MachineRegisterInfo.h"
29	#include "llvm/CodeGen/TargetLowering.h"
30	#include "llvm/IR/CallingConv.h"
31	#include "llvm/IR/GetElementPtrTypeIterator.h"
32	#include "llvm/IR/GlobalVariable.h"
33	#include "llvm/IR/Operator.h"
34	#include "llvm/Target/TargetMachine.h"
35
36	//===----------------------------------------------------------------------===//
37	//
38	// TBD:
39	// fastLowerArguments: Handle simple cases.
40	// PPCMaterializeGV: Handle TLS.
41	// SelectCall: Handle function pointers.
42	// SelectCall: Handle multi-register return values.
43	// SelectCall: Optimize away nops for local calls.
44	// processCallArgs: Handle bit-converted arguments.
45	// finishCall: Handle multi-register return values.
46	// PPCComputeAddress: Handle parameter references as FrameIndex's.
47	// PPCEmitCmp: Handle immediate as operand 1.
48	// SelectCall: Handle small byval arguments.
49	// SelectIntrinsicCall: Implement.
50	// SelectSelect: Implement.
51	// Consider factoring isTypeLegal into the base class.
52	// Implement switches and jump tables.
53	//
54	//===----------------------------------------------------------------------===//
55	using namespace llvm;
56
57	#define DEBUG_TYPE "ppcfastisel"
58
59	namespace {
60
/// Describes a memory address as it is built up during fast instruction
/// selection: a base (either a virtual register or a stack frame index)
/// plus a signed byte offset.
struct Address {
  /// Selects which member of Base is meaningful.
  enum {
    RegBase,
    FrameIndexBase
  } BaseType;

  /// The base of the address; interpret according to BaseType.
  union {
    unsigned Reg; // Valid when BaseType == RegBase.
    int FI;       // Valid when BaseType == FrameIndexBase.
  } Base;

  /// Signed displacement added to the base.
  int64_t Offset;

  // Innocuous defaults for our address: a register base with register
  // number 0 and a zero offset.
  Address()
      : BaseType(RegBase), Offset(0) {
    Base.Reg = 0;
  }
};
80
81	class PPCFastISel final : public FastISel {
82
83	const TargetMachine &TM;
84	const PPCSubtarget *Subtarget;
85	PPCFunctionInfo *PPCFuncInfo;
86	const TargetInstrInfo &TII;
87	const TargetLowering &TLI;
88	LLVMContext *Context;
89
90	public:
91	explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
92	const TargetLibraryInfo *LibInfo,
93	const LibcallLoweringInfo *LibcallLowering)
94	: FastISel (FuncInfo, LibInfo, LibcallLowering),
95	TM(FuncInfo.MF->getTarget()),
96	Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
97	PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
98	TII(Subtarget->getInstrInfo()), TLI(Subtarget->getTargetLowering()),
99	Context(&FuncInfo.Fn->getContext()) {}
100
101	// Backend specific FastISel code.
102	private:
103	bool fastSelectInstruction(const Instruction *I) override;
104	Register fastMaterializeConstant(const Constant *C) override;
105	Register fastMaterializeAlloca(const AllocaInst *AI) override;
106	bool tryToFoldLoadIntoMI(MachineInstr MI, unsigned* OpNo,
107	const LoadInst *LI) override;
108	bool fastLowerArguments() override;
109	Register fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
110	Register fastEmitInst_ri(unsigned MachineInstOpcode,
111	const TargetRegisterClass *RC, Register Op0,
112	uint64_t Imm);
113	Register fastEmitInst_r(unsigned MachineInstOpcode,
114	const TargetRegisterClass *RC, Register Op0);
115	Register fastEmitInst_rr(unsigned MachineInstOpcode,
116	const TargetRegisterClass *RC, Register Op0,
117	Register Op1);
118
119	bool fastLowerCall(CallLoweringInfo &CLI) override;
120
121	// Instruction selection routines.
122	private:
123	bool SelectLoad(const Instruction *I);
124	bool SelectStore(const Instruction *I);
125	bool SelectBranch(const Instruction *I);
126	bool SelectIndirectBr(const Instruction *I);
127	bool SelectFPExt(const Instruction *I);
128	bool SelectFPTrunc(const Instruction *I);
129	bool SelectIToFP(const Instruction I, bool* IsSigned);
130	bool SelectFPToI(const Instruction I, bool* IsSigned);
131	bool SelectBinaryIntOp(const Instruction I, unsigned* ISDOpcode);
132	bool SelectRet(const Instruction *I);
133	bool SelectTrunc(const Instruction *I);
134	bool SelectIntExt(const Instruction *I);
135
136	// Utility routines.
137	private:
138	bool isTypeLegal(Type *Ty, MVT &VT);
139	bool isLoadTypeLegal(Type *Ty, MVT &VT);
140	bool isValueAvailable(const Value V) const*;
141	bool isVSFRCRegClass(const TargetRegisterClass RC) const* {
142	return RC->getID() == PPC::VSFRCRegClassID;
143	}
144	bool isVSSRCRegClass(const TargetRegisterClass RC) const* {
145	return RC->getID() == PPC::VSSRCRegClassID;
146	}
147	Register copyRegToRegClass(const TargetRegisterClass *ToRC, Register SrcReg,
148	RegState Flag = {}, unsigned SubReg = `0`) {
149	Register TmpReg = createResultReg(RC: ToRC);
150	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
151	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: TmpReg).addReg(RegNo: SrcReg, Flags: Flag, SubReg);
152	return TmpReg;
153	}
154	bool PPCEmitCmp(const Value Src1Value, const* Value Src2Value, bool* isZExt,
155	Register DestReg, const PPC::Predicate Pred);
156	bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
157	const TargetRegisterClass RC, bool* IsZExt = true,
158	unsigned FP64LoadOpc = PPC::LFD);
159	bool PPCEmitStore(MVT VT, Register SrcReg, Address &Addr);
160	bool PPCComputeAddress(const Value *Obj, Address &Addr);
161	void PPCSimplifyAddress(Address &Addr, bool &UseOffset, Register &IndexReg);
162	bool PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, Register DestReg,
163	bool IsZExt);
164	Register PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
165	Register PPCMaterializeGV(const GlobalValue *GV, MVT VT);
166	Register PPCMaterializeInt(const ConstantInt *CI, MVT VT,
167	bool UseSExt = true);
168	Register PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
169	Register PPCMaterialize64BitInt(int64_t Imm, const TargetRegisterClass *RC);
170	Register PPCMoveToIntReg(const Instruction *I, MVT VT, Register SrcReg,
171	bool IsSigned);
172	Register PPCMoveToFPReg(MVT VT, Register SrcReg, bool IsSigned);
173
174	// Call handling routines.
175	private:
176	bool processCallArgs(SmallVectorImpl<Value *> &Args,
177	SmallVectorImpl<Register> &ArgRegs,
178	SmallVectorImpl<MVT> &ArgVTs,
179	SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
180	SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
181	unsigned &NumBytes, bool IsVarArg);
182	bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);
183
184	private:
185	#include "PPCGenFastISel.inc"
186
187	};
188
189	} // end anonymous namespace
190
191	static std::optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
192	switch (Pred) {
193	// These are not representable with any single compare.
194	case CmpInst::FCMP_FALSE:
195	case CmpInst::FCMP_TRUE:
196	// Major concern about the following 6 cases is NaN result. The comparison
197	// result consists of 4 bits, indicating lt, eq, gt and un (unordered),
198	// only one of which will be set. The result is generated by fcmpu
199	// instruction. However, bc instruction only inspects one of the first 3
200	// bits, so when un is set, bc instruction may jump to an undesired
201	// place.
202	//
203	// More specifically, if we expect an unordered comparison and un is set, we
204	// expect to always go to true branch; in such case UEQ, UGT and ULT still
205	// give false, which are undesired; but UNE, UGE, ULE happen to give true,
206	// since they are tested by inspecting !eq, !lt, !gt, respectively.
207	//
208	// Similarly, for ordered comparison, when un is set, we always expect the
209	// result to be false. In such case OGT, OLT and OEQ is good, since they are
210	// actually testing GT, LT, and EQ respectively, which are false. OGE, OLE
211	// and ONE are tested through !lt, !gt and !eq, and these are true.
212	case CmpInst::FCMP_UEQ:
213	case CmpInst::FCMP_UGT:
214	case CmpInst::FCMP_ULT:
215	case CmpInst::FCMP_OGE:
216	case CmpInst::FCMP_OLE:
217	case CmpInst::FCMP_ONE:
218	default:
219	return std::nullopt;
220
221	case CmpInst::FCMP_OEQ:
222	case CmpInst::ICMP_EQ:
223	return PPC::PRED_EQ;
224
225	case CmpInst::FCMP_OGT:
226	case CmpInst::ICMP_UGT:
227	case CmpInst::ICMP_SGT:
228	return PPC::PRED_GT;
229
230	case CmpInst::FCMP_UGE:
231	case CmpInst::ICMP_UGE:
232	case CmpInst::ICMP_SGE:
233	return PPC::PRED_GE;
234
235	case CmpInst::FCMP_OLT:
236	case CmpInst::ICMP_ULT:
237	case CmpInst::ICMP_SLT:
238	return PPC::PRED_LT;
239
240	case CmpInst::FCMP_ULE:
241	case CmpInst::ICMP_ULE:
242	case CmpInst::ICMP_SLE:
243	return PPC::PRED_LE;
244
245	case CmpInst::FCMP_UNE:
246	case CmpInst::ICMP_NE:
247	return PPC::PRED_NE;
248
249	case CmpInst::FCMP_ORD:
250	return PPC::PRED_NU;
251
252	case CmpInst::FCMP_UNO:
253	return PPC::PRED_UN;
254	}
255	}
256
257	// Determine whether the type Ty is simple enough to be handled by
258	// fast-isel, and return its equivalent machine type in VT.
259	// FIXME: Copied directly from ARM -- factor into base class?
260	bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
261	EVT Evt = TLI.getValueType(DL, Ty, AllowUnknown: true);
262
263	// Only handle simple types.
264	if (Evt == MVT::Other \|\| !Evt.isSimple()) return false;
265	VT = Evt.getSimpleVT();
266
267	// Handle all legal types, i.e. a register that will directly hold this
268	// value.
269	return TLI.isTypeLegal(VT);
270	}
271
272	// Determine whether the type Ty is simple enough to be handled by
273	// fast-isel as a load target, and return its equivalent machine type in VT.
274	bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
275	if (isTypeLegal(Ty, VT)) return true;
276
277	// If this is a type than can be sign or zero-extended to a basic operation
278	// go ahead and accept it now.
279	if (VT == MVT::i8 \|\| VT == MVT::i16 \|\| VT == MVT::i32) {
280	return true;
281	}
282
283	return false;
284	}
285
286	bool PPCFastISel::isValueAvailable(const Value V) const* {
287	if (!isa<Instruction>(Val: V))
288	return true;
289
290	const auto *I = cast<Instruction>(Val: V);
291	return FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB;
292	}
293
294	// Given a value Obj, create an Address object Addr that represents its
295	// address. Return false if we can't handle it.
296	bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
297	const User U = nullptr*;
298	unsigned Opcode = Instruction::UserOp1;
299	if (const Instruction *I = dyn_cast<Instruction>(Val: Obj)) {
300	// Don't walk into other basic blocks unless the object is an alloca from
301	// another block, otherwise it may not have a virtual register assigned.
302	if (FuncInfo.StaticAllocaMap.count(Val: static_cast<const AllocaInst *>(Obj)) \|\|
303	FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB) {
304	Opcode = I->getOpcode();
305	U = I;
306	}
307	} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Val: Obj)) {
308	Opcode = C->getOpcode();
309	U = C;
310	}
311
312	switch (Opcode) {
313	default:
314	break;
315	case Instruction::BitCast:
316	// Look through bitcasts.
317	return PPCComputeAddress(Obj: U->getOperand(i: `0`), Addr);
318	case Instruction::IntToPtr:
319	// Look past no-op inttoptrs.
320	if (TLI.getValueType(DL, Ty: U->getOperand(i: `0`)->getType()) ==
321	TLI.getPointerTy(DL))
322	return PPCComputeAddress(Obj: U->getOperand(i: `0`), Addr);
323	break;
324	case Instruction::PtrToInt:
325	// Look past no-op ptrtoints.
326	if (TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL))
327	return PPCComputeAddress(Obj: U->getOperand(i: `0`), Addr);
328	break;
329	case Instruction::GetElementPtr: {
330	Address SavedAddr = Addr;
331	int64_t TmpOffset = Addr.Offset;
332
333	// Iterate through the GEP folding the constants into offsets where
334	// we can.
335	gep_type_iterator GTI = gep_type_begin(GEP: U);
336	for (User::const_op_iterator II = U->op_begin() + `1`, IE = U->op_end();
337	II != IE; ++II, ++GTI) {
338	const Value Op = II;
339	if (StructType *STy = GTI.getStructTypeOrNull()) {
340	const StructLayout *SL = DL.getStructLayout(Ty: STy);
341	unsigned Idx = cast<ConstantInt>(Val: Op)->getZExtValue();
342	TmpOffset += SL->getElementOffset(Idx);
343	} else {
344	uint64_t S = GTI.getSequentialElementStride(DL);
345	for (;;) {
346	if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Op)) {
347	// Constant-offset addressing.
348	TmpOffset += CI->getSExtValue() * S;
349	break;
350	}
351	if (canFoldAddIntoGEP(GEP: U, Add: Op)) {
352	// A compatible add with a constant operand. Fold the constant.
353	ConstantInt *CI =
354	cast<ConstantInt>(Val: cast<AddOperator>(Val: Op)->getOperand(i_nocapture: `1`));
355	TmpOffset += CI->getSExtValue() * S;
356	// Iterate on the other operand.
357	Op = cast<AddOperator>(Val: Op)->getOperand(i_nocapture: `0`);
358	continue;
359	}
360	// Unsupported
361	goto unsupported_gep;
362	}
363	}
364	}
365
366	// Try to grab the base operand now.
367	Addr.Offset = TmpOffset;
368	if (PPCComputeAddress(Obj: U->getOperand(i: `0`), Addr)) return true;
369
370	// We failed, restore everything and try the other options.
371	Addr = SavedAddr;
372
373	unsupported_gep:
374	break;
375	}
376	case Instruction::Alloca: {
377	const AllocaInst *AI = cast<AllocaInst>(Val: Obj);
378	auto SI = FuncInfo.StaticAllocaMap.find(Val: AI);
379	if (SI != FuncInfo.StaticAllocaMap.end()) {
380	Addr.BaseType = Address::FrameIndexBase;
381	Addr.Base.FI = SI ->second;
382	return true;
383	}
384	break;
385	}
386	}
387
388	// FIXME: References to parameters fall through to the behavior
389	// below. They should be able to reference a frame index since
390	// they are stored to the stack, so we can get "ld rx, offset(r1)"
391	// instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
392	// just contain the parameter. Try to handle this with a FI.
393
394	// Try to get this in a register if nothing else has worked.
395	if (Addr.Base.Reg == `0`)
396	Addr.Base.Reg = getRegForValue(V: Obj);
397
398	// Prevent assignment of base register to X0, which is inappropriate
399	// for loads and stores alike.
400	if (Addr.Base.Reg != `0`)
401	MRI.setRegClass(Reg: Addr.Base.Reg, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
402
403	return Addr.Base.Reg != `0`;
404	}
405
406	// Fix up some addresses that can't be used directly. For example, if
407	// an offset won't fit in an instruction field, we may need to move it
408	// into an index register.
409	void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
410	Register &IndexReg) {
411
412	// Check whether the offset fits in the instruction field.
413	if (!isInt<`16`>(x: Addr.Offset))
414	UseOffset = false;
415
416	// If this is a stack pointer and the offset needs to be simplified then
417	// put the alloca address into a register, set the base type back to
418	// register and continue. This should almost never happen.
419	if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
420	Register ResultReg = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
421	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDI8),
422	DestReg: ResultReg).addFrameIndex(Idx: Addr.Base.FI).addImm(Val: `0`);
423	Addr.Base.Reg = ResultReg;
424	Addr.BaseType = Address::RegBase;
425	}
426
427	if (!UseOffset) {
428	IntegerType OffsetTy = Type::getInt64Ty(C&: Context);
429	const ConstantInt *Offset = ConstantInt::getSigned(Ty: OffsetTy, V: Addr.Offset);
430	IndexReg = PPCMaterializeInt(CI: Offset, VT: MVT::i64);
431	assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
432	}
433	}
434
435	// Emit a load instruction if possible, returning true if we succeeded,
436	// otherwise false. See commentary below for how the register class of
437	// the load is determined.
438	bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
439	const TargetRegisterClass *RC,
440	bool IsZExt, unsigned FP64LoadOpc) {
441	unsigned Opc;
442	bool UseOffset = true;
443	bool HasSPE = Subtarget->hasSPE();
444
445	// If ResultReg is given, it determines the register class of the load.
446	// Otherwise, RC is the register class to use. If the result of the
447	// load isn't anticipated in this block, both may be zero, in which
448	// case we must make a conservative guess. In particular, don't assign
449	// R0 or X0 to the result register, as the result may be used in a load,
450	// store, add-immediate, or isel that won't permit this. (Though
451	// perhaps the spill and reload of live-exit values would handle this?)
452	const TargetRegisterClass *UseRC =
453	(ResultReg ? MRI.getRegClass(Reg: ResultReg) :
454	(RC ? RC :
455	(VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
456	(VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
457	(VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
458	&PPC::GPRC_and_GPRC_NOR0RegClass)))));
459
460	bool Is32BitInt = UseRC->hasSuperClassEq(RC: &PPC::GPRCRegClass);
461
462	switch (VT.SimpleTy) {
463	default: // e.g., vector types not handled
464	return false;
465	case MVT::i8:
466	Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
467	break;
468	case MVT::i16:
469	Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
470	: (Is32BitInt ? PPC::LHA : PPC::LHA8));
471	break;
472	case MVT::i32:
473	Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
474	: (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
475	if ((Opc == PPC::LWA \|\| Opc == PPC::LWA_32) && ((Addr.Offset & `3`) != `0`))
476	UseOffset = false;
477	break;
478	case MVT::i64:
479	Opc = PPC::LD;
480	assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
481	"64-bit load with 32-bit target??");
482	UseOffset = ((Addr.Offset & `3`) == `0`);
483	break;
484	case MVT::f32:
485	Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
486	break;
487	case MVT::f64:
488	Opc = FP64LoadOpc;
489	break;
490	}
491
492	// If necessary, materialize the offset into a register and use
493	// the indexed form. Also handle stack pointers with special needs.
494	Register IndexReg;
495	PPCSimplifyAddress(Addr, UseOffset, IndexReg);
496
497	// If this is a potential VSX load with an offset of 0, a VSX indexed load can
498	// be used.
499	bool IsVSSRC = isVSSRCRegClass(RC: UseRC);
500	bool IsVSFRC = isVSFRCRegClass(RC: UseRC);
501	bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
502	bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
503	if ((Is32VSXLoad \|\| Is64VSXLoad) &&
504	(Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
505	(Addr.Offset == `0`)) {
506	UseOffset = false;
507	}
508
509	if (!ResultReg)
510	ResultReg = createResultReg(RC: UseRC);
511
512	// Note: If we still have a frame index here, we know the offset is
513	// in range, as otherwise PPCSimplifyAddress would have converted it
514	// into a RegBase.
515	if (Addr.BaseType == Address::FrameIndexBase) {
516	// VSX only provides an indexed load.
517	if (Is32VSXLoad \|\| Is64VSXLoad) return false;
518
519	MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
520	PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI: Addr.Base.FI,
521	Offset: Addr.Offset),
522	F: MachineMemOperand::MOLoad, Size: MFI.getObjectSize(ObjectIdx: Addr.Base.FI),
523	BaseAlignment: MFI.getObjectAlign(ObjectIdx: Addr.Base.FI));
524
525	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
526	.addImm(Val: Addr.Offset).addFrameIndex(Idx: Addr.Base.FI).addMemOperand(MMO);
527
528	// Base reg with offset in range.
529	} else if (UseOffset) {
530	// VSX only provides an indexed load.
531	if (Is32VSXLoad \|\| Is64VSXLoad) return false;
532
533	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
534	.addImm(Val: Addr.Offset).addReg(RegNo: Addr.Base.Reg);
535
536	// Indexed form.
537	} else {
538	// Get the RR opcode corresponding to the RI one. FIXME: It would be
539	// preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
540	// is hard to get at.
541	switch (Opc) {
542	default: llvm_unreachable("Unexpected opcode!");
543	case PPC::LBZ: Opc = PPC::LBZX; break;
544	case PPC::LBZ8: Opc = PPC::LBZX8; break;
545	case PPC::LHZ: Opc = PPC::LHZX; break;
546	case PPC::LHZ8: Opc = PPC::LHZX8; break;
547	case PPC::LHA: Opc = PPC::LHAX; break;
548	case PPC::LHA8: Opc = PPC::LHAX8; break;
549	case PPC::LWZ: Opc = PPC::LWZX; break;
550	case PPC::LWZ8: Opc = PPC::LWZX8; break;
551	case PPC::LWA: Opc = PPC::LWAX; break;
552	case PPC::LWA_32: Opc = PPC::LWAX_32; break;
553	case PPC::LD: Opc = PPC::LDX; break;
554	case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
555	case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
556	case PPC::EVLDD: Opc = PPC::EVLDDX; break;
557	case PPC::SPELWZ: Opc = PPC::SPELWZX; break;
558	}
559
560	auto MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc),
561	DestReg: ResultReg);
562
563	// If we have an index register defined we use it in the store inst,
564	// otherwise we use X0 as base as it makes the vector instructions to
565	// use zero in the computation of the effective address regardless the
566	// content of the register.
567	if (IndexReg)
568	MIB.addReg(RegNo: Addr.Base.Reg).addReg(RegNo: IndexReg);
569	else
570	MIB.addReg(RegNo: PPC::ZERO8).addReg(RegNo: Addr.Base.Reg);
571	}
572
573	return true;
574	}
575
576	// Attempt to fast-select a load instruction.
577	bool PPCFastISel::SelectLoad(const Instruction *I) {
578	// FIXME: No atomic loads are supported.
579	if (cast<LoadInst>(Val: I)->isAtomic())
580	return false;
581
582	// Verify we have a legal type before going any further.
583	MVT VT;
584	if (!isLoadTypeLegal(Ty: I->getType(), VT))
585	return false;
586
587	// See if we can handle this address.
588	Address Addr;
589	if (!PPCComputeAddress(Obj: I->getOperand(i: `0`), Addr))
590	return false;
591
592	// Look at the currently assigned register for this instruction
593	// to determine the required register class. This is necessary
594	// to constrain RA from using R0/X0 when this is not legal.
595	Register AssignedReg = FuncInfo.ValueMap [I];
596	const TargetRegisterClass *RC =
597	AssignedReg ? MRI.getRegClass(Reg: AssignedReg) : nullptr;
598
599	Register ResultReg = `0`;
600	if (!PPCEmitLoad(VT, ResultReg, Addr, RC, IsZExt: true,
601	FP64LoadOpc: Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
602	return false;
603	updateValueMap(I, Reg: ResultReg);
604	return true;
605	}
606
607	// Emit a store instruction to store SrcReg at Addr.
608	bool PPCFastISel::PPCEmitStore(MVT VT, Register SrcReg, Address &Addr) {
609	assert(SrcReg && "Nothing to store!");
610	unsigned Opc;
611	bool UseOffset = true;
612
613	const TargetRegisterClass *RC = MRI.getRegClass(Reg: SrcReg);
614	bool Is32BitInt = RC->hasSuperClassEq(RC: &PPC::GPRCRegClass);
615
616	switch (VT.SimpleTy) {
617	default: // e.g., vector types not handled
618	return false;
619	case MVT::i8:
620	Opc = Is32BitInt ? PPC::STB : PPC::STB8;
621	break;
622	case MVT::i16:
623	Opc = Is32BitInt ? PPC::STH : PPC::STH8;
624	break;
625	case MVT::i32:
626	assert(Is32BitInt && "Not GPRC for i32??");
627	Opc = PPC::STW;
628	break;
629	case MVT::i64:
630	Opc = PPC::STD;
631	UseOffset = ((Addr.Offset & `3`) == `0`);
632	break;
633	case MVT::f32:
634	Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
635	break;
636	case MVT::f64:
637	Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
638	break;
639	}
640
641	// If necessary, materialize the offset into a register and use
642	// the indexed form. Also handle stack pointers with special needs.
643	Register IndexReg;
644	PPCSimplifyAddress(Addr, UseOffset, IndexReg);
645
646	// If this is a potential VSX store with an offset of 0, a VSX indexed store
647	// can be used.
648	bool IsVSSRC = isVSSRCRegClass(RC);
649	bool IsVSFRC = isVSFRCRegClass(RC);
650	bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
651	bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
652	if ((Is32VSXStore \|\| Is64VSXStore) &&
653	(Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
654	(Addr.Offset == `0`)) {
655	UseOffset = false;
656	}
657
658	// Note: If we still have a frame index here, we know the offset is
659	// in range, as otherwise PPCSimplifyAddress would have converted it
660	// into a RegBase.
661	if (Addr.BaseType == Address::FrameIndexBase) {
662	// VSX only provides an indexed store.
663	if (Is32VSXStore \|\| Is64VSXStore) return false;
664
665	MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
666	PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI: Addr.Base.FI,
667	Offset: Addr.Offset),
668	F: MachineMemOperand::MOStore, Size: MFI.getObjectSize(ObjectIdx: Addr.Base.FI),
669	BaseAlignment: MFI.getObjectAlign(ObjectIdx: Addr.Base.FI));
670
671	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
672	.addReg(RegNo: SrcReg)
673	.addImm(Val: Addr.Offset)
674	.addFrameIndex(Idx: Addr.Base.FI)
675	.addMemOperand(MMO);
676
677	// Base reg with offset in range.
678	} else if (UseOffset) {
679	// VSX only provides an indexed store.
680	if (Is32VSXStore \|\| Is64VSXStore)
681	return false;
682
683	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
684	.addReg(RegNo: SrcReg).addImm(Val: Addr.Offset).addReg(RegNo: Addr.Base.Reg);
685
686	// Indexed form.
687	} else {
688	// Get the RR opcode corresponding to the RI one. FIXME: It would be
689	// preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
690	// is hard to get at.
691	switch (Opc) {
692	default: llvm_unreachable("Unexpected opcode!");
693	case PPC::STB: Opc = PPC::STBX; break;
694	case PPC::STH : Opc = PPC::STHX; break;
695	case PPC::STW : Opc = PPC::STWX; break;
696	case PPC::STB8: Opc = PPC::STBX8; break;
697	case PPC::STH8: Opc = PPC::STHX8; break;
698	case PPC::STW8: Opc = PPC::STWX8; break;
699	case PPC::STD: Opc = PPC::STDX; break;
700	case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
701	case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
702	case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
703	case PPC::SPESTW: Opc = PPC::SPESTWX; break;
704	}
705
706	auto MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
707	.addReg(RegNo: SrcReg);
708
709	// If we have an index register defined we use it in the store inst,
710	// otherwise we use X0 as base as it makes the vector instructions to
711	// use zero in the computation of the effective address regardless the
712	// content of the register.
713	if (IndexReg)
714	MIB.addReg(RegNo: Addr.Base.Reg).addReg(RegNo: IndexReg);
715	else
716	MIB.addReg(RegNo: PPC::ZERO8).addReg(RegNo: Addr.Base.Reg);
717	}
718
719	return true;
720	}
721
722	// Attempt to fast-select a store instruction.
723	bool PPCFastISel::SelectStore(const Instruction *I) {
724	Value *Op0 = I->getOperand(i: `0`);
725	Register SrcReg;
726
727	// FIXME: No atomics loads are supported.
728	if (cast<StoreInst>(Val: I)->isAtomic())
729	return false;
730
731	// Verify we have a legal type before going any further.
732	MVT VT;
733	if (!isLoadTypeLegal(Ty: Op0->getType(), VT))
734	return false;
735
736	// Get the value to be stored into a register.
737	SrcReg = getRegForValue(V: Op0);
738	if (!SrcReg)
739	return false;
740
741	// See if we can handle this address.
742	Address Addr;
743	if (!PPCComputeAddress(Obj: I->getOperand(i: `1`), Addr))
744	return false;
745
746	if (!PPCEmitStore(VT, SrcReg, Addr))
747	return false;
748
749	return true;
750	}
751
752	// Attempt to fast-select a branch instruction.
753	bool PPCFastISel::SelectBranch(const Instruction *I) {
754	const CondBrInst *BI = cast<CondBrInst>(Val: I);
755	MachineBasicBlock *BrBB = FuncInfo.MBB;
756	MachineBasicBlock *TBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: `0`));
757	MachineBasicBlock *FBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: `1`));
758
759	// For now, just try the simplest case where it's fed by a compare.
760	if (const CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition())) {
761	if (isValueAvailable(V: CI)) {
762	std::optional<PPC::Predicate> OptPPCPred =
763	getComparePred(Pred: CI->getPredicate());
764	if (!OptPPCPred)
765	return false;
766
767	PPC::Predicate PPCPred = *OptPPCPred;
768
769	// Take advantage of fall-through opportunities.
770	if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
771	std::swap(a&: TBB, b&: FBB);
772	PPCPred = PPC::InvertPredicate(Opcode: PPCPred);
773	}
774
775	Register CondReg = createResultReg(RC: &PPC::CRRCRegClass);
776
777	if (!PPCEmitCmp(Src1Value: CI->getOperand(i_nocapture: `0`), Src2Value: CI->getOperand(i_nocapture: `1`), isZExt: CI->isUnsigned(),
778	DestReg: CondReg, Pred: PPCPred))
779	return false;
780
781	BuildMI(BB&: *BrBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::BCC))
782	.addImm(Val: Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
783	.addReg(RegNo: CondReg)
784	.addMBB(MBB: TBB);
785	finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
786	return true;
787	}
788	} else if (const ConstantInt *CI =
789	dyn_cast<ConstantInt>(Val: BI->getCondition())) {
790	uint64_t Imm = CI->getZExtValue();
791	MachineBasicBlock *Target = (Imm == `0`) ? FBB : TBB;
792	fastEmitBranch(MSucc: Target, DbgLoc: MIMD.getDL());
793	return true;
794	}
795
796	// FIXME: ARM looks for a case where the block containing the compare
797	// has been split from the block containing the branch. If this happens,
798	// there is a vreg available containing the result of the compare. I'm
799	// not sure we can do much, as we've lost the predicate information with
800	// the compare instruction -- we have a 4-bit CR but don't know which bit
801	// to test here.
802	return false;
803	}
804
805	// Attempt to emit a compare of the two source values. Signed and unsigned
806	// comparisons are supported. Return false if we can't handle it.
807	bool PPCFastISel::PPCEmitCmp(const Value SrcValue1, const* Value *SrcValue2,
808	bool IsZExt, Register DestReg,
809	const PPC::Predicate Pred) {
810	Type *Ty = SrcValue1->getType();
811	EVT SrcEVT = TLI.getValueType(DL, Ty, AllowUnknown: true);
812	if (!SrcEVT.isSimple())
813	return false;
814	MVT SrcVT = SrcEVT.getSimpleVT();
815
816	if (SrcVT == MVT::i1 && Subtarget->useCRBits())
817	return false;
818
819	// See if operand 2 is an immediate encodeable in the compare.
820	// FIXME: Operands are not in canonical order at -O0, so an immediate
821	// operand in position 1 is a lost opportunity for now. We are
822	// similar to ARM in this regard.
823	int64_t Imm = `0`;
824	bool UseImm = false;
825	const bool HasSPE = Subtarget->hasSPE();
826
827	// Only 16-bit integer constants can be represented in compares for
828	// PowerPC. Others will be materialized into a register.
829	if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: SrcValue2)) {
830	if (SrcVT == MVT::i64 \|\| SrcVT == MVT::i32 \|\| SrcVT == MVT::i16 \|\|
831	SrcVT == MVT::i8 \|\| SrcVT == MVT::i1) {
832	const APInt &CIVal = ConstInt->getValue();
833	Imm = (IsZExt) ? (int64_t)CIVal.getZExtValue() :
834	(int64_t)CIVal.getSExtValue();
835	if ((IsZExt && isUInt<`16`>(x: Imm)) \|\| (!IsZExt && isInt<`16`>(x: Imm)))
836	UseImm = true;
837	}
838	}
839
840	Register SrcReg1 = getRegForValue(V: SrcValue1);
841	if (!SrcReg1)
842	return false;
843
844	Register SrcReg2;
845	if (!UseImm) {
846	SrcReg2 = getRegForValue(V: SrcValue2);
847	if (!SrcReg2)
848	return false;
849	}
850
851	unsigned CmpOpc;
852	bool NeedsExt = false;
853
854	auto RC1 = MRI.getRegClass(Reg: SrcReg1);
855	auto RC2 = SrcReg2 != `0` ? MRI.getRegClass(Reg: SrcReg2) : nullptr;
856
857	switch (SrcVT.SimpleTy) {
858	default: return false;
859	case MVT::f32:
860	if (HasSPE) {
861	switch (Pred) {
862	default: return false;
863	case PPC::PRED_EQ:
864	CmpOpc = PPC::EFSCMPEQ;
865	break;
866	case PPC::PRED_LT:
867	CmpOpc = PPC::EFSCMPLT;
868	break;
869	case PPC::PRED_GT:
870	CmpOpc = PPC::EFSCMPGT;
871	break;
872	}
873	} else {
874	CmpOpc = PPC::FCMPUS;
875	if (isVSSRCRegClass(RC: RC1))
876	SrcReg1 = copyRegToRegClass(ToRC: &PPC::F4RCRegClass, SrcReg: SrcReg1);
877	if (RC2 && isVSSRCRegClass(RC: RC2))
878	SrcReg2 = copyRegToRegClass(ToRC: &PPC::F4RCRegClass, SrcReg: SrcReg2);
879	}
880	break;
881	case MVT::f64:
882	if (HasSPE) {
883	switch (Pred) {
884	default: return false;
885	case PPC::PRED_EQ:
886	CmpOpc = PPC::EFDCMPEQ;
887	break;
888	case PPC::PRED_LT:
889	CmpOpc = PPC::EFDCMPLT;
890	break;
891	case PPC::PRED_GT:
892	CmpOpc = PPC::EFDCMPGT;
893	break;
894	}
895	} else if (isVSFRCRegClass(RC: RC1) \|\| (RC2 && isVSFRCRegClass(RC: RC2))) {
896	CmpOpc = PPC::XSCMPUDP;
897	} else {
898	CmpOpc = PPC::FCMPUD;
899	}
900	break;
901	case MVT::i1:
902	case MVT::i8:
903	case MVT::i16:
904	NeedsExt = true;
905	[[fallthrough]];
906	case MVT::i32:
907	if (!UseImm)
908	CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
909	else
910	CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
911	break;
912	case MVT::i64:
913	if (!UseImm)
914	CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
915	else
916	CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
917	break;
918	}
919
920	if (NeedsExt) {
921	Register ExtReg = createResultReg(RC: &PPC::GPRCRegClass);
922	if (!PPCEmitIntExt(SrcVT, SrcReg: SrcReg1, DestVT: MVT::i32, DestReg: ExtReg, IsZExt))
923	return false;
924	SrcReg1 = ExtReg;
925
926	if (!UseImm) {
927	Register ExtReg = createResultReg(RC: &PPC::GPRCRegClass);
928	if (!PPCEmitIntExt(SrcVT, SrcReg: SrcReg2, DestVT: MVT::i32, DestReg: ExtReg, IsZExt))
929	return false;
930	SrcReg2 = ExtReg;
931	}
932	}
933
934	if (!UseImm)
935	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: CmpOpc), DestReg)
936	.addReg(RegNo: SrcReg1).addReg(RegNo: SrcReg2);
937	else
938	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: CmpOpc), DestReg)
939	.addReg(RegNo: SrcReg1).addImm(Val: Imm);
940
941	return true;
942	}
943
944	// Attempt to fast-select a floating-point extend instruction.
945	bool PPCFastISel::SelectFPExt(const Instruction *I) {
946	Value *Src = I->getOperand(i: `0`);
947	EVT SrcVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
948	EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
949
950	if (SrcVT != MVT::f32 \|\| DestVT != MVT::f64)
951	return false;
952
953	Register SrcReg = getRegForValue(V: Src);
954	if (!SrcReg)
955	return false;
956
957	// No code is generated for a FP extend.
958	updateValueMap(I, Reg: SrcReg);
959	return true;
960	}
961
962	// Attempt to fast-select a floating-point truncate instruction.
963	bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
964	Value *Src = I->getOperand(i: `0`);
965	EVT SrcVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
966	EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
967
968	if (SrcVT != MVT::f64 \|\| DestVT != MVT::f32)
969	return false;
970
971	Register SrcReg = getRegForValue(V: Src);
972	if (!SrcReg)
973	return false;
974
975	// Round the result to single precision.
976	Register DestReg;
977	auto RC = MRI.getRegClass(Reg: SrcReg);
978	if (Subtarget->hasSPE()) {
979	DestReg = createResultReg(RC: &PPC::GPRCRegClass);
980	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::EFSCFD),
981	DestReg)
982	.addReg(RegNo: SrcReg);
983	} else if (Subtarget->hasP8Vector() && isVSFRCRegClass(RC)) {
984	DestReg = createResultReg(RC: &PPC::VSSRCRegClass);
985	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::XSRSP),
986	DestReg)
987	.addReg(RegNo: SrcReg);
988	} else {
989	SrcReg = copyRegToRegClass(ToRC: &PPC::F8RCRegClass, SrcReg);
990	DestReg = createResultReg(RC: &PPC::F4RCRegClass);
991	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
992	MCID: TII.get(Opcode: PPC::FRSP), DestReg)
993	.addReg(RegNo: SrcReg);
994	}
995
996	updateValueMap(I, Reg: DestReg);
997	return true;
998	}
999
1000	// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
1001	// FIXME: When direct register moves are implemented (see PowerISA 2.07),
1002	// those should be used instead of moving via a stack slot when the
1003	// subtarget permits.
1004	// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
1005	// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
1006	// case to 8 bytes which produces tighter code but wastes stack space.
1007	Register PPCFastISel::PPCMoveToFPReg(MVT SrcVT, Register SrcReg,
1008	bool IsSigned) {
1009
1010	// If necessary, extend 32-bit int to 64-bit.
1011	if (SrcVT == MVT::i32) {
1012	Register TmpReg = createResultReg(RC: &PPC::G8RCRegClass);
1013	if (!PPCEmitIntExt(SrcVT: MVT::i32, SrcReg, DestVT: MVT::i64, DestReg: TmpReg, IsZExt: !IsSigned))
1014	return Register ();
1015	SrcReg = TmpReg;
1016	}
1017
1018	// Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
1019	Address Addr;
1020	Addr.BaseType = Address::FrameIndexBase;
1021	Addr.Base.FI = MFI.CreateStackObject(Size: `8`, Alignment: Align (`8`), isSpillSlot: false);
1022
1023	// Store the value from the GPR.
1024	if (!PPCEmitStore(VT: MVT::i64, SrcReg, Addr))
1025	return Register ();
1026
1027	// Load the integer value into an FPR. The kind of load used depends
1028	// on a number of conditions.
1029	unsigned LoadOpc = PPC::LFD;
1030
1031	if (SrcVT == MVT::i32) {
1032	if (!IsSigned) {
1033	LoadOpc = PPC::LFIWZX;
1034	Addr.Offset = (Subtarget->isLittleEndian()) ? `0` : `4`;
1035	} else if (Subtarget->hasLFIWAX()) {
1036	LoadOpc = PPC::LFIWAX;
1037	Addr.Offset = (Subtarget->isLittleEndian()) ? `0` : `4`;
1038	}
1039	}
1040
1041	const TargetRegisterClass *RC = &PPC::F8RCRegClass;
1042	Register ResultReg;
1043	if (!PPCEmitLoad(VT: MVT::f64, ResultReg, Addr, RC, IsZExt: !IsSigned, FP64LoadOpc: LoadOpc))
1044	return Register ();
1045
1046	return ResultReg;
1047	}
1048
1049	// Attempt to fast-select an integer-to-floating-point conversion.
1050	// FIXME: Once fast-isel has better support for VSX, conversions using
1051	// direct moves should be implemented.
1052	bool PPCFastISel::SelectIToFP(const Instruction I, bool* IsSigned) {
1053	MVT DstVT;
1054	Type *DstTy = I->getType();
1055	if (!isTypeLegal(Ty: DstTy, VT&: DstVT))
1056	return false;
1057
1058	if (DstVT != MVT::f32 && DstVT != MVT::f64)
1059	return false;
1060
1061	Value *Src = I->getOperand(i: `0`);
1062	EVT SrcEVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
1063	if (!SrcEVT.isSimple())
1064	return false;
1065
1066	MVT SrcVT = SrcEVT.getSimpleVT();
1067
1068	if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
1069	SrcVT != MVT::i32 && SrcVT != MVT::i64)
1070	return false;
1071
1072	Register SrcReg = getRegForValue(V: Src);
1073	if (!SrcReg)
1074	return false;
1075
1076	// Shortcut for SPE. Doesn't need to store/load, since it's all in the GPRs
1077	if (Subtarget->hasSPE()) {
1078	unsigned Opc;
1079	if (DstVT == MVT::f32)
1080	Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
1081	else
1082	Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;
1083
1084	Register DestReg = createResultReg(RC: &PPC::SPERCRegClass);
1085	// Generate the convert.
1086	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
1087	.addReg(RegNo: SrcReg);
1088	updateValueMap(I, Reg: DestReg);
1089	return true;
1090	}
1091
1092	// We can only lower an unsigned convert if we have the newer
1093	// floating-point conversion operations.
1094	if (!IsSigned && !Subtarget->hasFPCVT())
1095	return false;
1096
1097	// FIXME: For now we require the newer floating-point conversion operations
1098	// (which are present only on P7 and A2 server models) when converting
1099	// to single-precision float. Otherwise we have to generate a lot of
1100	// fiddly code to avoid double rounding. If necessary, the fiddly code
1101	// can be found in PPCTargetLowering::LowerINT_TO_FP().
1102	if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
1103	return false;
1104
1105	// Extend the input if necessary.
1106	if (SrcVT == MVT::i8 \|\| SrcVT == MVT::i16) {
1107	Register TmpReg = createResultReg(RC: &PPC::G8RCRegClass);
1108	if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT: MVT::i64, DestReg: TmpReg, IsZExt: !IsSigned))
1109	return false;
1110	SrcVT = MVT::i64;
1111	SrcReg = TmpReg;
1112	}
1113
1114	// Move the integer value to an FPR.
1115	Register FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
1116	if (!FPReg)
1117	return false;
1118
1119	// Determine the opcode for the conversion.
1120	const TargetRegisterClass *RC = &PPC::F8RCRegClass;
1121	Register DestReg = createResultReg(RC);
1122	unsigned Opc;
1123
1124	if (DstVT == MVT::f32)
1125	Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
1126	else
1127	Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
1128
1129	// Generate the convert.
1130	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
1131	.addReg(RegNo: FPReg);
1132
1133	updateValueMap(I, Reg: DestReg);
1134	return true;
1135	}
1136
1137	// Move the floating-point value in SrcReg into an integer destination
1138	// register, and return the register (or zero if we can't handle it).
1139	// FIXME: When direct register moves are implemented (see PowerISA 2.07),
1140	// those should be used instead of moving via a stack slot when the
1141	// subtarget permits.
1142	Register PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
1143	Register SrcReg, bool IsSigned) {
1144	// Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
1145	// Note that if have STFIWX available, we could use a 4-byte stack
1146	// slot for i32, but this being fast-isel we'll just go with the
1147	// easiest code gen possible.
1148	Address Addr;
1149	Addr.BaseType = Address::FrameIndexBase;
1150	Addr.Base.FI = MFI.CreateStackObject(Size: `8`, Alignment: Align (`8`), isSpillSlot: false);
1151
1152	// Store the value from the FPR.
1153	if (!PPCEmitStore(VT: MVT::f64, SrcReg, Addr))
1154	return Register ();
1155
1156	// Reload it into a GPR. If we want an i32 on big endian, modify the
1157	// address to have a 4-byte offset so we load from the right place.
1158	if (VT == MVT::i32)
1159	Addr.Offset = (Subtarget->isLittleEndian()) ? `0` : `4`;
1160
1161	// Look at the currently assigned register for this instruction
1162	// to determine the required register class.
1163	Register AssignedReg = FuncInfo.ValueMap [I];
1164	const TargetRegisterClass *RC =
1165	AssignedReg ? MRI.getRegClass(Reg: AssignedReg) : nullptr;
1166
1167	Register ResultReg;
1168	if (!PPCEmitLoad(VT, ResultReg, Addr, RC, IsZExt: !IsSigned))
1169	return Register ();
1170
1171	return ResultReg;
1172	}
1173
1174	// Attempt to fast-select a floating-point-to-integer conversion.
1175	// FIXME: Once fast-isel has better support for VSX, conversions using
1176	// direct moves should be implemented.
1177	bool PPCFastISel::SelectFPToI(const Instruction I, bool* IsSigned) {
1178	MVT DstVT, SrcVT;
1179	Type *DstTy = I->getType();
1180	if (!isTypeLegal(Ty: DstTy, VT&: DstVT))
1181	return false;
1182
1183	if (DstVT != MVT::i32 && DstVT != MVT::i64)
1184	return false;
1185
1186	// If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
1187	if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
1188	!Subtarget->hasSPE())
1189	return false;
1190
1191	Value *Src = I->getOperand(i: `0`);
1192	Type *SrcTy = Src->getType();
1193	if (!isTypeLegal(Ty: SrcTy, VT&: SrcVT))
1194	return false;
1195
1196	if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
1197	return false;
1198
1199	Register SrcReg = getRegForValue(V: Src);
1200	if (!SrcReg)
1201	return false;
1202
1203	// Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
1204	// meaningless copy to get the register class right.
1205	const TargetRegisterClass *InRC = MRI.getRegClass(Reg: SrcReg);
1206	if (InRC == &PPC::F4RCRegClass)
1207	SrcReg = copyRegToRegClass(ToRC: &PPC::F8RCRegClass, SrcReg);
1208	else if (InRC == &PPC::VSSRCRegClass)
1209	SrcReg = copyRegToRegClass(ToRC: &PPC::VSFRCRegClass, SrcReg);
1210
1211	// Determine the opcode for the conversion, which takes place
1212	// entirely within FPRs or VSRs.
1213	Register DestReg;
1214	unsigned Opc;
1215	auto RC = MRI.getRegClass(Reg: SrcReg);
1216
1217	if (Subtarget->hasSPE()) {
1218	DestReg = createResultReg(RC: &PPC::GPRCRegClass);
1219	if (IsSigned)
1220	Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
1221	else
1222	Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
1223	} else if (isVSFRCRegClass(RC)) {
1224	DestReg = createResultReg(RC: &PPC::VSFRCRegClass);
1225	if (DstVT == MVT::i32)
1226	Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
1227	else
1228	Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
1229	} else {
1230	DestReg = createResultReg(RC: &PPC::F8RCRegClass);
1231	if (DstVT == MVT::i32)
1232	if (IsSigned)
1233	Opc = PPC::FCTIWZ;
1234	else
1235	Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
1236	else
1237	Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
1238	}
1239
1240	// Generate the convert.
1241	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
1242	.addReg(RegNo: SrcReg);
1243
1244	// Now move the integer value from a float register to an integer register.
1245	Register IntReg = Subtarget->hasSPE()
1246	? DestReg
1247	: PPCMoveToIntReg(I, VT: DstVT, SrcReg: DestReg, IsSigned);
1248
1249	if (!IntReg)
1250	return false;
1251
1252	updateValueMap(I, Reg: IntReg);
1253	return true;
1254	}
1255
1256	// Attempt to fast-select a binary integer operation that isn't already
1257	// handled automatically.
1258	bool PPCFastISel::SelectBinaryIntOp(const Instruction I, unsigned* ISDOpcode) {
1259	EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
1260
1261	// We can get here in the case when we have a binary operation on a non-legal
1262	// type and the target independent selector doesn't know how to handle it.
1263	if (DestVT != MVT::i16 && DestVT != MVT::i8)
1264	return false;
1265
1266	// Look at the currently assigned register for this instruction
1267	// to determine the required register class. If there is no register,
1268	// make a conservative choice (don't assign R0).
1269	Register AssignedReg = FuncInfo.ValueMap [I];
1270	const TargetRegisterClass *RC =
1271	(AssignedReg ? MRI.getRegClass(Reg: AssignedReg) :
1272	&PPC::GPRC_and_GPRC_NOR0RegClass);
1273	bool IsGPRC = RC->hasSuperClassEq(RC: &PPC::GPRCRegClass);
1274
1275	unsigned Opc;
1276	switch (ISDOpcode) {
1277	default: return false;
1278	case ISD::ADD:
1279	Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
1280	break;
1281	case ISD::OR:
1282	Opc = IsGPRC ? PPC::OR : PPC::OR8;
1283	break;
1284	case ISD::SUB:
1285	Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
1286	break;
1287	}
1288
1289	Register ResultReg = createResultReg(RC: RC ? RC : &PPC::G8RCRegClass);
1290	Register SrcReg1 = getRegForValue(V: I->getOperand(i: `0`));
1291	if (!SrcReg1)
1292	return false;
1293
1294	// Handle case of small immediate operand.
1295	if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: I->getOperand(i: `1`))) {
1296	const APInt &CIVal = ConstInt->getValue();
1297	int Imm = (int)CIVal.getSExtValue();
1298	bool UseImm = true;
1299	if (isInt<`16`>(x: Imm)) {
1300	switch (Opc) {
1301	default:
1302	llvm_unreachable("Missing case!");
1303	case PPC::ADD4:
1304	Opc = PPC::ADDI;
1305	MRI.setRegClass(Reg: SrcReg1, RC: &PPC::GPRC_and_GPRC_NOR0RegClass);
1306	break;
1307	case PPC::ADD8:
1308	Opc = PPC::ADDI8;
1309	MRI.setRegClass(Reg: SrcReg1, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
1310	break;
1311	case PPC::OR:
1312	Opc = PPC::ORI;
1313	break;
1314	case PPC::OR8:
1315	Opc = PPC::ORI8;
1316	break;
1317	case PPC::SUBF:
1318	if (Imm == -`32768`)
1319	UseImm = false;
1320	else {
1321	Opc = PPC::ADDI;
1322	MRI.setRegClass(Reg: SrcReg1, RC: &PPC::GPRC_and_GPRC_NOR0RegClass);
1323	Imm = -Imm;
1324	}
1325	break;
1326	case PPC::SUBF8:
1327	if (Imm == -`32768`)
1328	UseImm = false;
1329	else {
1330	Opc = PPC::ADDI8;
1331	MRI.setRegClass(Reg: SrcReg1, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
1332	Imm = -Imm;
1333	}
1334	break;
1335	}
1336
1337	if (UseImm) {
1338	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc),
1339	DestReg: ResultReg)
1340	.addReg(RegNo: SrcReg1)
1341	.addImm(Val: Imm);
1342	updateValueMap(I, Reg: ResultReg);
1343	return true;
1344	}
1345	}
1346	}
1347
1348	// Reg-reg case.
1349	Register SrcReg2 = getRegForValue(V: I->getOperand(i: `1`));
1350	if (!SrcReg2)
1351	return false;
1352
1353	// Reverse operands for subtract-from.
1354	if (ISDOpcode == ISD::SUB)
1355	std::swap(a&: SrcReg1, b&: SrcReg2);
1356
1357	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
1358	.addReg(RegNo: SrcReg1).addReg(RegNo: SrcReg2);
1359	updateValueMap(I, Reg: ResultReg);
1360	return true;
1361	}
1362
1363	// Handle arguments to a call that we're attempting to fast-select.
1364	// Return false if the arguments are too complex for us at the moment.
1365	bool PPCFastISel::processCallArgs(SmallVectorImpl<Value *> &Args,
1366	SmallVectorImpl<Register> &ArgRegs,
1367	SmallVectorImpl<MVT> &ArgVTs,
1368	SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1369	SmallVectorImpl<unsigned> &RegArgs,
1370	CallingConv::ID CC, unsigned &NumBytes,
1371	bool IsVarArg) {
1372	SmallVector<CCValAssign, `16`> ArgLocs;
1373	CCState CCInfo(CC, IsVarArg, FuncInfo.MF, ArgLocs, Context);
1374
1375	// Reserve space for the linkage area on the stack.
1376	unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
1377	CCInfo.AllocateStack(Size: LinkageSize, Alignment: Align (`8`));
1378
1379	SmallVector<Type *, `16`> ArgTys;
1380	for (Value *Arg : Args)
1381	ArgTys.push_back(Elt: Arg->getType());
1382	CCInfo.AnalyzeCallOperands(ArgVTs, Flags&: ArgFlags, OrigTys&: ArgTys, Fn: CC_PPC64_ELF_FIS);
1383
1384	// Bail out if we can't handle any of the arguments.
1385	for (const CCValAssign &VA : ArgLocs) {
1386	MVT ArgVT = ArgVTs [VA.getValNo()];
1387
1388	// Skip vector arguments for now, as well as long double and
1389	// uint128_t, and anything that isn't passed in a register.
1390	if (ArgVT.isVector() \|\| ArgVT.getSizeInBits() > `64` \|\| ArgVT == MVT::i1 \|\|
1391	!VA.isRegLoc() \|\| VA.needsCustom())
1392	return false;
1393
1394	// Skip bit-converted arguments for now.
1395	if (VA.getLocInfo() == CCValAssign::BCvt)
1396	return false;
1397	}
1398
1399	// Get a count of how many bytes are to be pushed onto the stack.
1400	NumBytes = CCInfo.getStackSize();
1401
1402	// The prolog code of the callee may store up to 8 GPR argument registers to
1403	// the stack, allowing va_start to index over them in memory if its varargs.
1404	// Because we cannot tell if this is needed on the caller side, we have to
1405	// conservatively assume that it is needed. As such, make sure we have at
1406	// least enough stack space for the caller to store the 8 GPRs.
1407	// FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
1408	NumBytes = std::max(a: NumBytes, b: LinkageSize + `64`);
1409
1410	// Issue CALLSEQ_START.
1411	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1412	MCID: TII.get(Opcode: TII.getCallFrameSetupOpcode()))
1413	.addImm(Val: NumBytes).addImm(Val: `0`);
1414
1415	// Prepare to assign register arguments. Every argument uses up a
1416	// GPR protocol register even if it's passed in a floating-point
1417	// register (unless we're using the fast calling convention).
1418	unsigned NextGPR = PPC::X3;
1419	unsigned NextFPR = PPC::F1;
1420
1421	// Process arguments.
1422	for (const CCValAssign &VA : ArgLocs) {
1423	Register Arg = ArgRegs [VA.getValNo()];
1424	MVT ArgVT = ArgVTs [VA.getValNo()];
1425
1426	// Handle argument promotion and bitcasts.
1427	switch (VA.getLocInfo()) {
1428	default:
1429	llvm_unreachable("Unknown loc info!");
1430	case CCValAssign::Full:
1431	break;
1432	case CCValAssign::SExt: {
1433	MVT DestVT = VA.getLocVT();
1434	const TargetRegisterClass *RC =
1435	(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1436	Register TmpReg = createResultReg(RC);
1437	if (!PPCEmitIntExt(SrcVT: ArgVT, SrcReg: Arg, DestVT, DestReg: TmpReg, /IsZExt/false))
1438	llvm_unreachable("Failed to emit a sext!");
1439	ArgVT = DestVT;
1440	Arg = TmpReg;
1441	break;
1442	}
1443	case CCValAssign::AExt:
1444	case CCValAssign::ZExt: {
1445	MVT DestVT = VA.getLocVT();
1446	const TargetRegisterClass *RC =
1447	(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1448	Register TmpReg = createResultReg(RC);
1449	if (!PPCEmitIntExt(SrcVT: ArgVT, SrcReg: Arg, DestVT, DestReg: TmpReg, /IsZExt/true))
1450	llvm_unreachable("Failed to emit a zext!");
1451	ArgVT = DestVT;
1452	Arg = TmpReg;
1453	break;
1454	}
1455	case CCValAssign::BCvt: {
1456	// FIXME: Not yet handled.
1457	llvm_unreachable("Should have bailed before getting here!");
1458	break;
1459	}
1460	}
1461
1462	// Copy this argument to the appropriate register.
1463	unsigned ArgReg;
1464	if (ArgVT == MVT::f32 \|\| ArgVT == MVT::f64) {
1465	ArgReg = NextFPR++;
1466	if (CC != CallingConv::Fast)
1467	++NextGPR;
1468	} else
1469	ArgReg = NextGPR++;
1470
1471	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1472	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ArgReg).addReg(RegNo: Arg);
1473	RegArgs.push_back(Elt: ArgReg);
1474	}
1475
1476	return true;
1477	}
1478
1479	// For a call that we've determined we can fast-select, finish the
1480	// call sequence and generate a copy to obtain the return value (if any).
1481	bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
1482	CallingConv::ID CC = CLI.CallConv;
1483
1484	// Issue CallSEQ_END.
1485	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1486	MCID: TII.get(Opcode: TII.getCallFrameDestroyOpcode()))
1487	.addImm(Val: NumBytes).addImm(Val: `0`);
1488
1489	// Next, generate a copy to obtain the return value.
1490	// FIXME: No multi-register return values yet, though I don't foresee
1491	// any real difficulties there.
1492	if (RetVT != MVT::isVoid) {
1493	SmallVector<CCValAssign, `16`> RVLocs;
1494	CCState CCInfo(CC, false, FuncInfo.MF, RVLocs, Context);
1495	CCInfo.AnalyzeCallResult(VT: RetVT, OrigTy: CLI.RetTy, Fn: RetCC_PPC64_ELF_FIS);
1496	CCValAssign &VA = RVLocs [`0`];
1497	assert(RVLocs.size() == `1` && "No support for multi-reg return values!");
1498	assert(VA.isRegLoc() && "Can only return in registers!");
1499
1500	MVT DestVT = VA.getValVT();
1501	MVT CopyVT = DestVT;
1502
1503	// Ints smaller than a register still arrive in a full 64-bit
1504	// register, so make sure we recognize this.
1505	if (RetVT == MVT::i8 \|\| RetVT == MVT::i16 \|\| RetVT == MVT::i32)
1506	CopyVT = MVT::i64;
1507
1508	Register SourcePhysReg = VA.getLocReg();
1509	Register ResultReg;
1510
1511	if (RetVT == CopyVT) {
1512	const TargetRegisterClass *CpyRC = TLI.getRegClassFor(VT: CopyVT);
1513	ResultReg = copyRegToRegClass(ToRC: CpyRC, SrcReg: SourcePhysReg);
1514
1515	// If necessary, round the floating result to single precision.
1516	} else if (CopyVT == MVT::f64) {
1517	ResultReg = createResultReg(RC: TLI.getRegClassFor(VT: RetVT));
1518	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::FRSP),
1519	DestReg: ResultReg).addReg(RegNo: SourcePhysReg);
1520
1521	// If only the low half of a general register is needed, generate
1522	// a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
1523	// used along the fast-isel path (not lowered), and downstream logic
1524	// also doesn't like a direct subreg copy on a physical reg.)
1525	} else if (RetVT == MVT::i8 \|\| RetVT == MVT::i16 \|\| RetVT == MVT::i32) {
1526	// Convert physical register from G8RC to GPRC.
1527	SourcePhysReg = (SourcePhysReg - PPC::X0) + PPC::R0;
1528	ResultReg = copyRegToRegClass(ToRC: &PPC::GPRCRegClass, SrcReg: SourcePhysReg);
1529	}
1530
1531	assert(ResultReg && "ResultReg unset!");
1532	CLI.InRegs.push_back(Elt: SourcePhysReg);
1533	CLI.ResultReg = ResultReg;
1534	CLI.NumResultRegs = `1`;
1535	}
1536
1537	return true;
1538	}
1539
1540	bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
1541	CallingConv::ID CC = CLI.CallConv;
1542	bool IsTailCall = CLI.IsTailCall;
1543	bool IsVarArg = CLI.IsVarArg;
1544	const Value *Callee = CLI.Callee;
1545	const MCSymbol *Symbol = CLI.Symbol;
1546
1547	if (!Callee && !Symbol)
1548	return false;
1549
1550	// Allow SelectionDAG isel to handle tail calls and long calls.
1551	if (IsTailCall \|\| Subtarget->useLongCalls())
1552	return false;
1553
1554	// Let SDISel handle vararg functions.
1555	if (IsVarArg)
1556	return false;
1557
1558	// If this is a PC-Rel function, let SDISel handle the call.
1559	if (Subtarget->isUsingPCRelativeCalls())
1560	return false;
1561
1562	// Handle simple calls for now, with legal return types and
1563	// those that can be extended.
1564	Type *RetTy = CLI.RetTy;
1565	MVT RetVT;
1566	if (RetTy->isVoidTy())
1567	RetVT = MVT::isVoid;
1568	else if (!isTypeLegal(Ty: RetTy, VT&: RetVT) && RetVT != MVT::i16 &&
1569	RetVT != MVT::i8)
1570	return false;
1571	else if (RetVT == MVT::i1 && Subtarget->useCRBits())
1572	// We can't handle boolean returns when CR bits are in use.
1573	return false;
1574
1575	// FIXME: No multi-register return values yet.
1576	if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
1577	RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
1578	RetVT != MVT::f64) {
1579	SmallVector<CCValAssign, `16`> RVLocs;
1580	CCState CCInfo(CC, IsVarArg, FuncInfo.MF, RVLocs, Context);
1581	CCInfo.AnalyzeCallResult(VT: RetVT, OrigTy: RetTy, Fn: RetCC_PPC64_ELF_FIS);
1582	if (RVLocs.size() > `1`)
1583	return false;
1584	}
1585
1586	// Bail early if more than 8 arguments, as we only currently
1587	// handle arguments passed in registers.
1588	unsigned NumArgs = CLI.OutVals.size();
1589	if (NumArgs > `8`)
1590	return false;
1591
1592	// Set up the argument vectors.
1593	SmallVector<Value*, `8`> Args;
1594	SmallVector<Register, `8`> ArgRegs;
1595	SmallVector<MVT, `8`> ArgVTs;
1596	SmallVector<ISD::ArgFlagsTy, `8`> ArgFlags;
1597
1598	Args.reserve(N: NumArgs);
1599	ArgRegs.reserve(N: NumArgs);
1600	ArgVTs.reserve(N: NumArgs);
1601	ArgFlags.reserve(N: NumArgs);
1602
1603	for (unsigned i = `0`, ie = NumArgs; i != ie; ++i) {
1604	// Only handle easy calls for now. It would be reasonably easy
1605	// to handle <= 8-byte structures passed ByVal in registers, but we
1606	// have to ensure they are right-justified in the register.
1607	ISD::ArgFlagsTy Flags = CLI.OutFlags [i];
1608	if (Flags.isInReg() \|\| Flags.isSRet() \|\| Flags.isNest() \|\| Flags.isByVal())
1609	return false;
1610
1611	Value *ArgValue = CLI.OutVals [i];
1612	Type *ArgTy = ArgValue->getType();
1613	MVT ArgVT;
1614	if (!isTypeLegal(Ty: ArgTy, VT&: ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
1615	return false;
1616
1617	// FIXME: FastISel cannot handle non-simple types yet, including 128-bit FP
1618	// types, which is passed through vector register. Skip these types and
1619	// fallback to default SelectionDAG based selection.
1620	if (ArgVT.isVector() \|\| ArgVT == MVT::f128)
1621	return false;
1622
1623	Register Arg = getRegForValue(V: ArgValue);
1624	if (!Arg)
1625	return false;
1626
1627	Args.push_back(Elt: ArgValue);
1628	ArgRegs.push_back(Elt: Arg);
1629	ArgVTs.push_back(Elt: ArgVT);
1630	ArgFlags.push_back(Elt: Flags);
1631	}
1632
1633	// Process the arguments.
1634	SmallVector<unsigned, `8`> RegArgs;
1635	unsigned NumBytes;
1636
1637	if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
1638	RegArgs, CC, NumBytes, IsVarArg))
1639	return false;
1640
1641	MachineInstrBuilder MIB;
1642	// FIXME: No handling for function pointers yet. This requires
1643	// implementing the function descriptor (OPD) setup.
1644	const GlobalValue *GV = dyn_cast<GlobalValue>(Val: Callee);
1645	if (!GV) {
1646	// patchpoints are a special case; they always dispatch to a pointer value.
1647	// However, we don't actually want to generate the indirect call sequence
1648	// here (that will be generated, as necessary, during asm printing), and
1649	// the call we generate here will be erased by FastISel::selectPatchpoint,
1650	// so don't try very hard...
1651	if (CLI.IsPatchPoint)
1652	MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::NOP));
1653	else
1654	return false;
1655	} else {
1656	// Build direct call with NOP for TOC restore.
1657	// FIXME: We can and should optimize away the NOP for local calls.
1658	MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1659	MCID: TII.get(Opcode: PPC::BL8_NOP));
1660	// Add callee.
1661	MIB.addGlobalAddress(GV);
1662	}
1663
1664	// Add implicit physical register uses to the call.
1665	for (unsigned Reg : RegArgs)
1666	MIB.addReg(RegNo: Reg, Flags: RegState::Implicit);
1667
1668	// Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
1669	// into the call.
1670	PPCFuncInfo->setUsesTOCBasePtr();
1671	MIB.addReg(RegNo: PPC::X2, Flags: RegState::Implicit);
1672
1673	// Add a register mask with the call-preserved registers. Proper
1674	// defs for return values will be added by setPhysRegsDeadExcept().
1675	MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC));
1676
1677	CLI.Call = MIB;
1678
1679	// Finish off the call including any return values.
1680	return finishCall(RetVT, CLI, NumBytes);
1681	}
1682
1683	// Attempt to fast-select a return instruction.
1684	bool PPCFastISel::SelectRet(const Instruction *I) {
1685
1686	if (!FuncInfo.CanLowerReturn)
1687	return false;
1688
1689	const ReturnInst *Ret = cast<ReturnInst>(Val: I);
1690	const Function &F = *I->getParent()->getParent();
1691
1692	// Build a list of return value registers.
1693	SmallVector<Register, `4`> RetRegs;
1694	CallingConv::ID CC = F.getCallingConv();
1695
1696	if (Ret->getNumOperands() > `0`) {
1697	SmallVector<ISD::OutputArg, `4`> Outs;
1698	GetReturnInfo(CC, ReturnType: F.getReturnType(), attr: F.getAttributes(), Outs, TLI, DL);
1699
1700	// Analyze operands of the call, assigning locations to each operand.
1701	SmallVector<CCValAssign, `16`> ValLocs;
1702	CCState CCInfo(CC, F.isVarArg(), FuncInfo.MF, ValLocs, Context);
1703	CCInfo.AnalyzeReturn(Outs, Fn: RetCC_PPC64_ELF_FIS);
1704	const Value *RV = Ret->getOperand(i_nocapture: `0`);
1705
1706	// FIXME: Only one output register for now.
1707	if (ValLocs.size() > `1`)
1708	return false;
1709
1710	// Special case for returning a constant integer of any size - materialize
1711	// the constant as an i64 and copy it to the return register.
1712	if (isa<ConstantInt>(Val: RV) && RV->getType()->isIntegerTy()) {
1713	const ConstantInt *CI = cast<ConstantInt>(Val: RV);
1714	CCValAssign &VA = ValLocs [`0`];
1715
1716	Register RetReg = VA.getLocReg();
1717	// We still need to worry about properly extending the sign. For example,
1718	// we could have only a single bit or a constant that needs zero
1719	// extension rather than sign extension. Make sure we pass the return
1720	// value extension property to integer materialization.
1721	Register SrcReg =
1722	PPCMaterializeInt(CI, VT: MVT::i64, UseSExt: VA.getLocInfo() != CCValAssign::ZExt);
1723
1724	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1725	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: RetReg).addReg(RegNo: SrcReg);
1726
1727	RetRegs.push_back(Elt: RetReg);
1728
1729	} else {
1730	Register Reg = getRegForValue(V: RV);
1731
1732	if (!Reg)
1733	return false;
1734
1735	// Copy the result values into the output registers.
1736	for (unsigned i = `0`; i < ValLocs.size(); ++i) {
1737
1738	CCValAssign &VA = ValLocs [i];
1739	assert(VA.isRegLoc() && "Can only return in registers!");
1740	RetRegs.push_back(Elt: VA.getLocReg());
1741	Register SrcReg = Reg + VA.getValNo();
1742
1743	EVT RVEVT = TLI.getValueType(DL, Ty: RV->getType());
1744	if (!RVEVT.isSimple())
1745	return false;
1746	MVT RVVT = RVEVT.getSimpleVT();
1747	MVT DestVT = VA.getLocVT();
1748
1749	if (RVVT != DestVT && RVVT != MVT::i8 &&
1750	RVVT != MVT::i16 && RVVT != MVT::i32)
1751	return false;
1752
1753	if (RVVT != DestVT) {
1754	switch (VA.getLocInfo()) {
1755	default:
1756	llvm_unreachable("Unknown loc info!");
1757	case CCValAssign::Full:
1758	llvm_unreachable("Full value assign but types don't match?");
1759	case CCValAssign::AExt:
1760	case CCValAssign::ZExt: {
1761	const TargetRegisterClass *RC =
1762	(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1763	Register TmpReg = createResultReg(RC);
1764	if (!PPCEmitIntExt(SrcVT: RVVT, SrcReg, DestVT, DestReg: TmpReg, IsZExt: true))
1765	return false;
1766	SrcReg = TmpReg;
1767	break;
1768	}
1769	case CCValAssign::SExt: {
1770	const TargetRegisterClass *RC =
1771	(DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
1772	Register TmpReg = createResultReg(RC);
1773	if (!PPCEmitIntExt(SrcVT: RVVT, SrcReg, DestVT, DestReg: TmpReg, IsZExt: false))
1774	return false;
1775	SrcReg = TmpReg;
1776	break;
1777	}
1778	}
1779	}
1780
1781	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1782	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: RetRegs [i])
1783	.addReg(RegNo: SrcReg);
1784	}
1785	}
1786	}
1787
1788	MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1789	MCID: TII.get(Opcode: PPC::BLR8));
1790
1791	for (Register Reg : RetRegs)
1792	MIB.addReg(RegNo: Reg, Flags: RegState::Implicit);
1793
1794	return true;
1795	}
1796
1797	// Attempt to emit an integer extend of SrcReg into DestReg. Both
1798	// signed and zero extensions are supported. Return false if we
1799	// can't handle it.
1800	bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
1801	Register DestReg, bool IsZExt) {
1802	if (DestVT != MVT::i32 && DestVT != MVT::i64)
1803	return false;
1804	if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
1805	return false;
1806
1807	// Signed extensions use EXTSB, EXTSH, EXTSW.
1808	if (!IsZExt) {
1809	unsigned Opc;
1810	if (SrcVT == MVT::i8)
1811	Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
1812	else if (SrcVT == MVT::i16)
1813	Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
1814	else {
1815	assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
1816	Opc = PPC::EXTSW_32_64;
1817	}
1818	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
1819	.addReg(RegNo: SrcReg);
1820
1821	// Unsigned 32-bit extensions use RLWINM.
1822	} else if (DestVT == MVT::i32) {
1823	unsigned MB;
1824	if (SrcVT == MVT::i8)
1825	MB = `24`;
1826	else {
1827	assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
1828	MB = `16`;
1829	}
1830	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::RLWINM),
1831	DestReg)
1832	.addReg(RegNo: SrcReg).addImm(/SH=/Val: `0`).addImm(Val: MB).addImm(/ME=/Val: `31`);
1833
1834	// Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
1835	} else {
1836	unsigned MB;
1837	if (SrcVT == MVT::i8)
1838	MB = `56`;
1839	else if (SrcVT == MVT::i16)
1840	MB = `48`;
1841	else
1842	MB = `32`;
1843	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1844	MCID: TII.get(Opcode: PPC::RLDICL_32_64), DestReg)
1845	.addReg(RegNo: SrcReg).addImm(/SH=/Val: `0`).addImm(Val: MB);
1846	}
1847
1848	return true;
1849	}
1850
1851	// Attempt to fast-select an indirect branch instruction.
1852	bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
1853	Register AddrReg = getRegForValue(V: I->getOperand(i: `0`));
1854	if (!AddrReg)
1855	return false;
1856
1857	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::MTCTR8))
1858	.addReg(RegNo: AddrReg);
1859	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::BCTR8));
1860
1861	const IndirectBrInst *IB = cast<IndirectBrInst>(Val: I);
1862	for (const BasicBlock *SuccBB : IB->successors())
1863	FuncInfo.MBB->addSuccessor(Succ: FuncInfo.getMBB(BB: SuccBB));
1864
1865	return true;
1866	}
1867
1868	// Attempt to fast-select an integer truncate instruction.
1869	bool PPCFastISel::SelectTrunc(const Instruction *I) {
1870	Value *Src = I->getOperand(i: `0`);
1871	EVT SrcVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
1872	EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
1873
1874	if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
1875	return false;
1876
1877	if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
1878	return false;
1879
1880	Register SrcReg = getRegForValue(V: Src);
1881	if (!SrcReg)
1882	return false;
1883
1884	// The only interesting case is when we need to switch register classes.
1885	if (SrcVT == MVT::i64)
1886	SrcReg = copyRegToRegClass(ToRC: &PPC::GPRCRegClass, SrcReg, Flag: {}, SubReg: PPC::sub_32);
1887
1888	updateValueMap(I, Reg: SrcReg);
1889	return true;
1890	}
1891
1892	// Attempt to fast-select an integer extend instruction.
1893	bool PPCFastISel::SelectIntExt(const Instruction *I) {
1894	Type *DestTy = I->getType();
1895	Value *Src = I->getOperand(i: `0`);
1896	Type *SrcTy = Src->getType();
1897
1898	bool IsZExt = isa<ZExtInst>(Val: I);
1899	Register SrcReg = getRegForValue(V: Src);
1900	if (!SrcReg) return false;
1901
1902	EVT SrcEVT, DestEVT;
1903	SrcEVT = TLI.getValueType(DL, Ty: SrcTy, AllowUnknown: true);
1904	DestEVT = TLI.getValueType(DL, Ty: DestTy, AllowUnknown: true);
1905	if (!SrcEVT.isSimple())
1906	return false;
1907	if (!DestEVT.isSimple())
1908	return false;
1909
1910	MVT SrcVT = SrcEVT.getSimpleVT();
1911	MVT DestVT = DestEVT.getSimpleVT();
1912
1913	// If we know the register class needed for the result of this
1914	// instruction, use it. Otherwise pick the register class of the
1915	// correct size that does not contain X0/R0, since we don't know
1916	// whether downstream uses permit that assignment.
1917	Register AssignedReg = FuncInfo.ValueMap [I];
1918	const TargetRegisterClass *RC =
1919	(AssignedReg ? MRI.getRegClass(Reg: AssignedReg) :
1920	(DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
1921	&PPC::GPRC_and_GPRC_NOR0RegClass));
1922	Register ResultReg = createResultReg(RC);
1923
1924	if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, DestReg: ResultReg, IsZExt))
1925	return false;
1926
1927	updateValueMap(I, Reg: ResultReg);
1928	return true;
1929	}
1930
1931	// Attempt to fast-select an instruction that wasn't handled by
1932	// the table-generated machinery.
1933	bool PPCFastISel::fastSelectInstruction(const Instruction *I) {
1934
1935	switch (I->getOpcode()) {
1936	case Instruction::Load:
1937	return SelectLoad(I);
1938	case Instruction::Store:
1939	return SelectStore(I);
1940	case Instruction::CondBr:
1941	return SelectBranch(I);
1942	case Instruction::IndirectBr:
1943	return SelectIndirectBr(I);
1944	case Instruction::FPExt:
1945	return SelectFPExt(I);
1946	case Instruction::FPTrunc:
1947	return SelectFPTrunc(I);
1948	case Instruction::SIToFP:
1949	return SelectIToFP(I, /IsSigned/ true);
1950	case Instruction::UIToFP:
1951	return SelectIToFP(I, /IsSigned/ false);
1952	case Instruction::FPToSI:
1953	return SelectFPToI(I, /IsSigned/ true);
1954	case Instruction::FPToUI:
1955	return SelectFPToI(I, /IsSigned/ false);
1956	case Instruction::Add:
1957	return SelectBinaryIntOp(I, ISDOpcode: ISD::ADD);
1958	case Instruction::Or:
1959	return SelectBinaryIntOp(I, ISDOpcode: ISD::OR);
1960	case Instruction::Sub:
1961	return SelectBinaryIntOp(I, ISDOpcode: ISD::SUB);
1962	case Instruction::Ret:
1963	return SelectRet(I);
1964	case Instruction::Trunc:
1965	return SelectTrunc(I);
1966	case Instruction::ZExt:
1967	case Instruction::SExt:
1968	return SelectIntExt(I);
1969	// Here add other flavors of Instruction::XXX that automated
1970	// cases don't catch. For example, switches are terminators
1971	// that aren't yet handled.
1972	default:
1973	break;
1974	}
1975	return false;
1976	}
1977
1978	// Materialize a floating-point constant into a register, and return
1979	// the register number (or zero if we failed to handle it).
1980	Register PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
1981	// If this is a PC-Rel function, let SDISel handle constant pool.
1982	if (Subtarget->isUsingPCRelativeCalls())
1983	return Register ();
1984
1985	// No plans to handle long double here.
1986	if (VT != MVT::f32 && VT != MVT::f64)
1987	return Register ();
1988
1989	// All FP constants are loaded from the constant pool.
1990	Align Alignment = DL.getPrefTypeAlign(Ty: CFP->getType());
1991	unsigned Idx = MCP.getConstantPoolIndex(C: cast<Constant>(Val: CFP), Alignment);
1992	const bool HasSPE = Subtarget->hasSPE();
1993	const TargetRegisterClass *RC;
1994	if (HasSPE)
1995	RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
1996	else
1997	RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);
1998
1999	Register DestReg = createResultReg(RC);
2000	CodeModel::Model CModel = TM.getCodeModel();
2001
2002	MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
2003	PtrInfo: MachinePointerInfo::getConstantPool(MF&: *FuncInfo.MF),
2004	F: MachineMemOperand::MOLoad, Size: (VT == MVT::f32) ? `4` : `8`, BaseAlignment: Alignment);
2005
2006	unsigned Opc;
2007
2008	if (HasSPE)
2009	Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
2010	else
2011	Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);
2012
2013	Register TmpReg = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
2014
2015	PPCFuncInfo->setUsesTOCBasePtr();
2016	// For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
2017	if (CModel == CodeModel::Small) {
2018	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::LDtocCPT),
2019	DestReg: TmpReg)
2020	.addConstantPoolIndex(Idx).addReg(RegNo: PPC::X2);
2021	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
2022	.addImm(Val: `0`).addReg(RegNo: TmpReg).addMemOperand(MMO);
2023	} else {
2024	// Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
2025	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDIStocHA8),
2026	DestReg: TmpReg).addReg(RegNo: PPC::X2).addConstantPoolIndex(Idx);
2027	// But for large code model, we must generate a LDtocL followed
2028	// by the LF[SD].
2029	if (CModel == CodeModel::Large) {
2030	Register TmpReg2 = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
2031	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::LDtocL),
2032	DestReg: TmpReg2).addConstantPoolIndex(Idx).addReg(RegNo: TmpReg);
2033	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
2034	.addImm(Val: `0`)
2035	.addReg(RegNo: TmpReg2);
2036	} else
2037	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
2038	.addConstantPoolIndex(Idx, Offset: `0`, TargetFlags: PPCII::MO_TOC_LO)
2039	.addReg(RegNo: TmpReg)
2040	.addMemOperand(MMO);
2041	}
2042
2043	return DestReg;
2044	}
2045
2046	// Materialize the address of a global value into a register, and return
2047	// the register number (or zero if we failed to handle it).
2048	Register PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
2049	// If this is a PC-Rel function, let SDISel handle GV materialization.
2050	if (Subtarget->isUsingPCRelativeCalls())
2051	return Register ();
2052
2053	assert(VT == MVT::i64 && "Non-address!");
2054	const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
2055	Register DestReg = createResultReg(RC);
2056
2057	// Global values may be plain old object addresses, TLS object
2058	// addresses, constant pool entries, or jump tables. How we generate
2059	// code for these may depend on small, medium, or large code model.
2060	CodeModel::Model CModel = TM.getCodeModel();
2061
2062	// FIXME: Jump tables are not yet required because fast-isel doesn't
2063	// handle switches; if that changes, we need them as well. For now,
2064	// what follows assumes everything's a generic (or TLS) global address.
2065
2066	// FIXME: We don't yet handle the complexity of TLS.
2067	if (GV->isThreadLocal())
2068	return Register ();
2069
2070	PPCFuncInfo->setUsesTOCBasePtr();
2071	bool IsAIXTocData = TM.getTargetTriple().isOSAIX() &&
2072	isa<GlobalVariable>(Val: GV) &&
2073	cast<GlobalVariable>(Val: GV)->hasAttribute(Kind: "toc-data");
2074
2075	// For small code model, generate a simple TOC load.
2076	if (CModel == CodeModel::Small) {
2077	auto MIB = BuildMI(
2078	BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2079	MCID: IsAIXTocData ? TII.get(Opcode: PPC::ADDItoc8) : TII.get(Opcode: PPC::LDtoc), DestReg);
2080	if (IsAIXTocData)
2081	MIB.addReg(RegNo: PPC::X2).addGlobalAddress(GV);
2082	else
2083	MIB.addGlobalAddress(GV).addReg(RegNo: PPC::X2);
2084	} else {
2085	// If the address is an externally defined symbol, a symbol with common
2086	// or externally available linkage, a non-local function address, or a
2087	// jump table address (not yet needed), or if we are generating code
2088	// for large code model, we generate:
2089	// LDtocL(GV, ADDIStocHA8(%x2, GV))
2090	// Otherwise we generate:
2091	// ADDItocL8(ADDIStocHA8(%x2, GV), GV)
2092	// Either way, start with the ADDIStocHA8:
2093	Register HighPartReg = createResultReg(RC);
2094	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDIStocHA8),
2095	DestReg: HighPartReg).addReg(RegNo: PPC::X2).addGlobalAddress(GV);
2096
2097	if (Subtarget->isGVIndirectSymbol(GV)) {
2098	assert(!IsAIXTocData && "TOC data should always be direct.");
2099	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::LDtocL),
2100	DestReg).addGlobalAddress(GV).addReg(RegNo: HighPartReg);
2101	} else {
2102	// Otherwise generate the ADDItocL8.
2103	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDItocL8),
2104	DestReg)
2105	.addReg(RegNo: HighPartReg)
2106	.addGlobalAddress(GV);
2107	}
2108	}
2109
2110	return DestReg;
2111	}
2112
2113	// Materialize a 32-bit integer constant into a register, and return
2114	// the register number (or zero if we failed to handle it).
2115	Register PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
2116	const TargetRegisterClass *RC) {
2117	unsigned Lo = Imm & `0xFFFF`;
2118	unsigned Hi = (Imm >> `16`) & `0xFFFF`;
2119
2120	Register ResultReg = createResultReg(RC);
2121	bool IsGPRC = RC->hasSuperClassEq(RC: &PPC::GPRCRegClass);
2122
2123	if (isInt<`16`>(x: Imm))
2124	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2125	MCID: TII.get(Opcode: IsGPRC ? PPC::LI : PPC::LI8), DestReg: ResultReg)
2126	.addImm(Val: Imm);
2127	else if (Lo) {
2128	// Both Lo and Hi have nonzero bits.
2129	Register TmpReg = createResultReg(RC);
2130	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2131	MCID: TII.get(Opcode: IsGPRC ? PPC::LIS : PPC::LIS8), DestReg: TmpReg)
2132	.addImm(Val: Hi);
2133	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2134	MCID: TII.get(Opcode: IsGPRC ? PPC::ORI : PPC::ORI8), DestReg: ResultReg)
2135	.addReg(RegNo: TmpReg).addImm(Val: Lo);
2136	} else
2137	// Just Hi bits.
2138	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2139	MCID: TII.get(Opcode: IsGPRC ? PPC::LIS : PPC::LIS8), DestReg: ResultReg)
2140	.addImm(Val: Hi);
2141
2142	return ResultReg;
2143	}
2144
2145	// Materialize a 64-bit integer constant into a register, and return
2146	// the register number (or zero if we failed to handle it).
2147	Register PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
2148	const TargetRegisterClass *RC) {
2149	unsigned Remainder = `0`;
2150	unsigned Shift = `0`;
2151
2152	// If the value doesn't fit in 32 bits, see if we can shift it
2153	// so that it fits in 32 bits.
2154	if (!isInt<`32`>(x: Imm)) {
2155	Shift = llvm::countr_zero<uint64_t>(Val: Imm);
2156	int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
2157
2158	if (isInt<`32`>(x: ImmSh))
2159	Imm = ImmSh;
2160	else {
2161	Remainder = Imm;
2162	Shift = `32`;
2163	Imm >>= `32`;
2164	}
2165	}
2166
2167	// Handle the high-order 32 bits (if shifted) or the whole 32 bits
2168	// (if not shifted).
2169	Register TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
2170	if (!Shift)
2171	return TmpReg1;
2172
2173	// If upper 32 bits were not zero, we've built them and need to shift
2174	// them into place.
2175	Register TmpReg2;
2176	if (Imm) {
2177	TmpReg2 = createResultReg(RC);
2178	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::RLDICR),
2179	DestReg: TmpReg2).addReg(RegNo: TmpReg1).addImm(Val: Shift).addImm(Val: `63` - Shift);
2180	} else
2181	TmpReg2 = TmpReg1;
2182
2183	Register TmpReg3;
2184	unsigned Hi, Lo;
2185	if ((Hi = (Remainder >> `16`) & `0xFFFF`)) {
2186	TmpReg3 = createResultReg(RC);
2187	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ORIS8),
2188	DestReg: TmpReg3).addReg(RegNo: TmpReg2).addImm(Val: Hi);
2189	} else
2190	TmpReg3 = TmpReg2;
2191
2192	if ((Lo = Remainder & `0xFFFF`)) {
2193	Register ResultReg = createResultReg(RC);
2194	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ORI8),
2195	DestReg: ResultReg).addReg(RegNo: TmpReg3).addImm(Val: Lo);
2196	return ResultReg;
2197	}
2198
2199	return TmpReg3;
2200	}
2201
2202	// Materialize an integer constant into a register, and return
2203	// the register number (or zero if we failed to handle it).
2204	Register PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
2205	bool UseSExt) {
2206	// If we're using CR bit registers for i1 values, handle that as a special
2207	// case first.
2208	if (VT == MVT::i1 && Subtarget->useCRBits()) {
2209	Register ImmReg = createResultReg(RC: &PPC::CRBITRCRegClass);
2210	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2211	MCID: TII.get(Opcode: CI->isZero() ? PPC::CRUNSET : PPC::CRSET), DestReg: ImmReg);
2212	return ImmReg;
2213	}
2214
2215	if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2216	VT != MVT::i1)
2217	return Register ();
2218
2219	const TargetRegisterClass *RC =
2220	((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
2221	int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
2222
2223	// If the constant is in range, use a load-immediate.
2224	// Since LI will sign extend the constant we need to make sure that for
2225	// our zeroext constants that the sign extended constant fits into 16-bits -
2226	// a range of 0..0x7fff.
2227	if (isInt<`16`>(x: Imm)) {
2228	unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
2229	Register ImmReg = createResultReg(RC);
2230	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ImmReg)
2231	.addImm(Val: Imm);
2232	return ImmReg;
2233	}
2234
2235	// Construct the constant piecewise.
2236	if (VT == MVT::i64)
2237	return PPCMaterialize64BitInt(Imm, RC);
2238	else if (VT == MVT::i32)
2239	return PPCMaterialize32BitInt(Imm, RC);
2240
2241	return Register ();
2242	}
2243
2244	// Materialize a constant into a register, and return the register
2245	// number (or zero if we failed to handle it).
2246	Register PPCFastISel::fastMaterializeConstant(const Constant *C) {
2247	EVT CEVT = TLI.getValueType(DL, Ty: C->getType(), AllowUnknown: true);
2248
2249	// Only handle simple types.
2250	if (!CEVT.isSimple())
2251	return Register ();
2252	MVT VT = CEVT.getSimpleVT();
2253
2254	if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C))
2255	return PPCMaterializeFP(CFP, VT);
2256	else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C))
2257	return PPCMaterializeGV(GV, VT);
2258	else if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: C))
2259	// Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
2260	// assumes that constant PHI operands will be zero extended, and failure to
2261	// match that assumption will cause problems if we sign extend here but
2262	// some user of a PHI is in a block for which we fall back to full SDAG
2263	// instruction selection.
2264	return PPCMaterializeInt(CI, VT, UseSExt: false);
2265
2266	return Register ();
2267	}
2268
2269	// Materialize the address created by an alloca into a register, and
2270	// return the register number (or zero if we failed to handle it).
2271	Register PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
2272	auto SI = FuncInfo.StaticAllocaMap.find(Val: AI);
2273
2274	// Don't handle dynamic allocas.
2275	if (SI == FuncInfo.StaticAllocaMap.end())
2276	return Register ();
2277
2278	MVT VT;
2279	if (!isLoadTypeLegal(Ty: AI->getType(), VT))
2280	return Register ();
2281
2282	if (SI != FuncInfo.StaticAllocaMap.end()) {
2283	Register ResultReg = createResultReg(RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
2284	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: PPC::ADDI8),
2285	DestReg: ResultReg).addFrameIndex(Idx: SI ->second).addImm(Val: `0`);
2286	return ResultReg;
2287	}
2288
2289	return Register ();
2290	}
2291
2292	// Fold loads into extends when possible.
2293	// FIXME: We can have multiple redundant extend/trunc instructions
2294	// following a load. The folding only picks up one. Extend this
2295	// to check subsequent instructions for the same pattern and remove
2296	// them. Thus ResultReg should be the def reg for the last redundant
2297	// instruction in a chain, and all intervening instructions can be
2298	// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
2299	// to add ELF64-NOT: rldicl to the appropriate tests when this works.
2300	bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr MI, unsigned* OpNo,
2301	const LoadInst *LI) {
2302	// Verify we have a legal type before going any further.
2303	MVT VT;
2304	if (!isLoadTypeLegal(Ty: LI->getType(), VT))
2305	return false;
2306
2307	// Combine load followed by zero- or sign-extend.
2308	bool IsZExt = false;
2309	switch(MI->getOpcode()) {
2310	default:
2311	return false;
2312
2313	case PPC::RLDICL:
2314	case PPC::RLDICL_32_64: {
2315	IsZExt = true;
2316	unsigned MB = MI->getOperand(i: `3`).getImm();
2317	if ((VT == MVT::i8 && MB <= `56`) \|\|
2318	(VT == MVT::i16 && MB <= `48`) \|\|
2319	(VT == MVT::i32 && MB <= `32`))
2320	break;
2321	return false;
2322	}
2323
2324	case PPC::RLWINM:
2325	case PPC::RLWINM8: {
2326	IsZExt = true;
2327	unsigned MB = MI->getOperand(i: `3`).getImm();
2328	if ((VT == MVT::i8 && MB <= `24`) \|\|
2329	(VT == MVT::i16 && MB <= `16`))
2330	break;
2331	return false;
2332	}
2333
2334	case PPC::EXTSB:
2335	case PPC::EXTSB8:
2336	case PPC::EXTSB8_32_64:
2337	/ There is no sign-extending load-byte instruction. /
2338	return false;
2339
2340	case PPC::EXTSH:
2341	case PPC::EXTSH8:
2342	case PPC::EXTSH8_32_64: {
2343	if (VT != MVT::i16 && VT != MVT::i8)
2344	return false;
2345	break;
2346	}
2347
2348	case PPC::EXTSW:
2349	case PPC::EXTSW_32:
2350	case PPC::EXTSW_32_64: {
2351	if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
2352	return false;
2353	break;
2354	}
2355	}
2356
2357	// See if we can handle this address.
2358	Address Addr;
2359	if (!PPCComputeAddress(Obj: LI->getOperand(i_nocapture: `0`), Addr))
2360	return false;
2361
2362	Register ResultReg = MI->getOperand(i: `0`).getReg();
2363
2364	if (!PPCEmitLoad(VT, ResultReg, Addr, RC: nullptr, IsZExt,
2365	FP64LoadOpc: Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
2366	return false;
2367
2368	MachineBasicBlock::iterator I(MI);
2369	removeDeadCode(I, E: std::next(x: I));
2370	return true;
2371	}
2372
2373	// Attempt to lower call arguments in a faster way than done by
2374	// the selection DAG code.
2375	bool PPCFastISel::fastLowerArguments() {
2376	// Defer to normal argument lowering for now. It's reasonably
2377	// efficient. Consider doing something like ARM to handle the
2378	// case where all args fit in registers, no varargs, no float
2379	// or vector args.
2380	return false;
2381	}
2382
2383	// Handle materializing integer constants into a register. This is not
2384	// automatically generated for PowerPC, so must be explicitly created here.
2385	Register PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
2386
2387	if (Opc != ISD::Constant)
2388	return Register ();
2389
2390	// If we're using CR bit registers for i1 values, handle that as a special
2391	// case first.
2392	if (VT == MVT::i1 && Subtarget->useCRBits()) {
2393	Register ImmReg = createResultReg(RC: &PPC::CRBITRCRegClass);
2394	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2395	MCID: TII.get(Opcode: Imm == `0` ? PPC::CRUNSET : PPC::CRSET), DestReg: ImmReg);
2396	return ImmReg;
2397	}
2398
2399	if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
2400	VT != MVT::i1)
2401	return Register ();
2402
2403	const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
2404	&PPC::GPRCRegClass);
2405	if (VT == MVT::i64)
2406	return PPCMaterialize64BitInt(Imm, RC);
2407	else
2408	return PPCMaterialize32BitInt(Imm, RC);
2409	}
2410
2411	// Override for ADDI and ADDI8 to set the correct register class
2412	// on RHS operand 0. The automatic infrastructure naively assumes
2413	// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
2414	// for these cases. At the moment, none of the other automatically
2415	// generated RI instructions require special treatment. However, once
2416	// SelectSelect is implemented, "isel" requires similar handling.
2417	//
2418	// Also be conservative about the output register class. Avoid
2419	// assigning R0 or X0 to the output register for GPRC and G8RC
2420	// register classes, as any such result could be used in ADDI, etc.,
2421	// where those regs have another meaning.
2422	Register PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
2423	const TargetRegisterClass *RC,
2424	Register Op0, uint64_t Imm) {
2425	if (MachineInstOpcode == PPC::ADDI)
2426	MRI.setRegClass(Reg: Op0, RC: &PPC::GPRC_and_GPRC_NOR0RegClass);
2427	else if (MachineInstOpcode == PPC::ADDI8)
2428	MRI.setRegClass(Reg: Op0, RC: &PPC::G8RC_and_G8RC_NOX0RegClass);
2429
2430	const TargetRegisterClass *UseRC =
2431	(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2432	(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2433
2434	return FastISel::fastEmitInst_ri(MachineInstOpcode, RC: UseRC, Op0, Imm);
2435	}
2436
2437	// Override for instructions with one register operand to avoid use of
2438	// R0/X0. The automatic infrastructure isn't aware of the context so
2439	// we must be conservative.
2440	Register PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
2441	const TargetRegisterClass *RC,
2442	Register Op0) {
2443	const TargetRegisterClass *UseRC =
2444	(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2445	(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2446
2447	return FastISel::fastEmitInst_r(MachineInstOpcode, RC: UseRC, Op0);
2448	}
2449
2450	// Override for instructions with two register operands to avoid use
2451	// of R0/X0. The automatic infrastructure isn't aware of the context
2452	// so we must be conservative.
2453	Register PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
2454	const TargetRegisterClass *RC,
2455	Register Op0, Register Op1) {
2456	const TargetRegisterClass *UseRC =
2457	(RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
2458	(RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
2459
2460	return FastISel::fastEmitInst_rr(MachineInstOpcode, RC: UseRC, Op0, Op1);
2461	}
2462
2463	namespace llvm {
2464	// Create the fast instruction selector for PowerPC64 ELF.
2465	FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
2466	const TargetLibraryInfo *LibInfo,
2467	const LibcallLoweringInfo *LibcallLowering) {
2468	// Only available on 64-bit for now.
2469	const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
2470	if (Subtarget.isPPC64())
2471	return new PPCFastISel (FuncInfo, LibInfo, LibcallLowering);
2472	return nullptr;
2473	}
2474	}
2475

Browse the source code of llvm_projects/llvm/lib/Target/PowerPC/PPCFastISel.cpp