ARMFastISel.cpp source code [llvm_projects/llvm/lib/Target/ARM/ARMFastISel.cpp]

1	//===- ARMFastISel.cpp - ARM FastISel implementation ----------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file defines the ARM-specific support for the FastISel class. Some
10	// of the target-specific code is generated by tablegen in the file
11	// ARMGenFastISel.inc, which is #included here.
12	//
13	//===----------------------------------------------------------------------===//
14
15	#include "ARM.h"
16	#include "ARMBaseInstrInfo.h"
17	#include "ARMBaseRegisterInfo.h"
18	#include "ARMCallingConv.h"
19	#include "ARMConstantPoolValue.h"
20	#include "ARMISelLowering.h"
21	#include "ARMMachineFunctionInfo.h"
22	#include "ARMSubtarget.h"
23	#include "ARMTargetMachine.h"
24	#include "MCTargetDesc/ARMAddressingModes.h"
25	#include "MCTargetDesc/ARMBaseInfo.h"
26	#include "Utils/ARMBaseInfo.h"
27	#include "llvm/ADT/APFloat.h"
28	#include "llvm/ADT/APInt.h"
29	#include "llvm/ADT/DenseMap.h"
30	#include "llvm/ADT/SmallVector.h"
31	#include "llvm/CodeGen/CallingConvLower.h"
32	#include "llvm/CodeGen/FastISel.h"
33	#include "llvm/CodeGen/FunctionLoweringInfo.h"
34	#include "llvm/CodeGen/ISDOpcodes.h"
35	#include "llvm/CodeGen/MachineBasicBlock.h"
36	#include "llvm/CodeGen/MachineConstantPool.h"
37	#include "llvm/CodeGen/MachineFrameInfo.h"
38	#include "llvm/CodeGen/MachineFunction.h"
39	#include "llvm/CodeGen/MachineInstr.h"
40	#include "llvm/CodeGen/MachineInstrBuilder.h"
41	#include "llvm/CodeGen/MachineMemOperand.h"
42	#include "llvm/CodeGen/MachineOperand.h"
43	#include "llvm/CodeGen/MachineRegisterInfo.h"
44	#include "llvm/CodeGen/TargetInstrInfo.h"
45	#include "llvm/CodeGen/TargetLowering.h"
46	#include "llvm/CodeGen/TargetOpcodes.h"
47	#include "llvm/CodeGen/TargetRegisterInfo.h"
48	#include "llvm/CodeGen/ValueTypes.h"
49	#include "llvm/CodeGenTypes/MachineValueType.h"
50	#include "llvm/IR/Argument.h"
51	#include "llvm/IR/Attributes.h"
52	#include "llvm/IR/CallingConv.h"
53	#include "llvm/IR/Constant.h"
54	#include "llvm/IR/Constants.h"
55	#include "llvm/IR/DataLayout.h"
56	#include "llvm/IR/DerivedTypes.h"
57	#include "llvm/IR/Function.h"
58	#include "llvm/IR/GetElementPtrTypeIterator.h"
59	#include "llvm/IR/GlobalValue.h"
60	#include "llvm/IR/GlobalVariable.h"
61	#include "llvm/IR/InstrTypes.h"
62	#include "llvm/IR/Instruction.h"
63	#include "llvm/IR/Instructions.h"
64	#include "llvm/IR/IntrinsicInst.h"
65	#include "llvm/IR/Intrinsics.h"
66	#include "llvm/IR/Module.h"
67	#include "llvm/IR/Operator.h"
68	#include "llvm/IR/Type.h"
69	#include "llvm/IR/User.h"
70	#include "llvm/IR/Value.h"
71	#include "llvm/MC/MCInstrDesc.h"
72	#include "llvm/Support/Casting.h"
73	#include "llvm/Support/Compiler.h"
74	#include "llvm/Support/ErrorHandling.h"
75	#include "llvm/Support/MathExtras.h"
76	#include "llvm/Target/TargetMachine.h"
77	#include "llvm/Target/TargetOptions.h"
78	#include <cassert>
79	#include <cstdint>
80	#include <utility>
81
82	using namespace llvm;
83
84	namespace {
85
86	// All possible address modes, plus some.
87	class Address {
88	public:
89	using BaseKind = enum { RegBase, FrameIndexBase };
90
91	private:
92	BaseKind Kind = RegBase;
93	union {
94	unsigned Reg;
95	int FI;
96	} Base;
97
98	int Offset = `0`;
99
100	public:
101	// Innocuous defaults for our address.
102	Address() { Base.Reg = `0`; }
103
104	void setKind(BaseKind K) { Kind = K; }
105	BaseKind getKind() const { return Kind; }
106	bool isRegBase() const { return Kind == RegBase; }
107	bool isFIBase() const { return Kind == FrameIndexBase; }
108
109	void setReg(Register Reg) {
110	assert(isRegBase() && "Invalid base register access!");
111	Base.Reg = Reg.id();
112	}
113
114	Register getReg() const {
115	assert(isRegBase() && "Invalid base register access!");
116	return Base.Reg;
117	}
118
119	void setFI(int FI) {
120	assert(isFIBase() && "Invalid base frame index access!");
121	Base.FI = FI;
122	}
123
124	int getFI() const {
125	assert(isFIBase() && "Invalid base frame index access!");
126	return Base.FI;
127	}
128
129	void setOffset(int O) { Offset = O; }
130	int getOffset() { return Offset; }
131	};
132
133	class ARMFastISel final : public FastISel {
134	/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
135	/// make the right decision when generating code for different targets.
136	const ARMSubtarget *Subtarget;
137	Module &M;
138	const ARMBaseInstrInfo &TII;
139	const ARMTargetLowering &TLI;
140	const ARMBaseTargetMachine &TM;
141	ARMFunctionInfo *AFI;
142
143	// Convenience variables to avoid some queries.
144	bool isThumb2;
145	LLVMContext *Context;
146
147	public:
148	explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
149	const TargetLibraryInfo *libInfo)
150	: FastISel (funcInfo, libInfo),
151	Subtarget(&funcInfo.MF->getSubtarget<ARMSubtarget>()),
152	M(const_cast<Module &>(*funcInfo.Fn->getParent())),
153	TII(Subtarget->getInstrInfo()), TLI(Subtarget->getTargetLowering()),
154	TM(TLI.getTM()) {
155	AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
156	isThumb2 = AFI->isThumbFunction();
157	Context = &funcInfo.Fn->getContext();
158	}
159
160	private:
161	// Code from FastISel.cpp.
162
163	Register fastEmitInst_r(unsigned MachineInstOpcode,
164	const TargetRegisterClass *RC, Register Op0);
165	Register fastEmitInst_rr(unsigned MachineInstOpcode,
166	const TargetRegisterClass *RC, Register Op0,
167	Register Op1);
168	Register fastEmitInst_ri(unsigned MachineInstOpcode,
169	const TargetRegisterClass *RC, Register Op0,
170	uint64_t Imm);
171	Register fastEmitInst_i(unsigned MachineInstOpcode,
172	const TargetRegisterClass *RC, uint64_t Imm);
173
174	// Backend specific FastISel code.
175
176	bool fastSelectInstruction(const Instruction *I) override;
177	Register fastMaterializeConstant(const Constant *C) override;
178	Register fastMaterializeAlloca(const AllocaInst *AI) override;
179	bool tryToFoldLoadIntoMI(MachineInstr MI, unsigned* OpNo,
180	const LoadInst *LI) override;
181	bool fastLowerArguments() override;
182
183	#include "ARMGenFastISel.inc"
184
185	// Instruction selection routines.
186
187	bool SelectLoad(const Instruction *I);
188	bool SelectStore(const Instruction *I);
189	bool SelectBranch(const Instruction *I);
190	bool SelectIndirectBr(const Instruction *I);
191	bool SelectCmp(const Instruction *I);
192	bool SelectFPExt(const Instruction *I);
193	bool SelectFPTrunc(const Instruction *I);
194	bool SelectBinaryIntOp(const Instruction I, unsigned* ISDOpcode);
195	bool SelectBinaryFPOp(const Instruction I, unsigned* ISDOpcode);
196	bool SelectIToFP(const Instruction I, bool* isSigned);
197	bool SelectFPToI(const Instruction I, bool* isSigned);
198	bool SelectDiv(const Instruction I, bool* isSigned);
199	bool SelectRem(const Instruction I, bool* isSigned);
200	bool SelectCall(const Instruction I, const* char *IntrMemName);
201	bool SelectIntrinsicCall(const IntrinsicInst &I);
202	bool SelectSelect(const Instruction *I);
203	bool SelectRet(const Instruction *I);
204	bool SelectTrunc(const Instruction *I);
205	bool SelectIntExt(const Instruction *I);
206	bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
207
208	// Utility routines.
209
210	bool isPositionIndependent() const;
211	bool isTypeLegal(Type *Ty, MVT &VT);
212	bool isLoadTypeLegal(Type *Ty, MVT &VT);
213	bool ARMEmitCmp(const Value Src1Value, const* Value *Src2Value,
214	bool isZExt);
215	bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
216	MaybeAlign Alignment = std::nullopt, bool isZExt = true,
217	bool allocReg = true);
218	bool ARMEmitStore(MVT VT, Register SrcReg, Address &Addr,
219	MaybeAlign Alignment = std::nullopt);
220	bool ARMComputeAddress(const Value *Obj, Address &Addr);
221	void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
222	bool ARMIsMemCpySmall(uint64_t Len);
223	bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
224	MaybeAlign Alignment);
225	Register ARMEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, bool isZExt);
226	Register ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
227	Register ARMMaterializeInt(const Constant *C, MVT VT);
228	Register ARMMaterializeGV(const GlobalValue *GV, MVT VT);
229	Register ARMMoveToFPReg(MVT VT, Register SrcReg);
230	Register ARMMoveToIntReg(MVT VT, Register SrcReg);
231	unsigned ARMSelectCallOp(bool UseReg);
232	Register ARMLowerPICELF(const GlobalValue *GV, MVT VT);
233
234	const TargetLowering getTargetLowering() { return* &TLI; }
235
236	// Call handling routines.
237
238	CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
239	bool Return,
240	bool isVarArg);
241	bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
242	SmallVectorImpl<Register> &ArgRegs,
243	SmallVectorImpl<MVT> &ArgVTs,
244	SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
245	SmallVectorImpl<Register> &RegArgs,
246	CallingConv::ID CC,
247	unsigned &NumBytes,
248	bool isVarArg);
249	Register getLibcallReg(const Twine &Name);
250	bool FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
251	const Instruction *I, CallingConv::ID CC,
252	unsigned &NumBytes, bool isVarArg);
253	bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
254
255	// OptionalDef handling routines.
256
257	bool isARMNEONPred(const MachineInstr *MI);
258	bool DefinesOptionalPredicate(MachineInstr MI, bool* *CPSR);
259	const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
260	void AddLoadStoreOperands(MVT VT, Address &Addr,
261	const MachineInstrBuilder &MIB,
262	MachineMemOperand::Flags Flags, bool useAM3);
263	};
264
265	} // end anonymous namespace
266
267	// DefinesOptionalPredicate - This is different from DefinesPredicate in that
268	// we don't care about implicit defs here, just places we'll need to add a
269	// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
270	bool ARMFastISel::DefinesOptionalPredicate(MachineInstr MI, bool* *CPSR) {
271	if (!MI->hasOptionalDef())
272	return false;
273
274	// Look to see if our OptionalDef is defining CPSR or CCR.
275	for (const MachineOperand &MO : MI->operands()) {
276	if (!MO.isReg() \|\| !MO.isDef()) continue;
277	if (MO.getReg() == ARM::CPSR)
278	CPSR = true*;
279	}
280	return true;
281	}
282
283	bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
284	const MCInstrDesc &MCID = MI->getDesc();
285
286	// If we're a thumb2 or not NEON function we'll be handled via isPredicable.
287	if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON \|\|
288	AFI->isThumb2Function())
289	return MI->isPredicable();
290
291	for (const MCOperandInfo &opInfo : MCID.operands())
292	if (opInfo.isPredicate())
293	return true;
294
295	return false;
296	}
297
298	// If the machine is predicable go ahead and add the predicate operands, if
299	// it needs default CC operands add those.
300	// TODO: If we want to support thumb1 then we'll need to deal with optional
301	// CPSR defs that need to be added before the remaining operands. See s_cc_out
302	// for descriptions why.
303	const MachineInstrBuilder &
304	ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
305	MachineInstr MI = &MIB;
306
307	// Do we use a predicate? or...
308	// Are we NEON in ARM mode and have a predicate operand? If so, I know
309	// we're not predicable but add it anyways.
310	if (isARMNEONPred(MI))
311	MIB.add(MOs: predOps(Pred: ARMCC::AL));
312
313	// Do we optionally set a predicate? Preds is size > 0 iff the predicate
314	// defines CPSR. All other OptionalDefines in ARM are the CCR register.
315	bool CPSR = false;
316	if (DefinesOptionalPredicate(MI, CPSR: &CPSR))
317	MIB.add(MO: CPSR ? t1CondCodeOp() : condCodeOp());
318	return MIB;
319	}
320
321	Register ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
322	const TargetRegisterClass *RC,
323	Register Op0) {
324	Register ResultReg = createResultReg(RC);
325	const MCInstrDesc &II = TII.get(Opcode: MachineInstOpcode);
326
327	// Make sure the input operand is sufficiently constrained to be legal
328	// for this instruction.
329	Op0 = constrainOperandRegClass(II, Op: Op0, OpNum: `1`);
330	if (II.getNumDefs() >= `1`) {
331	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II,
332	DestReg: ResultReg).addReg(RegNo: Op0));
333	} else {
334	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
335	.addReg(RegNo: Op0));
336	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
337	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
338	.addReg(RegNo: II.implicit_defs()[`0`]));
339	}
340	return ResultReg;
341	}
342
343	Register ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
344	const TargetRegisterClass *RC,
345	Register Op0, Register Op1) {
346	Register ResultReg = createResultReg(RC);
347	const MCInstrDesc &II = TII.get(Opcode: MachineInstOpcode);
348
349	// Make sure the input operands are sufficiently constrained to be legal
350	// for this instruction.
351	Op0 = constrainOperandRegClass(II, Op: Op0, OpNum: `1`);
352	Op1 = constrainOperandRegClass(II, Op: Op1, OpNum: `2`);
353
354	if (II.getNumDefs() >= `1`) {
355	AddOptionalDefs(
356	MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
357	.addReg(RegNo: Op0)
358	.addReg(RegNo: Op1));
359	} else {
360	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
361	.addReg(RegNo: Op0)
362	.addReg(RegNo: Op1));
363	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
364	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
365	.addReg(RegNo: II.implicit_defs()[`0`]));
366	}
367	return ResultReg;
368	}
369
370	Register ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
371	const TargetRegisterClass *RC,
372	Register Op0, uint64_t Imm) {
373	Register ResultReg = createResultReg(RC);
374	const MCInstrDesc &II = TII.get(Opcode: MachineInstOpcode);
375
376	// Make sure the input operand is sufficiently constrained to be legal
377	// for this instruction.
378	Op0 = constrainOperandRegClass(II, Op: Op0, OpNum: `1`);
379	if (II.getNumDefs() >= `1`) {
380	AddOptionalDefs(
381	MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
382	.addReg(RegNo: Op0)
383	.addImm(Val: Imm));
384	} else {
385	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
386	.addReg(RegNo: Op0)
387	.addImm(Val: Imm));
388	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
389	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
390	.addReg(RegNo: II.implicit_defs()[`0`]));
391	}
392	return ResultReg;
393	}
394
395	Register ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
396	const TargetRegisterClass *RC,
397	uint64_t Imm) {
398	Register ResultReg = createResultReg(RC);
399	const MCInstrDesc &II = TII.get(Opcode: MachineInstOpcode);
400
401	if (II.getNumDefs() >= `1`) {
402	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II,
403	DestReg: ResultReg).addImm(Val: Imm));
404	} else {
405	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
406	.addImm(Val: Imm));
407	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
408	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
409	.addReg(RegNo: II.implicit_defs()[`0`]));
410	}
411	return ResultReg;
412	}
413
414	// TODO: Don't worry about 64-bit now, but when this is fixed remove the
415	// checks from the various callers.
416	Register ARMFastISel::ARMMoveToFPReg(MVT VT, Register SrcReg) {
417	if (VT == MVT::f64)
418	return Register ();
419
420	Register MoveReg = createResultReg(RC: TLI.getRegClassFor(VT));
421	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
422	MCID: TII.get(Opcode: ARM::VMOVSR), DestReg: MoveReg)
423	.addReg(RegNo: SrcReg));
424	return MoveReg;
425	}
426
427	Register ARMFastISel::ARMMoveToIntReg(MVT VT, Register SrcReg) {
428	if (VT == MVT::i64)
429	return Register ();
430
431	Register MoveReg = createResultReg(RC: TLI.getRegClassFor(VT));
432	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
433	MCID: TII.get(Opcode: ARM::VMOVRS), DestReg: MoveReg)
434	.addReg(RegNo: SrcReg));
435	return MoveReg;
436	}
437
438	// For double width floating point we need to materialize two constants
439	// (the high and the low) into integer registers then use a move to get
440	// the combined constant into an FP reg.
441	Register ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
442	const APFloat Val = CFP->getValueAPF();
443	bool is64bit = VT == MVT::f64;
444
445	// This checks to see if we can use VFP3 instructions to materialize
446	// a constant, otherwise we have to go through the constant pool.
447	if (TLI.isFPImmLegal(Imm: Val, VT)) {
448	int Imm;
449	unsigned Opc;
450	if (is64bit) {
451	Imm = ARM_AM::getFP64Imm(FPImm: Val);
452	Opc = ARM::FCONSTD;
453	} else {
454	Imm = ARM_AM::getFP32Imm(FPImm: Val);
455	Opc = ARM::FCONSTS;
456	}
457	Register DestReg = createResultReg(RC: TLI.getRegClassFor(VT));
458	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
459	MCID: TII.get(Opcode: Opc), DestReg).addImm(Val: Imm));
460	return DestReg;
461	}
462
463	// Require VFP2 for loading fp constants.
464	if (!Subtarget->hasVFP2Base()) return false;
465
466	// MachineConstantPool wants an explicit alignment.
467	Align Alignment = DL.getPrefTypeAlign(Ty: CFP->getType());
468	unsigned Idx = MCP.getConstantPoolIndex(C: cast<Constant>(Val: CFP), Alignment);
469	Register DestReg = createResultReg(RC: TLI.getRegClassFor(VT));
470	unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
471
472	// The extra reg is for addrmode5.
473	AddOptionalDefs(
474	MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
475	.addConstantPoolIndex(Idx)
476	.addReg(RegNo: `0`));
477	return DestReg;
478	}
479
480	Register ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
481	if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
482	return Register ();
483
484	// If we can do this in a single instruction without a constant pool entry
485	// do so now.
486	const ConstantInt *CI = cast<ConstantInt>(Val: C);
487	if (Subtarget->hasV6T2Ops() && isUInt<`16`>(x: CI->getZExtValue())) {
488	unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
489	const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
490	&ARM::GPRRegClass;
491	Register ImmReg = createResultReg(RC);
492	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
493	MCID: TII.get(Opcode: Opc), DestReg: ImmReg)
494	.addImm(Val: CI->getZExtValue()));
495	return ImmReg;
496	}
497
498	// Use MVN to emit negative constants.
499	if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
500	unsigned Imm = (unsigned)~(CI->getSExtValue());
501	bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Arg: Imm) != -`1`) :
502	(ARM_AM::getSOImmVal(Arg: Imm) != -`1`);
503	if (UseImm) {
504	unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
505	const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
506	&ARM::GPRRegClass;
507	Register ImmReg = createResultReg(RC);
508	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
509	MCID: TII.get(Opcode: Opc), DestReg: ImmReg)
510	.addImm(Val: Imm));
511	return ImmReg;
512	}
513	}
514
515	Register ResultReg;
516	if (Subtarget->useMovt())
517	ResultReg = fastEmit_i(VT, RetVT: VT, Opcode: ISD::Constant, imm0: CI->getZExtValue());
518
519	if (ResultReg)
520	return ResultReg;
521
522	// Load from constant pool. For now 32-bit only.
523	if (VT != MVT::i32)
524	return Register ();
525
526	// MachineConstantPool wants an explicit alignment.
527	Align Alignment = DL.getPrefTypeAlign(Ty: C->getType());
528	unsigned Idx = MCP.getConstantPoolIndex(C, Alignment);
529	ResultReg = createResultReg(RC: TLI.getRegClassFor(VT));
530	if (isThumb2)
531	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
532	MCID: TII.get(Opcode: ARM::t2LDRpci), DestReg: ResultReg)
533	.addConstantPoolIndex(Idx));
534	else {
535	// The extra immediate is for addrmode2.
536	ResultReg = constrainOperandRegClass(II: TII.get(Opcode: ARM::LDRcp), Op: ResultReg, OpNum: `0`);
537	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
538	MCID: TII.get(Opcode: ARM::LDRcp), DestReg: ResultReg)
539	.addConstantPoolIndex(Idx)
540	.addImm(Val: `0`));
541	}
542	return ResultReg;
543	}
544
545	bool ARMFastISel::isPositionIndependent() const {
546	return TLI.isPositionIndependent();
547	}
548
549	Register ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
550	// For now 32-bit only.
551	if (VT != MVT::i32 \|\| GV->isThreadLocal())
552	return Register ();
553
554	// ROPI/RWPI not currently supported.
555	if (Subtarget->isROPI() \|\| Subtarget->isRWPI())
556	return Register ();
557
558	bool IsIndirect = Subtarget->isGVIndirectSymbol(GV);
559	const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
560	: &ARM::GPRRegClass;
561	Register DestReg = createResultReg(RC);
562
563	// FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
564	const GlobalVariable *GVar = dyn_cast<GlobalVariable>(Val: GV);
565	bool IsThreadLocal = GVar && GVar->isThreadLocal();
566	if (!Subtarget->isTargetMachO() && IsThreadLocal)
567	return Register ();
568
569	bool IsPositionIndependent = isPositionIndependent();
570	// Use movw+movt when possible, it avoids constant pool entries.
571	// Non-darwin targets only support static movt relocations in FastISel.
572	if (Subtarget->useMovt() &&
573	(Subtarget->isTargetMachO() \|\| !IsPositionIndependent)) {
574	unsigned Opc;
575	unsigned char TF = `0`;
576	if (Subtarget->isTargetMachO())
577	TF = ARMII::MO_NONLAZY;
578
579	if (IsPositionIndependent)
580	Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
581	else
582	Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
583	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
584	MCID: TII.get(Opcode: Opc), DestReg).addGlobalAddress(GV, Offset: `0`, TargetFlags: TF));
585	} else {
586	// MachineConstantPool wants an explicit alignment.
587	Align Alignment = DL.getPrefTypeAlign(Ty: GV->getType());
588
589	if (Subtarget->isTargetELF() && IsPositionIndependent)
590	return ARMLowerPICELF(GV, VT);
591
592	// Grab index.
593	unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? `4` : `8`) : `0`;
594	unsigned Id = AFI->createPICLabelUId();
595	ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(C: GV, ID: Id,
596	Kind: ARMCP::CPValue,
597	PCAdj);
598	unsigned Idx = MCP.getConstantPoolIndex(V: CPV, Alignment);
599
600	// Load value.
601	MachineInstrBuilder MIB;
602	if (isThumb2) {
603	unsigned Opc = IsPositionIndependent ? ARM::t2LDRpci_pic : ARM::t2LDRpci;
604	MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc),
605	DestReg).addConstantPoolIndex(Idx);
606	if (IsPositionIndependent)
607	MIB.addImm(Val: Id);
608	AddOptionalDefs(MIB);
609	} else {
610	// The extra immediate is for addrmode2.
611	DestReg = constrainOperandRegClass(II: TII.get(Opcode: ARM::LDRcp), Op: DestReg, OpNum: `0`);
612	MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
613	MCID: TII.get(Opcode: ARM::LDRcp), DestReg)
614	.addConstantPoolIndex(Idx)
615	.addImm(Val: `0`);
616	AddOptionalDefs(MIB);
617
618	if (IsPositionIndependent) {
619	unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
620	Register NewDestReg = createResultReg(RC: TLI.getRegClassFor(VT));
621
622	MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt,
623	MIMD, MCID: TII.get(Opcode: Opc), DestReg: NewDestReg)
624	.addReg(RegNo: DestReg)
625	.addImm(Val: Id);
626	AddOptionalDefs(MIB);
627	return NewDestReg;
628	}
629	}
630	}
631
632	if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) \|\|
633	(Subtarget->isTargetMachO() && IsIndirect)) {
634	MachineInstrBuilder MIB;
635	Register NewDestReg = createResultReg(RC: TLI.getRegClassFor(VT));
636	if (isThumb2)
637	MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
638	MCID: TII.get(Opcode: ARM::t2LDRi12), DestReg: NewDestReg)
639	.addReg(RegNo: DestReg)
640	.addImm(Val: `0`);
641	else
642	MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
643	MCID: TII.get(Opcode: ARM::LDRi12), DestReg: NewDestReg)
644	.addReg(RegNo: DestReg)
645	.addImm(Val: `0`);
646	DestReg = NewDestReg;
647	AddOptionalDefs(MIB);
648	}
649
650	return DestReg;
651	}
652
653	Register ARMFastISel::fastMaterializeConstant(const Constant *C) {
654	EVT CEVT = TLI.getValueType(DL, Ty: C->getType(), AllowUnknown: true);
655
656	// Only handle simple types.
657	if (!CEVT.isSimple())
658	return Register ();
659	MVT VT = CEVT.getSimpleVT();
660
661	if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C))
662	return ARMMaterializeFP(CFP, VT);
663	else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C))
664	return ARMMaterializeGV(GV, VT);
665	else if (isa<ConstantInt>(Val: C))
666	return ARMMaterializeInt(C, VT);
667
668	return Register ();
669	}
670
// TODO: Register ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
672
673	Register ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
674	// Don't handle dynamic allocas.
675	if (!FuncInfo.StaticAllocaMap.count(Val: AI))
676	return Register ();
677
678	MVT VT;
679	if (!isLoadTypeLegal(Ty: AI->getType(), VT))
680	return Register ();
681
682	DenseMap<const AllocaInst, int*>::iterator SI =
683	FuncInfo.StaticAllocaMap.find(Val: AI);
684
685	// This will get lowered later into the correct offsets and registers
686	// via rewriteXFrameIndex.
687	if (SI != FuncInfo.StaticAllocaMap.end()) {
688	unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
689	const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
690	Register ResultReg = createResultReg(RC);
691	ResultReg = constrainOperandRegClass(II: TII.get(Opcode: Opc), Op: ResultReg, OpNum: `0`);
692
693	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
694	MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
695	.addFrameIndex(Idx: SI ->second)
696	.addImm(Val: `0`));
697	return ResultReg;
698	}
699
700	return Register ();
701	}
702
703	bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
704	EVT evt = TLI.getValueType(DL, Ty, AllowUnknown: true);
705
706	// Only handle simple types.
707	if (evt == MVT::Other \|\| !evt.isSimple()) return false;
708	VT = evt.getSimpleVT();
709
710	// Handle all legal types, i.e. a register that will directly hold this
711	// value.
712	return TLI.isTypeLegal(VT);
713	}
714
715	bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
716	if (isTypeLegal(Ty, VT)) return true;
717
718	// If this is a type than can be sign or zero-extended to a basic operation
719	// go ahead and accept it now.
720	if (VT == MVT::i1 \|\| VT == MVT::i8 \|\| VT == MVT::i16)
721	return true;
722
723	return false;
724	}
725
726	// Computes the address to get to an object.
727	bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
728	// Some boilerplate from the X86 FastISel.
729	const User U = nullptr*;
730	unsigned Opcode = Instruction::UserOp1;
731	if (const Instruction *I = dyn_cast<Instruction>(Val: Obj)) {
732	// Don't walk into other basic blocks unless the object is an alloca from
733	// another block, otherwise it may not have a virtual register assigned.
734	if (FuncInfo.StaticAllocaMap.count(Val: static_cast<const AllocaInst *>(Obj)) \|\|
735	FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB) {
736	Opcode = I->getOpcode();
737	U = I;
738	}
739	} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Val: Obj)) {
740	Opcode = C->getOpcode();
741	U = C;
742	}
743
744	if (PointerType *Ty = dyn_cast<PointerType>(Val: Obj->getType()))
745	if (Ty->getAddressSpace() > `255`)
746	// Fast instruction selection doesn't support the special
747	// address spaces.
748	return false;
749
750	switch (Opcode) {
751	default:
752	break;
753	case Instruction::BitCast:
754	// Look through bitcasts.
755	return ARMComputeAddress(Obj: U->getOperand(i: `0`), Addr);
756	case Instruction::IntToPtr:
757	// Look past no-op inttoptrs.
758	if (TLI.getValueType(DL, Ty: U->getOperand(i: `0`)->getType()) ==
759	TLI.getPointerTy(DL))
760	return ARMComputeAddress(Obj: U->getOperand(i: `0`), Addr);
761	break;
762	case Instruction::PtrToInt:
763	// Look past no-op ptrtoints.
764	if (TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL))
765	return ARMComputeAddress(Obj: U->getOperand(i: `0`), Addr);
766	break;
767	case Instruction::GetElementPtr: {
768	Address SavedAddr = Addr;
769	int TmpOffset = Addr.getOffset();
770
771	// Iterate through the GEP folding the constants into offsets where
772	// we can.
773	gep_type_iterator GTI = gep_type_begin(GEP: U);
774	for (User::const_op_iterator i = U->op_begin() + `1`, e = U->op_end();
775	i != e; ++i, ++GTI) {
776	const Value Op = i;
777	if (StructType *STy = GTI.getStructTypeOrNull()) {
778	const StructLayout *SL = DL.getStructLayout(Ty: STy);
779	unsigned Idx = cast<ConstantInt>(Val: Op)->getZExtValue();
780	TmpOffset += SL->getElementOffset(Idx);
781	} else {
782	uint64_t S = GTI.getSequentialElementStride(DL);
783	while (true) {
784	if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Op)) {
785	// Constant-offset addressing.
786	TmpOffset += CI->getSExtValue() * S;
787	break;
788	}
789	if (canFoldAddIntoGEP(GEP: U, Add: Op)) {
790	// A compatible add with a constant operand. Fold the constant.
791	ConstantInt *CI =
792	cast<ConstantInt>(Val: cast<AddOperator>(Val: Op)->getOperand(i_nocapture: `1`));
793	TmpOffset += CI->getSExtValue() * S;
794	// Iterate on the other operand.
795	Op = cast<AddOperator>(Val: Op)->getOperand(i_nocapture: `0`);
796	continue;
797	}
798	// Unsupported
799	goto unsupported_gep;
800	}
801	}
802	}
803
804	// Try to grab the base operand now.
805	Addr.setOffset(TmpOffset);
806	if (ARMComputeAddress(Obj: U->getOperand(i: `0`), Addr)) return true;
807
808	// We failed, restore everything and try the other options.
809	Addr = SavedAddr;
810
811	unsupported_gep:
812	break;
813	}
814	case Instruction::Alloca: {
815	const AllocaInst *AI = cast<AllocaInst>(Val: Obj);
816	DenseMap<const AllocaInst, int*>::iterator SI =
817	FuncInfo.StaticAllocaMap.find(Val: AI);
818	if (SI != FuncInfo.StaticAllocaMap.end()) {
819	Addr.setKind(Address::FrameIndexBase);
820	Addr.setFI(SI ->second);
821	return true;
822	}
823	break;
824	}
825	}
826
827	// Try to get this in a register if nothing else has worked.
828	if (!Addr.getReg())
829	Addr.setReg(getRegForValue(V: Obj));
830	return Addr.getReg();
831	}
832
833	void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
834	bool needsLowering = false;
835	switch (VT.SimpleTy) {
836	default: llvm_unreachable("Unhandled load/store type!");
837	case MVT::i1:
838	case MVT::i8:
839	case MVT::i16:
840	case MVT::i32:
841	if (!useAM3) {
842	// Integer loads/stores handle 12-bit offsets.
843	needsLowering = ((Addr.getOffset() & `0xfff`) != Addr.getOffset());
844	// Handle negative offsets.
845	if (needsLowering && isThumb2)
846	needsLowering = !(Subtarget->hasV6T2Ops() && Addr.getOffset() < `0` &&
847	Addr.getOffset() > -`256`);
848	} else {
849	// ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
850	needsLowering = (Addr.getOffset() > `255` \|\| Addr.getOffset() < -`255`);
851	}
852	break;
853	case MVT::f32:
854	case MVT::f64:
855	// Floating point operands handle 8-bit offsets.
856	needsLowering = ((Addr.getOffset() & `0xff`) != Addr.getOffset());
857	break;
858	}
859
860	// If this is a stack pointer and the offset needs to be simplified then
861	// put the alloca address into a register, set the base type back to
862	// register and continue. This should almost never happen.
863	if (needsLowering && Addr.isFIBase()) {
864	const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
865	: &ARM::GPRRegClass;
866	Register ResultReg = createResultReg(RC);
867	unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
868	AddOptionalDefs(
869	MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
870	.addFrameIndex(Idx: Addr.getFI())
871	.addImm(Val: `0`));
872	Addr.setKind(Address::RegBase);
873	Addr.setReg(ResultReg);
874	}
875
876	// Since the offset is too large for the load/store instruction
877	// get the reg+offset into a register.
878	if (needsLowering) {
879	Addr.setReg(fastEmit_ri_(VT: MVT::i32, Opcode: ISD::ADD, Op0: Addr.getReg(),
880	Imm: Addr.getOffset(), ImmType: MVT::i32));
881	Addr.setOffset(`0`);
882	}
883	}
884
885	void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
886	const MachineInstrBuilder &MIB,
887	MachineMemOperand::Flags Flags,
888	bool useAM3) {
889	// addrmode5 output depends on the selection dag addressing dividing the
890	// offset by 4 that it then later multiplies. Do this here as well.
891	if (VT.SimpleTy == MVT::f32 \|\| VT.SimpleTy == MVT::f64)
892	Addr.setOffset(Addr.getOffset() / `4`);
893
894	// Frame base works a bit differently. Handle it separately.
895	if (Addr.isFIBase()) {
896	int FI = Addr.getFI();
897	int Offset = Addr.getOffset();
898	MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
899	PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI, Offset), F: Flags,
900	Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
901	// Now add the rest of the operands.
902	MIB.addFrameIndex(Idx: FI);
903
904	// ARM halfword load/stores and signed byte loads need an additional
905	// operand.
906	if (useAM3) {
907	int Imm = (Addr.getOffset() < `0`) ? (`0x100` \| -Addr.getOffset())
908	: Addr.getOffset();
909	MIB.addReg(RegNo: `0`);
910	MIB.addImm(Val: Imm);
911	} else {
912	MIB.addImm(Val: Addr.getOffset());
913	}
914	MIB.addMemOperand(MMO);
915	} else {
916	// Now add the rest of the operands.
917	MIB.addReg(RegNo: Addr.getReg());
918
919	// ARM halfword load/stores and signed byte loads need an additional
920	// operand.
921	if (useAM3) {
922	int Imm = (Addr.getOffset() < `0`) ? (`0x100` \| -Addr.getOffset())
923	: Addr.getOffset();
924	MIB.addReg(RegNo: `0`);
925	MIB.addImm(Val: Imm);
926	} else {
927	MIB.addImm(Val: Addr.getOffset());
928	}
929	}
930	AddOptionalDefs(MIB);
931	}
932
933	bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
934	MaybeAlign Alignment, bool isZExt,
935	bool allocReg) {
936	unsigned Opc;
937	bool useAM3 = false;
938	bool needVMOV = false;
939	const TargetRegisterClass *RC;
940	switch (VT.SimpleTy) {
941	// This is mostly going to be Neon/vector support.
942	default: return false;
943	case MVT::i1:
944	case MVT::i8:
945	if (isThumb2) {
946	if (Addr.getOffset() < `0` && Addr.getOffset() > -`256` &&
947	Subtarget->hasV6T2Ops())
948	Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
949	else
950	Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
951	} else {
952	if (isZExt) {
953	Opc = ARM::LDRBi12;
954	} else {
955	Opc = ARM::LDRSB;
956	useAM3 = true;
957	}
958	}
959	RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
960	break;
961	case MVT::i16:
962	if (Alignment && *Alignment < Align (`2`) &&
963	!Subtarget->allowsUnalignedMem())
964	return false;
965
966	if (isThumb2) {
967	if (Addr.getOffset() < `0` && Addr.getOffset() > -`256` &&
968	Subtarget->hasV6T2Ops())
969	Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
970	else
971	Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
972	} else {
973	Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
974	useAM3 = true;
975	}
976	RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
977	break;
978	case MVT::i32:
979	if (Alignment && *Alignment < Align (`4`) &&
980	!Subtarget->allowsUnalignedMem())
981	return false;
982
983	if (isThumb2) {
984	if (Addr.getOffset() < `0` && Addr.getOffset() > -`256` &&
985	Subtarget->hasV6T2Ops())
986	Opc = ARM::t2LDRi8;
987	else
988	Opc = ARM::t2LDRi12;
989	} else {
990	Opc = ARM::LDRi12;
991	}
992	RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
993	break;
994	case MVT::f32:
995	if (!Subtarget->hasVFP2Base()) return false;
996	// Unaligned loads need special handling. Floats require word-alignment.
997	if (Alignment && *Alignment < Align (`4`)) {
998	needVMOV = true;
999	VT = MVT::i32;
1000	Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
1001	RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
1002	} else {
1003	Opc = ARM::VLDRS;
1004	RC = TLI.getRegClassFor(VT);
1005	}
1006	break;
1007	case MVT::f64:
1008	// Can load and store double precision even without FeatureFP64
1009	if (!Subtarget->hasVFP2Base()) return false;
1010	// FIXME: Unaligned loads need special handling. Doublewords require
1011	// word-alignment.
1012	if (Alignment && *Alignment < Align (`4`))
1013	return false;
1014
1015	Opc = ARM::VLDRD;
1016	RC = TLI.getRegClassFor(VT);
1017	break;
1018	}
1019	// Simplify this down to something we can handle.
1020	ARMSimplifyAddress(Addr, VT, useAM3);
1021
1022	// Create the base instruction, then add the operands.
1023	if (allocReg)
1024	ResultReg = createResultReg(RC);
1025	assert(ResultReg.isVirtual() && "Expected an allocated virtual register.");
1026	MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1027	MCID: TII.get(Opcode: Opc), DestReg: ResultReg);
1028	AddLoadStoreOperands(VT, Addr, MIB, Flags: MachineMemOperand::MOLoad, useAM3);
1029
1030	// If we had an unaligned load of a float we've converted it to an regular
1031	// load. Now we must move from the GRP to the FP register.
1032	if (needVMOV) {
1033	Register MoveReg = createResultReg(RC: TLI.getRegClassFor(VT: MVT::f32));
1034	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1035	MCID: TII.get(Opcode: ARM::VMOVSR), DestReg: MoveReg)
1036	.addReg(RegNo: ResultReg));
1037	ResultReg = MoveReg;
1038	}
1039	return true;
1040	}
1041
1042	bool ARMFastISel::SelectLoad(const Instruction *I) {
1043	// Atomic loads need special handling.
1044	if (cast<LoadInst>(Val: I)->isAtomic())
1045	return false;
1046
1047	const Value *SV = I->getOperand(i: `0`);
1048	if (TLI.supportSwiftError()) {
1049	// Swifterror values can come from either a function parameter with
1050	// swifterror attribute or an alloca with swifterror attribute.
1051	if (const Argument *Arg = dyn_cast<Argument>(Val: SV)) {
1052	if (Arg->hasSwiftErrorAttr())
1053	return false;
1054	}
1055
1056	if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: SV)) {
1057	if (Alloca->isSwiftError())
1058	return false;
1059	}
1060	}
1061
1062	// Verify we have a legal type before going any further.
1063	MVT VT;
1064	if (!isLoadTypeLegal(Ty: I->getType(), VT))
1065	return false;
1066
1067	// See if we can handle this address.
1068	Address Addr;
1069	if (!ARMComputeAddress(Obj: I->getOperand(i: `0`), Addr)) return false;
1070
1071	Register ResultReg;
1072	if (!ARMEmitLoad(VT, ResultReg, Addr, Alignment: cast<LoadInst>(Val: I)->getAlign()))
1073	return false;
1074	updateValueMap(I, Reg: ResultReg);
1075	return true;
1076	}
1077
1078	bool ARMFastISel::ARMEmitStore(MVT VT, Register SrcReg, Address &Addr,
1079	MaybeAlign Alignment) {
1080	unsigned StrOpc;
1081	bool useAM3 = false;
1082	switch (VT.SimpleTy) {
1083	// This is mostly going to be Neon/vector support.
1084	default: return false;
1085	case MVT::i1: {
1086	Register Res = createResultReg(RC: isThumb2 ? &ARM::tGPRRegClass
1087	: &ARM::GPRRegClass);
1088	unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
1089	SrcReg = constrainOperandRegClass(II: TII.get(Opcode: Opc), Op: SrcReg, OpNum: `1`);
1090	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1091	MCID: TII.get(Opcode: Opc), DestReg: Res)
1092	.addReg(RegNo: SrcReg).addImm(Val: `1`));
1093	SrcReg = Res;
1094	[[fallthrough]];
1095	}
1096	case MVT::i8:
1097	if (isThumb2) {
1098	if (Addr.getOffset() < `0` && Addr.getOffset() > -`256` &&
1099	Subtarget->hasV6T2Ops())
1100	StrOpc = ARM::t2STRBi8;
1101	else
1102	StrOpc = ARM::t2STRBi12;
1103	} else {
1104	StrOpc = ARM::STRBi12;
1105	}
1106	break;
1107	case MVT::i16:
1108	if (Alignment && *Alignment < Align (`2`) &&
1109	!Subtarget->allowsUnalignedMem())
1110	return false;
1111
1112	if (isThumb2) {
1113	if (Addr.getOffset() < `0` && Addr.getOffset() > -`256` &&
1114	Subtarget->hasV6T2Ops())
1115	StrOpc = ARM::t2STRHi8;
1116	else
1117	StrOpc = ARM::t2STRHi12;
1118	} else {
1119	StrOpc = ARM::STRH;
1120	useAM3 = true;
1121	}
1122	break;
1123	case MVT::i32:
1124	if (Alignment && *Alignment < Align (`4`) &&
1125	!Subtarget->allowsUnalignedMem())
1126	return false;
1127
1128	if (isThumb2) {
1129	if (Addr.getOffset() < `0` && Addr.getOffset() > -`256` &&
1130	Subtarget->hasV6T2Ops())
1131	StrOpc = ARM::t2STRi8;
1132	else
1133	StrOpc = ARM::t2STRi12;
1134	} else {
1135	StrOpc = ARM::STRi12;
1136	}
1137	break;
1138	case MVT::f32:
1139	if (!Subtarget->hasVFP2Base()) return false;
1140	// Unaligned stores need special handling. Floats require word-alignment.
1141	if (Alignment && *Alignment < Align (`4`)) {
1142	Register MoveReg = createResultReg(RC: TLI.getRegClassFor(VT: MVT::i32));
1143	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1144	MCID: TII.get(Opcode: ARM::VMOVRS), DestReg: MoveReg)
1145	.addReg(RegNo: SrcReg));
1146	SrcReg = MoveReg;
1147	VT = MVT::i32;
1148	StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
1149	} else {
1150	StrOpc = ARM::VSTRS;
1151	}
1152	break;
1153	case MVT::f64:
1154	// Can load and store double precision even without FeatureFP64
1155	if (!Subtarget->hasVFP2Base()) return false;
1156	// FIXME: Unaligned stores need special handling. Doublewords require
1157	// word-alignment.
1158	if (Alignment && *Alignment < Align (`4`))
1159	return false;
1160
1161	StrOpc = ARM::VSTRD;
1162	break;
1163	}
1164	// Simplify this down to something we can handle.
1165	ARMSimplifyAddress(Addr, VT, useAM3);
1166
1167	// Create the base instruction, then add the operands.
1168	SrcReg = constrainOperandRegClass(II: TII.get(Opcode: StrOpc), Op: SrcReg, OpNum: `0`);
1169	MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1170	MCID: TII.get(Opcode: StrOpc))
1171	.addReg(RegNo: SrcReg);
1172	AddLoadStoreOperands(VT, Addr, MIB, Flags: MachineMemOperand::MOStore, useAM3);
1173	return true;
1174	}
1175
1176	bool ARMFastISel::SelectStore(const Instruction *I) {
1177	Value *Op0 = I->getOperand(i: `0`);
1178	Register SrcReg;
1179
1180	// Atomic stores need special handling.
1181	if (cast<StoreInst>(Val: I)->isAtomic())
1182	return false;
1183
1184	const Value *PtrV = I->getOperand(i: `1`);
1185	if (TLI.supportSwiftError()) {
1186	// Swifterror values can come from either a function parameter with
1187	// swifterror attribute or an alloca with swifterror attribute.
1188	if (const Argument *Arg = dyn_cast<Argument>(Val: PtrV)) {
1189	if (Arg->hasSwiftErrorAttr())
1190	return false;
1191	}
1192
1193	if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: PtrV)) {
1194	if (Alloca->isSwiftError())
1195	return false;
1196	}
1197	}
1198
1199	// Verify we have a legal type before going any further.
1200	MVT VT;
1201	if (!isLoadTypeLegal(Ty: I->getOperand(i: `0`)->getType(), VT))
1202	return false;
1203
1204	// Get the value to be stored into a register.
1205	SrcReg = getRegForValue(V: Op0);
1206	if (!SrcReg)
1207	return false;
1208
1209	// See if we can handle this address.
1210	Address Addr;
1211	if (!ARMComputeAddress(Obj: I->getOperand(i: `1`), Addr))
1212	return false;
1213
1214	if (!ARMEmitStore(VT, SrcReg, Addr, Alignment: cast<StoreInst>(Val: I)->getAlign()))
1215	return false;
1216	return true;
1217	}
1218
1219	static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
1220	switch (Pred) {
1221	// Needs two compares...
1222	case CmpInst::FCMP_ONE:
1223	case CmpInst::FCMP_UEQ:
1224	default:
1225	// AL is our "false" for now. The other two need more compares.
1226	return ARMCC::AL;
1227	case CmpInst::ICMP_EQ:
1228	case CmpInst::FCMP_OEQ:
1229	return ARMCC::EQ;
1230	case CmpInst::ICMP_SGT:
1231	case CmpInst::FCMP_OGT:
1232	return ARMCC::GT;
1233	case CmpInst::ICMP_SGE:
1234	case CmpInst::FCMP_OGE:
1235	return ARMCC::GE;
1236	case CmpInst::ICMP_UGT:
1237	case CmpInst::FCMP_UGT:
1238	return ARMCC::HI;
1239	case CmpInst::FCMP_OLT:
1240	return ARMCC::MI;
1241	case CmpInst::ICMP_ULE:
1242	case CmpInst::FCMP_OLE:
1243	return ARMCC::LS;
1244	case CmpInst::FCMP_ORD:
1245	return ARMCC::VC;
1246	case CmpInst::FCMP_UNO:
1247	return ARMCC::VS;
1248	case CmpInst::FCMP_UGE:
1249	return ARMCC::PL;
1250	case CmpInst::ICMP_SLT:
1251	case CmpInst::FCMP_ULT:
1252	return ARMCC::LT;
1253	case CmpInst::ICMP_SLE:
1254	case CmpInst::FCMP_ULE:
1255	return ARMCC::LE;
1256	case CmpInst::FCMP_UNE:
1257	case CmpInst::ICMP_NE:
1258	return ARMCC::NE;
1259	case CmpInst::ICMP_UGE:
1260	return ARMCC::HS;
1261	case CmpInst::ICMP_ULT:
1262	return ARMCC::LO;
1263	}
1264	}
1265
1266	bool ARMFastISel::SelectBranch(const Instruction *I) {
1267	const BranchInst *BI = cast<BranchInst>(Val: I);
1268	MachineBasicBlock *TBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: `0`));
1269	MachineBasicBlock *FBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: `1`));
1270
1271	// Simple branch support.
1272
1273	// If we can, avoid recomputing the compare - redoing it could lead to wonky
1274	// behavior.
1275	if (const CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition())) {
1276	if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
1277	// Get the compare predicate.
1278	// Try to take advantage of fallthrough opportunities.
1279	CmpInst::Predicate Predicate = CI->getPredicate();
1280	if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
1281	std::swap(a&: TBB, b&: FBB);
1282	Predicate = CmpInst::getInversePredicate(pred: Predicate);
1283	}
1284
1285	ARMCC::CondCodes ARMPred = getComparePred(Pred: Predicate);
1286
1287	// We may not handle every CC for now.
1288	if (ARMPred == ARMCC::AL) return false;
1289
1290	// Emit the compare.
1291	if (!ARMEmitCmp(Src1Value: CI->getOperand(i_nocapture: `0`), Src2Value: CI->getOperand(i_nocapture: `1`), isZExt: CI->isUnsigned()))
1292	return false;
1293
1294	unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1295	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: BrOpc))
1296	.addMBB(MBB: TBB).addImm(Val: ARMPred).addReg(RegNo: ARM::CPSR);
1297	finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
1298	return true;
1299	}
1300	} else if (TruncInst *TI = dyn_cast<TruncInst>(Val: BI->getCondition())) {
1301	MVT SourceVT;
1302	if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1303	(isLoadTypeLegal(Ty: TI->getOperand(i_nocapture: `0`)->getType(), VT&: SourceVT))) {
1304	unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
1305	Register OpReg = getRegForValue(V: TI->getOperand(i_nocapture: `0`));
1306	OpReg = constrainOperandRegClass(II: TII.get(Opcode: TstOpc), Op: OpReg, OpNum: `0`);
1307	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1308	MCID: TII.get(Opcode: TstOpc))
1309	.addReg(RegNo: OpReg).addImm(Val: `1`));
1310
1311	unsigned CCMode = ARMCC::NE;
1312	if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
1313	std::swap(a&: TBB, b&: FBB);
1314	CCMode = ARMCC::EQ;
1315	}
1316
1317	unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1318	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: BrOpc))
1319	.addMBB(MBB: TBB).addImm(Val: CCMode).addReg(RegNo: ARM::CPSR);
1320
1321	finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
1322	return true;
1323	}
1324	} else if (const ConstantInt *CI =
1325	dyn_cast<ConstantInt>(Val: BI->getCondition())) {
1326	uint64_t Imm = CI->getZExtValue();
1327	MachineBasicBlock *Target = (Imm == `0`) ? FBB : TBB;
1328	fastEmitBranch(MSucc: Target, DbgLoc: MIMD.getDL());
1329	return true;
1330	}
1331
1332	Register CmpReg = getRegForValue(V: BI->getCondition());
1333	if (!CmpReg)
1334	return false;
1335
1336	// We've been divorced from our compare! Our block was split, and
1337	// now our compare lives in a predecessor block. We musn't
1338	// re-compare here, as the children of the compare aren't guaranteed
1339	// live across the block boundary (we could* check for this).*
1340	// Regardless, the compare has been done in the predecessor block,
1341	// and it left a value for us in a virtual register. Ergo, we test
1342	// the one-bit value left in the virtual register.
1343	unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
1344	CmpReg = constrainOperandRegClass(II: TII.get(Opcode: TstOpc), Op: CmpReg, OpNum: `0`);
1345	AddOptionalDefs(
1346	MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: TstOpc))
1347	.addReg(RegNo: CmpReg)
1348	.addImm(Val: `1`));
1349
1350	unsigned CCMode = ARMCC::NE;
1351	if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
1352	std::swap(a&: TBB, b&: FBB);
1353	CCMode = ARMCC::EQ;
1354	}
1355
1356	unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1357	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: BrOpc))
1358	.addMBB(MBB: TBB).addImm(Val: CCMode).addReg(RegNo: ARM::CPSR);
1359	finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
1360	return true;
1361	}
1362
1363	bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
1364	Register AddrReg = getRegForValue(V: I->getOperand(i: `0`));
1365	if (!AddrReg)
1366	return false;
1367
1368	unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
1369	assert(isThumb2 \|\| Subtarget->hasV4TOps());
1370
1371	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1372	MCID: TII.get(Opcode: Opc)).addReg(RegNo: AddrReg));
1373
1374	const IndirectBrInst *IB = cast<IndirectBrInst>(Val: I);
1375	for (const BasicBlock *SuccBB : IB->successors())
1376	FuncInfo.MBB->addSuccessor(Succ: FuncInfo.getMBB(BB: SuccBB));
1377
1378	return true;
1379	}
1380
1381	bool ARMFastISel::ARMEmitCmp(const Value Src1Value, const* Value *Src2Value,
1382	bool isZExt) {
1383	Type *Ty = Src1Value->getType();
1384	EVT SrcEVT = TLI.getValueType(DL, Ty, AllowUnknown: true);
1385	if (!SrcEVT.isSimple()) return false;
1386	MVT SrcVT = SrcEVT.getSimpleVT();
1387
1388	if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
1389	return false;
1390
1391	if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() \|\| !Subtarget->hasFP64()))
1392	return false;
1393
1394	// Check to see if the 2nd operand is a constant that we can encode directly
1395	// in the compare.
1396	int Imm = `0`;
1397	bool UseImm = false;
1398	bool isNegativeImm = false;
1399	// FIXME: At -O0 we don't have anything that canonicalizes operand order.
1400	// Thus, Src1Value may be a ConstantInt, but we're missing it.
1401	if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: Src2Value)) {
1402	if (SrcVT == MVT::i32 \|\| SrcVT == MVT::i16 \|\| SrcVT == MVT::i8 \|\|
1403	SrcVT == MVT::i1) {
1404	const APInt &CIVal = ConstInt->getValue();
1405	Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
1406	// For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
1407	// then a cmn, because there is no way to represent 2147483648 as a
1408	// signed 32-bit int.
1409	if (Imm < `0` && Imm != (int)`0x80000000`) {
1410	isNegativeImm = true;
1411	Imm = -Imm;
1412	}
1413	UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Arg: Imm) != -`1`) :
1414	(ARM_AM::getSOImmVal(Arg: Imm) != -`1`);
1415	}
1416	} else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Val: Src2Value)) {
1417	if (SrcVT == MVT::f32 \|\| SrcVT == MVT::f64)
1418	if (ConstFP->isZero() && !ConstFP->isNegative())
1419	UseImm = true;
1420	}
1421
1422	unsigned CmpOpc;
1423	bool isICmp = true;
1424	bool needsExt = false;
1425	switch (SrcVT.SimpleTy) {
1426	default: return false;
1427	// TODO: Verify compares.
1428	case MVT::f32:
1429	isICmp = false;
1430	CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
1431	break;
1432	case MVT::f64:
1433	isICmp = false;
1434	CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
1435	break;
1436	case MVT::i1:
1437	case MVT::i8:
1438	case MVT::i16:
1439	needsExt = true;
1440	[[fallthrough]];
1441	case MVT::i32:
1442	if (isThumb2) {
1443	if (!UseImm)
1444	CmpOpc = ARM::t2CMPrr;
1445	else
1446	CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
1447	} else {
1448	if (!UseImm)
1449	CmpOpc = ARM::CMPrr;
1450	else
1451	CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
1452	}
1453	break;
1454	}
1455
1456	Register SrcReg1 = getRegForValue(V: Src1Value);
1457	if (!SrcReg1)
1458	return false;
1459
1460	Register SrcReg2;
1461	if (!UseImm) {
1462	SrcReg2 = getRegForValue(V: Src2Value);
1463	if (!SrcReg2)
1464	return false;
1465	}
1466
1467	// We have i1, i8, or i16, we need to either zero extend or sign extend.
1468	if (needsExt) {
1469	SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg: SrcReg1, DestVT: MVT::i32, isZExt);
1470	if (!SrcReg1)
1471	return false;
1472	if (!UseImm) {
1473	SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg: SrcReg2, DestVT: MVT::i32, isZExt);
1474	if (!SrcReg2)
1475	return false;
1476	}
1477	}
1478
1479	const MCInstrDesc &II = TII.get(Opcode: CmpOpc);
1480	SrcReg1 = constrainOperandRegClass(II, Op: SrcReg1, OpNum: `0`);
1481	if (!UseImm) {
1482	SrcReg2 = constrainOperandRegClass(II, Op: SrcReg2, OpNum: `1`);
1483	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
1484	.addReg(RegNo: SrcReg1).addReg(RegNo: SrcReg2));
1485	} else {
1486	MachineInstrBuilder MIB;
1487	MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
1488	.addReg(RegNo: SrcReg1);
1489
1490	// Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
1491	if (isICmp)
1492	MIB.addImm(Val: Imm);
1493	AddOptionalDefs(MIB);
1494	}
1495
1496	// For floating point we need to move the result to a comparison register
1497	// that we can then use for branches.
1498	if (Ty->isFloatTy() \|\| Ty->isDoubleTy())
1499	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1500	MCID: TII.get(Opcode: ARM::FMSTAT)));
1501	return true;
1502	}
1503
1504	bool ARMFastISel::SelectCmp(const Instruction *I) {
1505	const CmpInst *CI = cast<CmpInst>(Val: I);
1506
1507	// Get the compare predicate.
1508	ARMCC::CondCodes ARMPred = getComparePred(Pred: CI->getPredicate());
1509
1510	// We may not handle every CC for now.
1511	if (ARMPred == ARMCC::AL) return false;
1512
1513	// Emit the compare.
1514	if (!ARMEmitCmp(Src1Value: CI->getOperand(i_nocapture: `0`), Src2Value: CI->getOperand(i_nocapture: `1`), isZExt: CI->isUnsigned()))
1515	return false;
1516
1517	// Now set a register based on the comparison. Explicitly set the predicates
1518	// here.
1519	unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1520	const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
1521	: &ARM::GPRRegClass;
1522	Register DestReg = createResultReg(RC);
1523	Constant Zero = ConstantInt::get(Ty: Type::getInt32Ty(C&: Context), V: `0`);
1524	Register ZeroReg = fastMaterializeConstant(C: Zero);
1525	// ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
1526	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: MovCCOpc), DestReg)
1527	.addReg(RegNo: ZeroReg).addImm(Val: `1`)
1528	.addImm(Val: ARMPred).addReg(RegNo: ARM::CPSR);
1529
1530	updateValueMap(I, Reg: DestReg);
1531	return true;
1532	}
1533
1534	bool ARMFastISel::SelectFPExt(const Instruction *I) {
1535	// Make sure we have VFP and that we're extending float to double.
1536	if (!Subtarget->hasVFP2Base() \|\| !Subtarget->hasFP64()) return false;
1537
1538	Value *V = I->getOperand(i: `0`);
1539	if (!I->getType()->isDoubleTy() \|\|
1540	!V->getType()->isFloatTy()) return false;
1541
1542	Register Op = getRegForValue(V);
1543	if (!Op)
1544	return false;
1545
1546	Register Result = createResultReg(RC: &ARM::DPRRegClass);
1547	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1548	MCID: TII.get(Opcode: ARM::VCVTDS), DestReg: Result)
1549	.addReg(RegNo: Op));
1550	updateValueMap(I, Reg: Result);
1551	return true;
1552	}
1553
1554	bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
1555	// Make sure we have VFP and that we're truncating double to float.
1556	if (!Subtarget->hasVFP2Base() \|\| !Subtarget->hasFP64()) return false;
1557
1558	Value *V = I->getOperand(i: `0`);
1559	if (!(I->getType()->isFloatTy() &&
1560	V->getType()->isDoubleTy())) return false;
1561
1562	Register Op = getRegForValue(V);
1563	if (!Op)
1564	return false;
1565
1566	Register Result = createResultReg(RC: &ARM::SPRRegClass);
1567	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1568	MCID: TII.get(Opcode: ARM::VCVTSD), DestReg: Result)
1569	.addReg(RegNo: Op));
1570	updateValueMap(I, Reg: Result);
1571	return true;
1572	}
1573
1574	bool ARMFastISel::SelectIToFP(const Instruction I, bool* isSigned) {
1575	// Make sure we have VFP.
1576	if (!Subtarget->hasVFP2Base()) return false;
1577
1578	MVT DstVT;
1579	Type *Ty = I->getType();
1580	if (!isTypeLegal(Ty, VT&: DstVT))
1581	return false;
1582
1583	Value *Src = I->getOperand(i: `0`);
1584	EVT SrcEVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
1585	if (!SrcEVT.isSimple())
1586	return false;
1587	MVT SrcVT = SrcEVT.getSimpleVT();
1588	if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
1589	return false;
1590
1591	Register SrcReg = getRegForValue(V: Src);
1592	if (!SrcReg)
1593	return false;
1594
1595	// Handle sign-extension.
1596	if (SrcVT == MVT::i16 \|\| SrcVT == MVT::i8) {
1597	SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT: MVT::i32,
1598	/isZExt/!isSigned);
1599	if (!SrcReg)
1600	return false;
1601	}
1602
1603	// The conversion routine works on fp-reg to fp-reg and the operand above
1604	// was an integer, move it to the fp registers if possible.
1605	Register FP = ARMMoveToFPReg(VT: MVT::f32, SrcReg);
1606	if (!FP)
1607	return false;
1608
1609	unsigned Opc;
1610	if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
1611	else if (Ty->isDoubleTy() && Subtarget->hasFP64())
1612	Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
1613	else return false;
1614
1615	Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT: DstVT));
1616	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1617	MCID: TII.get(Opcode: Opc), DestReg: ResultReg).addReg(RegNo: FP));
1618	updateValueMap(I, Reg: ResultReg);
1619	return true;
1620	}
1621
1622	bool ARMFastISel::SelectFPToI(const Instruction I, bool* isSigned) {
1623	// Make sure we have VFP.
1624	if (!Subtarget->hasVFP2Base()) return false;
1625
1626	MVT DstVT;
1627	Type *RetTy = I->getType();
1628	if (!isTypeLegal(Ty: RetTy, VT&: DstVT))
1629	return false;
1630
1631	Register Op = getRegForValue(V: I->getOperand(i: `0`));
1632	if (!Op)
1633	return false;
1634
1635	unsigned Opc;
1636	Type *OpTy = I->getOperand(i: `0`)->getType();
1637	if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
1638	else if (OpTy->isDoubleTy() && Subtarget->hasFP64())
1639	Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
1640	else return false;
1641
1642	// f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
1643	Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT: MVT::f32));
1644	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1645	MCID: TII.get(Opcode: Opc), DestReg: ResultReg).addReg(RegNo: Op));
1646
1647	// This result needs to be in an integer register, but the conversion only
1648	// takes place in fp-regs.
1649	Register IntReg = ARMMoveToIntReg(VT: DstVT, SrcReg: ResultReg);
1650	if (!IntReg)
1651	return false;
1652
1653	updateValueMap(I, Reg: IntReg);
1654	return true;
1655	}
1656
1657	bool ARMFastISel::SelectSelect(const Instruction *I) {
1658	MVT VT;
1659	if (!isTypeLegal(Ty: I->getType(), VT))
1660	return false;
1661
1662	// Things need to be register sized for register moves.
1663	if (VT != MVT::i32) return false;
1664
1665	Register CondReg = getRegForValue(V: I->getOperand(i: `0`));
1666	if (!CondReg)
1667	return false;
1668	Register Op1Reg = getRegForValue(V: I->getOperand(i: `1`));
1669	if (!Op1Reg)
1670	return false;
1671
1672	// Check to see if we can use an immediate in the conditional move.
1673	int Imm = `0`;
1674	bool UseImm = false;
1675	bool isNegativeImm = false;
1676	if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: I->getOperand(i: `2`))) {
1677	assert(VT == MVT::i32 && "Expecting an i32.");
1678	Imm = (int)ConstInt->getValue().getZExtValue();
1679	if (Imm < `0`) {
1680	isNegativeImm = true;
1681	Imm = ~Imm;
1682	}
1683	UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Arg: Imm) != -`1`) :
1684	(ARM_AM::getSOImmVal(Arg: Imm) != -`1`);
1685	}
1686
1687	Register Op2Reg;
1688	if (!UseImm) {
1689	Op2Reg = getRegForValue(V: I->getOperand(i: `2`));
1690	if (!Op2Reg)
1691	return false;
1692	}
1693
1694	unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
1695	CondReg = constrainOperandRegClass(II: TII.get(Opcode: TstOpc), Op: CondReg, OpNum: `0`);
1696	AddOptionalDefs(
1697	MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: TstOpc))
1698	.addReg(RegNo: CondReg)
1699	.addImm(Val: `1`));
1700
1701	unsigned MovCCOpc;
1702	const TargetRegisterClass *RC;
1703	if (!UseImm) {
1704	RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
1705	MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
1706	} else {
1707	RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
1708	if (!isNegativeImm)
1709	MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1710	else
1711	MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
1712	}
1713	Register ResultReg = createResultReg(RC);
1714	if (!UseImm) {
1715	Op2Reg = constrainOperandRegClass(II: TII.get(Opcode: MovCCOpc), Op: Op2Reg, OpNum: `1`);
1716	Op1Reg = constrainOperandRegClass(II: TII.get(Opcode: MovCCOpc), Op: Op1Reg, OpNum: `2`);
1717	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: MovCCOpc),
1718	DestReg: ResultReg)
1719	.addReg(RegNo: Op2Reg)
1720	.addReg(RegNo: Op1Reg)
1721	.addImm(Val: ARMCC::NE)
1722	.addReg(RegNo: ARM::CPSR);
1723	} else {
1724	Op1Reg = constrainOperandRegClass(II: TII.get(Opcode: MovCCOpc), Op: Op1Reg, OpNum: `1`);
1725	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: MovCCOpc),
1726	DestReg: ResultReg)
1727	.addReg(RegNo: Op1Reg)
1728	.addImm(Val: Imm)
1729	.addImm(Val: ARMCC::EQ)
1730	.addReg(RegNo: ARM::CPSR);
1731	}
1732	updateValueMap(I, Reg: ResultReg);
1733	return true;
1734	}
1735
1736	bool ARMFastISel::SelectDiv(const Instruction I, bool* isSigned) {
1737	MVT VT;
1738	Type *Ty = I->getType();
1739	if (!isTypeLegal(Ty, VT))
1740	return false;
1741
1742	// If we have integer div support we should have selected this automagically.
1743	// In case we have a real miss go ahead and return false and we'll pick
1744	// it up later.
1745	if (Subtarget->hasDivideInThumbMode())
1746	return false;
1747
1748	// Otherwise emit a libcall.
1749	RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1750	if (VT == MVT::i8)
1751	LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
1752	else if (VT == MVT::i16)
1753	LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
1754	else if (VT == MVT::i32)
1755	LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
1756	else if (VT == MVT::i64)
1757	LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
1758	else if (VT == MVT::i128)
1759	LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
1760	assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
1761
1762	return ARMEmitLibcall(I, Call: LC);
1763	}
1764
1765	bool ARMFastISel::SelectRem(const Instruction I, bool* isSigned) {
1766	MVT VT;
1767	Type *Ty = I->getType();
1768	if (!isTypeLegal(Ty, VT))
1769	return false;
1770
1771	// Many ABIs do not provide a libcall for standalone remainder, so we need to
1772	// use divrem (see the RTABI 4.3.1). Since FastISel can't handle non-double
1773	// multi-reg returns, we'll have to bail out.
1774	if (!TLI.hasStandaloneRem(VT)) {
1775	return false;
1776	}
1777
1778	RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1779	if (VT == MVT::i8)
1780	LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
1781	else if (VT == MVT::i16)
1782	LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
1783	else if (VT == MVT::i32)
1784	LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
1785	else if (VT == MVT::i64)
1786	LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
1787	else if (VT == MVT::i128)
1788	LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
1789	assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
1790
1791	return ARMEmitLibcall(I, Call: LC);
1792	}
1793
1794	bool ARMFastISel::SelectBinaryIntOp(const Instruction I, unsigned* ISDOpcode) {
1795	EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
1796
1797	// We can get here in the case when we have a binary operation on a non-legal
1798	// type and the target independent selector doesn't know how to handle it.
1799	if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
1800	return false;
1801
1802	unsigned Opc;
1803	switch (ISDOpcode) {
1804	default: return false;
1805	case ISD::ADD:
1806	Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
1807	break;
1808	case ISD::OR:
1809	Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
1810	break;
1811	case ISD::SUB:
1812	Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
1813	break;
1814	}
1815
1816	Register SrcReg1 = getRegForValue(V: I->getOperand(i: `0`));
1817	if (!SrcReg1)
1818	return false;
1819
1820	// TODO: Often the 2nd operand is an immediate, which can be encoded directly
1821	// in the instruction, rather then materializing the value in a register.
1822	Register SrcReg2 = getRegForValue(V: I->getOperand(i: `1`));
1823	if (!SrcReg2)
1824	return false;
1825
1826	Register ResultReg = createResultReg(RC: &ARM::GPRnopcRegClass);
1827	SrcReg1 = constrainOperandRegClass(II: TII.get(Opcode: Opc), Op: SrcReg1, OpNum: `1`);
1828	SrcReg2 = constrainOperandRegClass(II: TII.get(Opcode: Opc), Op: SrcReg2, OpNum: `2`);
1829	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1830	MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
1831	.addReg(RegNo: SrcReg1).addReg(RegNo: SrcReg2));
1832	updateValueMap(I, Reg: ResultReg);
1833	return true;
1834	}
1835
1836	bool ARMFastISel::SelectBinaryFPOp(const Instruction I, unsigned* ISDOpcode) {
1837	EVT FPVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
1838	if (!FPVT.isSimple()) return false;
1839	MVT VT = FPVT.getSimpleVT();
1840
1841	// FIXME: Support vector types where possible.
1842	if (VT.isVector())
1843	return false;
1844
1845	// We can get here in the case when we want to use NEON for our fp
1846	// operations, but can't figure out how to. Just use the vfp instructions
1847	// if we have them.
1848	// FIXME: It'd be nice to use NEON instructions.
1849	Type *Ty = I->getType();
1850	if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
1851	return false;
1852	if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() \|\| !Subtarget->hasFP64()))
1853	return false;
1854
1855	unsigned Opc;
1856	bool is64bit = VT == MVT::f64 \|\| VT == MVT::i64;
1857	switch (ISDOpcode) {
1858	default: return false;
1859	case ISD::FADD:
1860	Opc = is64bit ? ARM::VADDD : ARM::VADDS;
1861	break;
1862	case ISD::FSUB:
1863	Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
1864	break;
1865	case ISD::FMUL:
1866	Opc = is64bit ? ARM::VMULD : ARM::VMULS;
1867	break;
1868	}
1869	Register Op1 = getRegForValue(V: I->getOperand(i: `0`));
1870	if (!Op1)
1871	return false;
1872
1873	Register Op2 = getRegForValue(V: I->getOperand(i: `1`));
1874	if (!Op2)
1875	return false;
1876
1877	Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT: VT.SimpleTy));
1878	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1879	MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
1880	.addReg(RegNo: Op1).addReg(RegNo: Op2));
1881	updateValueMap(I, Reg: ResultReg);
1882	return true;
1883	}
1884
1885	// Call Handling Code
1886
1887	// This is largely taken directly from CCAssignFnForNode
1888	// TODO: We may not support all of this.
1889	CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
1890	bool Return,
1891	bool isVarArg) {
1892	switch (CC) {
1893	default:
1894	report_fatal_error(reason: "Unsupported calling convention");
1895	case CallingConv::Fast:
1896	if (Subtarget->hasVFP2Base() && !isVarArg) {
1897	if (!TM.isAAPCS_ABI())
1898	return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1899	// For AAPCS ABI targets, just use VFP variant of the calling convention.
1900	return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1901	}
1902	[[fallthrough]];
1903	case CallingConv::C:
1904	case CallingConv::CXX_FAST_TLS:
1905	// Use target triple & subtarget features to do actual dispatch.
1906	if (TM.isAAPCS_ABI()) {
1907	if (Subtarget->hasFPRegs() &&
1908	TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
1909	return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
1910	else
1911	return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
1912	} else {
1913	return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
1914	}
1915	case CallingConv::ARM_AAPCS_VFP:
1916	case CallingConv::Swift:
1917	case CallingConv::SwiftTail:
1918	if (!isVarArg)
1919	return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
1920	// Fall through to soft float variant, variadic functions don't
1921	// use hard floating point ABI.
1922	[[fallthrough]];
1923	case CallingConv::ARM_AAPCS:
1924	return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
1925	case CallingConv::ARM_APCS:
1926	return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
1927	case CallingConv::GHC:
1928	if (Return)
1929	report_fatal_error(reason: "Can't return in GHC call convention");
1930	else
1931	return CC_ARM_APCS_GHC;
1932	case CallingConv::CFGuard_Check:
1933	return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
1934	}
1935	}
1936
1937	bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
1938	SmallVectorImpl<Register> &ArgRegs,
1939	SmallVectorImpl<MVT> &ArgVTs,
1940	SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
1941	SmallVectorImpl<Register> &RegArgs,
1942	CallingConv::ID CC,
1943	unsigned &NumBytes,
1944	bool isVarArg) {
1945	SmallVector<CCValAssign, `16`> ArgLocs;
1946	CCState CCInfo(CC, isVarArg, FuncInfo.MF, ArgLocs, Context);
1947	CCInfo.AnalyzeCallOperands(ArgVTs, Flags&: ArgFlags,
1948	Fn: CCAssignFnForCall(CC, Return: false, isVarArg));
1949
1950	// Check that we can handle all of the arguments. If we can't, then bail out
1951	// now before we add code to the MBB.
1952	for (unsigned i = `0`, e = ArgLocs.size(); i != e; ++i) {
1953	CCValAssign &VA = ArgLocs [i];
1954	MVT ArgVT = ArgVTs [VA.getValNo()];
1955
1956	// We don't handle NEON/vector parameters yet.
1957	if (ArgVT.isVector() \|\| ArgVT.getSizeInBits() > `64`)
1958	return false;
1959
1960	// Now copy/store arg to correct locations.
1961	if (VA.isRegLoc() && !VA.needsCustom()) {
1962	continue;
1963	} else if (VA.needsCustom()) {
1964	// TODO: We need custom lowering for vector (v2f64) args.
1965	if (VA.getLocVT() != MVT::f64 \|\|
1966	// TODO: Only handle register args for now.
1967	!VA.isRegLoc() \|\| !ArgLocs [++i].isRegLoc())
1968	return false;
1969	} else {
1970	switch (ArgVT.SimpleTy) {
1971	default:
1972	return false;
1973	case MVT::i1:
1974	case MVT::i8:
1975	case MVT::i16:
1976	case MVT::i32:
1977	break;
1978	case MVT::f32:
1979	if (!Subtarget->hasVFP2Base())
1980	return false;
1981	break;
1982	case MVT::f64:
1983	if (!Subtarget->hasVFP2Base())
1984	return false;
1985	break;
1986	}
1987	}
1988	}
1989
1990	// At the point, we are able to handle the call's arguments in fast isel.
1991
1992	// Get a count of how many bytes are to be pushed on the stack.
1993	NumBytes = CCInfo.getStackSize();
1994
1995	// Issue CALLSEQ_START
1996	unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1997	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1998	MCID: TII.get(Opcode: AdjStackDown))
1999	.addImm(Val: NumBytes).addImm(Val: `0`));
2000
2001	// Process the args.
2002	for (unsigned i = `0`, e = ArgLocs.size(); i != e; ++i) {
2003	CCValAssign &VA = ArgLocs [i];
2004	const Value *ArgVal = Args [VA.getValNo()];
2005	Register Arg = ArgRegs [VA.getValNo()];
2006	MVT ArgVT = ArgVTs [VA.getValNo()];
2007
2008	assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= `64`) &&
2009	"We don't handle NEON/vector parameters yet.");
2010
2011	// Handle arg promotion, etc.
2012	switch (VA.getLocInfo()) {
2013	case CCValAssign::Full: break;
2014	case CCValAssign::SExt: {
2015	MVT DestVT = VA.getLocVT();
2016	Arg = ARMEmitIntExt(SrcVT: ArgVT, SrcReg: Arg, DestVT, /isZExt/false);
2017	assert(Arg && "Failed to emit a sext");
2018	ArgVT = DestVT;
2019	break;
2020	}
2021	case CCValAssign::AExt:
2022	// Intentional fall-through. Handle AExt and ZExt.
2023	case CCValAssign::ZExt: {
2024	MVT DestVT = VA.getLocVT();
2025	Arg = ARMEmitIntExt(SrcVT: ArgVT, SrcReg: Arg, DestVT, /isZExt/true);
2026	assert(Arg && "Failed to emit a zext");
2027	ArgVT = DestVT;
2028	break;
2029	}
2030	case CCValAssign::BCvt: {
2031	Register BC = fastEmit_r(VT: ArgVT, RetVT: VA.getLocVT(), Opcode: ISD::BITCAST, Op0: Arg);
2032	assert(BC && "Failed to emit a bitcast!");
2033	Arg = BC;
2034	ArgVT = VA.getLocVT();
2035	break;
2036	}
2037	default: llvm_unreachable("Unknown arg promotion!");
2038	}
2039
2040	// Now copy/store arg to correct locations.
2041	if (VA.isRegLoc() && !VA.needsCustom()) {
2042	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2043	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: VA.getLocReg()).addReg(RegNo: Arg);
2044	RegArgs.push_back(Elt: VA.getLocReg());
2045	} else if (VA.needsCustom()) {
2046	// TODO: We need custom lowering for vector (v2f64) args.
2047	assert(VA.getLocVT() == MVT::f64 &&
2048	"Custom lowering for v2f64 args not available");
2049
2050	// FIXME: ArgLocs[++i] may extend beyond ArgLocs.size()
2051	CCValAssign &NextVA = ArgLocs [++i];
2052
2053	assert(VA.isRegLoc() && NextVA.isRegLoc() &&
2054	"We only handle register args!");
2055
2056	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2057	MCID: TII.get(Opcode: ARM::VMOVRRD), DestReg: VA.getLocReg())
2058	.addReg(RegNo: NextVA.getLocReg(), flags: RegState::Define)
2059	.addReg(RegNo: Arg));
2060	RegArgs.push_back(Elt: VA.getLocReg());
2061	RegArgs.push_back(Elt: NextVA.getLocReg());
2062	} else {
2063	assert(VA.isMemLoc());
2064	// Need to store on the stack.
2065
2066	// Don't emit stores for undef values.
2067	if (isa<UndefValue>(Val: ArgVal))
2068	continue;
2069
2070	Address Addr;
2071	Addr.setKind(Address::RegBase);
2072	Addr.setReg(ARM::SP);
2073	Addr.setOffset(VA.getLocMemOffset());
2074
2075	bool EmitRet = ARMEmitStore(VT: ArgVT, SrcReg: Arg, Addr); (void)EmitRet;
2076	assert(EmitRet && "Could not emit a store for argument!");
2077	}
2078	}
2079
2080	return true;
2081	}
2082
2083	bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
2084	const Instruction *I, CallingConv::ID CC,
2085	unsigned &NumBytes, bool isVarArg) {
2086	// Issue CALLSEQ_END
2087	unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2088	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2089	MCID: TII.get(Opcode: AdjStackUp))
2090	.addImm(Val: NumBytes).addImm(Val: -`1ULL`));
2091
2092	// Now the return value.
2093	if (RetVT != MVT::isVoid) {
2094	SmallVector<CCValAssign, `16`> RVLocs;
2095	CCState CCInfo(CC, isVarArg, FuncInfo.MF, RVLocs, Context);
2096	CCInfo.AnalyzeCallResult(VT: RetVT, Fn: CCAssignFnForCall(CC, Return: true, isVarArg));
2097
2098	// Copy all of the result registers out of their specified physreg.
2099	if (RVLocs.size() == `2` && RetVT == MVT::f64) {
2100	// For this move we copy into two registers and then move into the
2101	// double fp reg we want.
2102	MVT DestVT = RVLocs [`0`].getValVT();
2103	const TargetRegisterClass* DstRC = TLI.getRegClassFor(VT: DestVT);
2104	Register ResultReg = createResultReg(RC: DstRC);
2105	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2106	MCID: TII.get(Opcode: ARM::VMOVDRR), DestReg: ResultReg)
2107	.addReg(RegNo: RVLocs [`0`].getLocReg())
2108	.addReg(RegNo: RVLocs [`1`].getLocReg()));
2109
2110	UsedRegs.push_back(Elt: RVLocs [`0`].getLocReg());
2111	UsedRegs.push_back(Elt: RVLocs [`1`].getLocReg());
2112
2113	// Finally update the result.
2114	updateValueMap(I, Reg: ResultReg);
2115	} else {
2116	assert(RVLocs.size() == `1` &&"Can't handle non-double multi-reg retvals!");
2117	MVT CopyVT = RVLocs [`0`].getValVT();
2118
2119	// Special handling for extended integers.
2120	if (RetVT == MVT::i1 \|\| RetVT == MVT::i8 \|\| RetVT == MVT::i16)
2121	CopyVT = MVT::i32;
2122
2123	const TargetRegisterClass* DstRC = TLI.getRegClassFor(VT: CopyVT);
2124
2125	Register ResultReg = createResultReg(RC: DstRC);
2126	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2127	MCID: TII.get(Opcode: TargetOpcode::COPY),
2128	DestReg: ResultReg).addReg(RegNo: RVLocs [`0`].getLocReg());
2129	UsedRegs.push_back(Elt: RVLocs [`0`].getLocReg());
2130
2131	// Finally update the result.
2132	updateValueMap(I, Reg: ResultReg);
2133	}
2134	}
2135
2136	return true;
2137	}
2138
2139	bool ARMFastISel::SelectRet(const Instruction *I) {
2140	const ReturnInst *Ret = cast<ReturnInst>(Val: I);
2141	const Function &F = *I->getParent()->getParent();
2142	const bool IsCmseNSEntry = F.hasFnAttribute(Kind: "cmse_nonsecure_entry");
2143
2144	if (!FuncInfo.CanLowerReturn)
2145	return false;
2146
2147	if (TLI.supportSwiftError() &&
2148	F.getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError))
2149	return false;
2150
2151	if (TLI.supportSplitCSR(MF: FuncInfo.MF))
2152	return false;
2153
2154	// Build a list of return value registers.
2155	SmallVector<Register, `4`> RetRegs;
2156
2157	CallingConv::ID CC = F.getCallingConv();
2158	if (Ret->getNumOperands() > `0`) {
2159	SmallVector<ISD::OutputArg, `4`> Outs;
2160	GetReturnInfo(CC, ReturnType: F.getReturnType(), attr: F.getAttributes(), Outs, TLI, DL);
2161
2162	// Analyze operands of the call, assigning locations to each operand.
2163	SmallVector<CCValAssign, `16`> ValLocs;
2164	CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
2165	CCInfo.AnalyzeReturn(Outs, Fn: CCAssignFnForCall(CC, Return: true / is Ret /,
2166	isVarArg: F.isVarArg()));
2167
2168	const Value *RV = Ret->getOperand(i_nocapture: `0`);
2169	Register Reg = getRegForValue(V: RV);
2170	if (!Reg)
2171	return false;
2172
2173	// Only handle a single return value for now.
2174	if (ValLocs.size() != `1`)
2175	return false;
2176
2177	CCValAssign &VA = ValLocs [`0`];
2178
2179	// Don't bother handling odd stuff for now.
2180	if (VA.getLocInfo() != CCValAssign::Full)
2181	return false;
2182	// Only handle register returns for now.
2183	if (!VA.isRegLoc())
2184	return false;
2185
2186	Register SrcReg = Reg + VA.getValNo();
2187	EVT RVEVT = TLI.getValueType(DL, Ty: RV->getType());
2188	if (!RVEVT.isSimple()) return false;
2189	MVT RVVT = RVEVT.getSimpleVT();
2190	MVT DestVT = VA.getValVT();
2191	// Special handling for extended integers.
2192	if (RVVT != DestVT) {
2193	if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
2194	return false;
2195
2196	assert(DestVT == MVT::i32 && "ARM should always ext to i32");
2197
2198	// Perform extension if flagged as either zext or sext. Otherwise, do
2199	// nothing.
2200	if (Outs [`0`].Flags.isZExt() \|\| Outs [`0`].Flags.isSExt()) {
2201	SrcReg = ARMEmitIntExt(SrcVT: RVVT, SrcReg, DestVT, isZExt: Outs [`0`].Flags.isZExt());
2202	if (!SrcReg)
2203	return false;
2204	}
2205	}
2206
2207	// Make the copy.
2208	Register DstReg = VA.getLocReg();
2209	const TargetRegisterClass* SrcRC = MRI.getRegClass(Reg: SrcReg);
2210	// Avoid a cross-class copy. This is very unlikely.
2211	if (!SrcRC->contains(Reg: DstReg))
2212	return false;
2213	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2214	MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: DstReg).addReg(RegNo: SrcReg);
2215
2216	// Add register to return instruction.
2217	RetRegs.push_back(Elt: VA.getLocReg());
2218	}
2219
2220	unsigned RetOpc;
2221	if (IsCmseNSEntry)
2222	if (isThumb2)
2223	RetOpc = ARM::tBXNS_RET;
2224	else
2225	llvm_unreachable("CMSE not valid for non-Thumb targets");
2226	else
2227	RetOpc = Subtarget->getReturnOpcode();
2228
2229	MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2230	MCID: TII.get(Opcode: RetOpc));
2231	AddOptionalDefs(MIB);
2232	for (Register R : RetRegs)
2233	MIB.addReg(RegNo: R, flags: RegState::Implicit);
2234	return true;
2235	}
2236
2237	unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
2238	if (UseReg)
2239	return isThumb2 ? gettBLXrOpcode(MF: MF) : getBLXOpcode(MF: MF);
2240	else
2241	return isThumb2 ? ARM::tBL : ARM::BL;
2242	}
2243
2244	Register ARMFastISel::getLibcallReg(const Twine &Name) {
2245	// Manually compute the global's type to avoid building it when unnecessary.
2246	Type GVTy = PointerType::get(C&: Context, /AS=/AddressSpace: `0`);
2247	EVT LCREVT = TLI.getValueType(DL, Ty: GVTy);
2248	if (!LCREVT.isSimple())
2249	return Register ();
2250
2251	GlobalValue *GV = M.getNamedGlobal(Name: Name.str());
2252	if (!GV)
2253	GV = new GlobalVariable (M, Type::getInt32Ty(C&: Context), false*,
2254	GlobalValue::ExternalLinkage, nullptr, Name);
2255
2256	return ARMMaterializeGV(GV, VT: LCREVT.getSimpleVT());
2257	}
2258
2259	// A quick function that will emit a call for a named libcall in F with the
2260	// vector of passed arguments for the Instruction in I. We can assume that we
2261	// can emit a call for any libcall we can produce. This is an abridged version
2262	// of the full call infrastructure since we won't need to worry about things
2263	// like computed function pointers or strange arguments at call sites.
2264	// TODO: Try to unify this and the normal call bits for ARM, then try to unify
2265	// with X86.
2266	bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
2267	CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
2268
2269	// Handle simple* calls for now.*
2270	Type *RetTy = I->getType();
2271	MVT RetVT;
2272	if (RetTy->isVoidTy())
2273	RetVT = MVT::isVoid;
2274	else if (!isTypeLegal(Ty: RetTy, VT&: RetVT))
2275	return false;
2276
2277	// Can't handle non-double multi-reg retvals.
2278	if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
2279	SmallVector<CCValAssign, `16`> RVLocs;
2280	CCState CCInfo(CC, false, FuncInfo.MF, RVLocs, Context);
2281	CCInfo.AnalyzeCallResult(VT: RetVT, Fn: CCAssignFnForCall(CC, Return: true, isVarArg: false));
2282	if (RVLocs.size() >= `2` && RetVT != MVT::f64)
2283	return false;
2284	}
2285
2286	// Set up the argument vectors.
2287	SmallVector<Value*, `8`> Args;
2288	SmallVector<Register, `8`> ArgRegs;
2289	SmallVector<MVT, `8`> ArgVTs;
2290	SmallVector<ISD::ArgFlagsTy, `8`> ArgFlags;
2291	Args.reserve(N: I->getNumOperands());
2292	ArgRegs.reserve(N: I->getNumOperands());
2293	ArgVTs.reserve(N: I->getNumOperands());
2294	ArgFlags.reserve(N: I->getNumOperands());
2295	for (Value *Op : I->operands()) {
2296	Register Arg = getRegForValue(V: Op);
2297	if (!Arg)
2298	return false;
2299
2300	Type *ArgTy = Op->getType();
2301	MVT ArgVT;
2302	if (!isTypeLegal(Ty: ArgTy, VT&: ArgVT)) return false;
2303
2304	ISD::ArgFlagsTy Flags;
2305	Flags.setOrigAlign(DL.getABITypeAlign(Ty: ArgTy));
2306
2307	Args.push_back(Elt: Op);
2308	ArgRegs.push_back(Elt: Arg);
2309	ArgVTs.push_back(Elt: ArgVT);
2310	ArgFlags.push_back(Elt: Flags);
2311	}
2312
2313	// Handle the arguments now that we've gotten them.
2314	SmallVector<Register, `4`> RegArgs;
2315	unsigned NumBytes;
2316	if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
2317	RegArgs, CC, NumBytes, isVarArg: false))
2318	return false;
2319
2320	Register CalleeReg;
2321	if (Subtarget->genLongCalls()) {
2322	CalleeReg = getLibcallReg(Name: TLI.getLibcallName(Call));
2323	if (!CalleeReg)
2324	return false;
2325	}
2326
2327	// Issue the call.
2328	unsigned CallOpc = ARMSelectCallOp(UseReg: Subtarget->genLongCalls());
2329	MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt,
2330	MIMD, MCID: TII.get(Opcode: CallOpc));
2331	// BL / BLX don't take a predicate, but tBL / tBLX do.
2332	if (isThumb2)
2333	MIB.add(MOs: predOps(Pred: ARMCC::AL));
2334	if (Subtarget->genLongCalls()) {
2335	CalleeReg =
2336	constrainOperandRegClass(II: TII.get(Opcode: CallOpc), Op: CalleeReg, OpNum: isThumb2 ? `2` : `0`);
2337	MIB.addReg(RegNo: CalleeReg);
2338	} else
2339	MIB.addExternalSymbol(FnName: TLI.getLibcallName(Call));
2340
2341	// Add implicit physical register uses to the call.
2342	for (Register R : RegArgs)
2343	MIB.addReg(RegNo: R, flags: RegState::Implicit);
2344
2345	// Add a register mask with the call-preserved registers.
2346	// Proper defs for return values will be added by setPhysRegsDeadExcept().
2347	MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC));
2348
2349	// Finish off the call including any return values.
2350	SmallVector<Register, `4`> UsedRegs;
2351	if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg: false)) return false;
2352
2353	// Set all unused physreg defs as dead.
2354	static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
2355
2356	return true;
2357	}
2358
2359	bool ARMFastISel::SelectCall(const Instruction *I,
2360	const char IntrMemName = nullptr*) {
2361	const CallInst *CI = cast<CallInst>(Val: I);
2362	const Value *Callee = CI->getCalledOperand();
2363
2364	// Can't handle inline asm.
2365	if (isa<InlineAsm>(Val: Callee)) return false;
2366
2367	// Allow SelectionDAG isel to handle tail calls.
2368	if (CI->isTailCall()) return false;
2369
2370	// Check the calling convention.
2371	CallingConv::ID CC = CI->getCallingConv();
2372
2373	// TODO: Avoid some calling conventions?
2374
2375	FunctionType *FTy = CI->getFunctionType();
2376	bool isVarArg = FTy->isVarArg();
2377
2378	// Handle simple* calls for now.*
2379	Type *RetTy = I->getType();
2380	MVT RetVT;
2381	if (RetTy->isVoidTy())
2382	RetVT = MVT::isVoid;
2383	else if (!isTypeLegal(Ty: RetTy, VT&: RetVT) && RetVT != MVT::i16 &&
2384	RetVT != MVT::i8 && RetVT != MVT::i1)
2385	return false;
2386
2387	// Can't handle non-double multi-reg retvals.
2388	if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
2389	RetVT != MVT::i16 && RetVT != MVT::i32) {
2390	SmallVector<CCValAssign, `16`> RVLocs;
2391	CCState CCInfo(CC, isVarArg, FuncInfo.MF, RVLocs, Context);
2392	CCInfo.AnalyzeCallResult(VT: RetVT, Fn: CCAssignFnForCall(CC, Return: true, isVarArg));
2393	if (RVLocs.size() >= `2` && RetVT != MVT::f64)
2394	return false;
2395	}
2396
2397	// Set up the argument vectors.
2398	SmallVector<Value*, `8`> Args;
2399	SmallVector<Register, `8`> ArgRegs;
2400	SmallVector<MVT, `8`> ArgVTs;
2401	SmallVector<ISD::ArgFlagsTy, `8`> ArgFlags;
2402	unsigned arg_size = CI->arg_size();
2403	Args.reserve(N: arg_size);
2404	ArgRegs.reserve(N: arg_size);
2405	ArgVTs.reserve(N: arg_size);
2406	ArgFlags.reserve(N: arg_size);
2407	for (auto ArgI = CI->arg_begin(), ArgE = CI->arg_end(); ArgI != ArgE; ++ArgI) {
2408	// If we're lowering a memory intrinsic instead of a regular call, skip the
2409	// last argument, which shouldn't be passed to the underlying function.
2410	if (IntrMemName && ArgE - ArgI <= `1`)
2411	break;
2412
2413	ISD::ArgFlagsTy Flags;
2414	unsigned ArgIdx = ArgI - CI->arg_begin();
2415	if (CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SExt))
2416	Flags.setSExt();
2417	if (CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ZExt))
2418	Flags.setZExt();
2419
2420	// FIXME: Only handle easy* calls for now.*
2421	if (CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::InReg) \|\|
2422	CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::StructRet) \|\|
2423	CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftSelf) \|\|
2424	CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftError) \|\|
2425	CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Nest) \|\|
2426	CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ByVal))
2427	return false;
2428
2429	Type ArgTy = (ArgI)->getType();
2430	MVT ArgVT;
2431	if (!isTypeLegal(Ty: ArgTy, VT&: ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
2432	ArgVT != MVT::i1)
2433	return false;
2434
2435	Register Arg = getRegForValue(V: *ArgI);
2436	if (!Arg.isValid())
2437	return false;
2438
2439	Flags.setOrigAlign(DL.getABITypeAlign(Ty: ArgTy));
2440
2441	Args.push_back(Elt: *ArgI);
2442	ArgRegs.push_back(Elt: Arg);
2443	ArgVTs.push_back(Elt: ArgVT);
2444	ArgFlags.push_back(Elt: Flags);
2445	}
2446
2447	// Handle the arguments now that we've gotten them.
2448	SmallVector<Register, `4`> RegArgs;
2449	unsigned NumBytes;
2450	if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
2451	RegArgs, CC, NumBytes, isVarArg))
2452	return false;
2453
2454	bool UseReg = false;
2455	const GlobalValue *GV = dyn_cast<GlobalValue>(Val: Callee);
2456	if (!GV \|\| Subtarget->genLongCalls()) UseReg = true;
2457
2458	Register CalleeReg;
2459	if (UseReg) {
2460	if (IntrMemName)
2461	CalleeReg = getLibcallReg(Name: IntrMemName);
2462	else
2463	CalleeReg = getRegForValue(V: Callee);
2464
2465	if (!CalleeReg)
2466	return false;
2467	}
2468
2469	// Issue the call.
2470	unsigned CallOpc = ARMSelectCallOp(UseReg);
2471	MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt,
2472	MIMD, MCID: TII.get(Opcode: CallOpc));
2473
2474	// ARM calls don't take a predicate, but tBL / tBLX do.
2475	if(isThumb2)
2476	MIB.add(MOs: predOps(Pred: ARMCC::AL));
2477	if (UseReg) {
2478	CalleeReg =
2479	constrainOperandRegClass(II: TII.get(Opcode: CallOpc), Op: CalleeReg, OpNum: isThumb2 ? `2` : `0`);
2480	MIB.addReg(RegNo: CalleeReg);
2481	} else if (!IntrMemName)
2482	MIB.addGlobalAddress(GV, Offset: `0`, TargetFlags: `0`);
2483	else
2484	MIB.addExternalSymbol(FnName: IntrMemName, TargetFlags: `0`);
2485
2486	// Add implicit physical register uses to the call.
2487	for (Register R : RegArgs)
2488	MIB.addReg(RegNo: R, flags: RegState::Implicit);
2489
2490	// Add a register mask with the call-preserved registers.
2491	// Proper defs for return values will be added by setPhysRegsDeadExcept().
2492	MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC));
2493
2494	// Finish off the call including any return values.
2495	SmallVector<Register, `4`> UsedRegs;
2496	if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
2497	return false;
2498
2499	// Set all unused physreg defs as dead.
2500	static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
2501
2502	return true;
2503	}
2504
2505	bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
2506	return Len <= `16`;
2507	}
2508
2509	bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
2510	MaybeAlign Alignment) {
2511	// Make sure we don't bloat code by inlining very large memcpy's.
2512	if (!ARMIsMemCpySmall(Len))
2513	return false;
2514
2515	while (Len) {
2516	MVT VT;
2517	if (!Alignment \|\| *Alignment >= `4`) {
2518	if (Len >= `4`)
2519	VT = MVT::i32;
2520	else if (Len >= `2`)
2521	VT = MVT::i16;
2522	else {
2523	assert(Len == `1` && "Expected a length of 1!");
2524	VT = MVT::i8;
2525	}
2526	} else {
2527	assert(Alignment && "Alignment is set in this branch");
2528	// Bound based on alignment.
2529	if (Len >= `2` && *Alignment == `2`)
2530	VT = MVT::i16;
2531	else {
2532	VT = MVT::i8;
2533	}
2534	}
2535
2536	bool RV;
2537	Register ResultReg;
2538	RV = ARMEmitLoad(VT, ResultReg, Addr&: Src);
2539	assert(RV && "Should be able to handle this load.");
2540	RV = ARMEmitStore(VT, SrcReg: ResultReg, Addr&: Dest);
2541	assert(RV && "Should be able to handle this store.");
2542	(void)RV;
2543
2544	unsigned Size = VT.getSizeInBits()/`8`;
2545	Len -= Size;
2546	Dest.setOffset(Dest.getOffset() + Size);
2547	Src.setOffset(Src.getOffset() + Size);
2548	}
2549
2550	return true;
2551	}
2552
2553	bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
2554	// FIXME: Handle more intrinsics.
2555	switch (I.getIntrinsicID()) {
2556	default: return false;
2557	case Intrinsic::frameaddress: {
2558	MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
2559	MFI.setFrameAddressIsTaken(true);
2560
2561	unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
2562	const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
2563	: &ARM::GPRRegClass;
2564
2565	const ARMBaseRegisterInfo *RegInfo =
2566	static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());
2567	Register FramePtr = RegInfo->getFrameRegister(MF: *(FuncInfo.MF));
2568	Register SrcReg = FramePtr;
2569
2570	// Recursively load frame address
2571	// ldr r0 [fp]
2572	// ldr r0 [r0]
2573	// ldr r0 [r0]
2574	// ...
2575	Register DestReg;
2576	unsigned Depth = cast<ConstantInt>(Val: I.getOperand(i_nocapture: `0`))->getZExtValue();
2577	while (Depth--) {
2578	DestReg = createResultReg(RC);
2579	AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2580	MCID: TII.get(Opcode: LdrOpc), DestReg)
2581	.addReg(RegNo: SrcReg).addImm(Val: `0`));
2582	SrcReg = DestReg;
2583	}
2584	updateValueMap(I: &I, Reg: SrcReg);
2585	return true;
2586	}
2587	case Intrinsic::memcpy:
2588	case Intrinsic::memmove: {
2589	const MemTransferInst &MTI = cast<MemTransferInst>(Val: I);
2590	// Don't handle volatile.
2591	if (MTI.isVolatile())
2592	return false;
2593
2594	// Disable inlining for memmove before calls to ComputeAddress. Otherwise,
2595	// we would emit dead code because we don't currently handle memmoves.
2596	bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
2597	if (isa<ConstantInt>(Val: MTI.getLength()) && isMemCpy) {
2598	// Small memcpy's are common enough that we want to do them without a call
2599	// if possible.
2600	uint64_t Len = cast<ConstantInt>(Val: MTI.getLength())->getZExtValue();
2601	if (ARMIsMemCpySmall(Len)) {
2602	Address Dest, Src;
2603	if (!ARMComputeAddress(Obj: MTI.getRawDest(), Addr&: Dest) \|\|
2604	!ARMComputeAddress(Obj: MTI.getRawSource(), Addr&: Src))
2605	return false;
2606	MaybeAlign Alignment;
2607	if (MTI.getDestAlign() \|\| MTI.getSourceAlign())
2608	Alignment = std::min(a: MTI.getDestAlign().valueOrOne(),
2609	b: MTI.getSourceAlign().valueOrOne());
2610	if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
2611	return true;
2612	}
2613	}
2614
2615	if (!MTI.getLength()->getType()->isIntegerTy(Bitwidth: `32`))
2616	return false;
2617
2618	if (MTI.getSourceAddressSpace() > `255` \|\| MTI.getDestAddressSpace() > `255`)
2619	return false;
2620
2621	const char *IntrMemName = isa<MemCpyInst>(Val: I) ? "memcpy" : "memmove";
2622	return SelectCall(I: &I, IntrMemName);
2623	}
2624	case Intrinsic::memset: {
2625	const MemSetInst &MSI = cast<MemSetInst>(Val: I);
2626	// Don't handle volatile.
2627	if (MSI.isVolatile())
2628	return false;
2629
2630	if (!MSI.getLength()->getType()->isIntegerTy(Bitwidth: `32`))
2631	return false;
2632
2633	if (MSI.getDestAddressSpace() > `255`)
2634	return false;
2635
2636	return SelectCall(I: &I, IntrMemName: "memset");
2637	}
2638	case Intrinsic::trap: {
2639	unsigned Opcode;
2640	if (Subtarget->isThumb())
2641	Opcode = ARM::tTRAP;
2642	else
2643	Opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
2644	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode));
2645	return true;
2646	}
2647	}
2648	}
2649
2650	bool ARMFastISel::SelectTrunc(const Instruction *I) {
2651	// The high bits for a type smaller than the register size are assumed to be
2652	// undefined.
2653	Value *Op = I->getOperand(i: `0`);
2654
2655	EVT SrcVT, DestVT;
2656	SrcVT = TLI.getValueType(DL, Ty: Op->getType(), AllowUnknown: true);
2657	DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
2658
2659	if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
2660	return false;
2661	if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
2662	return false;
2663
2664	Register SrcReg = getRegForValue(V: Op);
2665	if (!SrcReg) return false;
2666
2667	// Because the high bits are undefined, a truncate doesn't generate
2668	// any code.
2669	updateValueMap(I, Reg: SrcReg);
2670	return true;
2671	}
2672
/// Emit a sign- or zero-extension of SrcReg from SrcVT to DestVT.
///
/// Only i1/i8/i16 sources and i8/i16/i32 destinations are accepted; for any
/// other combination a default-constructed Register is returned and nothing
/// is emitted. The source width, isThumb2, Subtarget->hasV6Ops() and isZExt
/// index the tables below, which choose between a single instruction and a
/// two-instruction sequence (a left shift followed by the right shift listed
/// in the table).
///
/// Returns the register defined by the last instruction emitted.
2673	Register ARMFastISel::ARMEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
2674	bool isZExt) {
2675	if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
2676	return Register ();
2677	if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
2678	return Register ();
2679
2680	// Table of which combinations can be emitted as a single instruction,
2681	// and which will require two.
// Indexed as [Bitness][isThumb2][hasV6Ops][isZExt]; 1 means one instruction.
2682	static const uint8_t isSingleInstrTbl[`3`][`2`][`2`][`2`] = {
2683	// ARM Thumb
2684	// !hasV6Ops hasV6Ops !hasV6Ops hasV6Ops
2685	// ext: s z s z s z s z
2686	/ 1 / { { { `0`, `1` }, { `0`, `1` } }, { { `0`, `0` }, { `0`, `1` } } },
2687	/ 8 / { { { `0`, `1` }, { `1`, `1` } }, { { `0`, `0` }, { `1`, `1` } } },
2688	/ 16 / { { { `0`, `0` }, { `1`, `1` } }, { { `0`, `0` }, { `1`, `1` } } }
2689	};
2690
2691	// Target registers for:
2692	// - For ARM can never be PC.
2693	// - For 16-bit Thumb are restricted to lower 8 registers.
2694	// - For 32-bit Thumb are restricted to non-SP and non-PC.
// Indexed as [isThumb2][isSingleInstr].
2695	static const TargetRegisterClass *RCTbl[`2`][`2`] = {
2696	// Instructions: Two Single
2697	/ ARM / { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
2698	/ Thumb / { &ARM::tGPRRegClass, &ARM::rGPRRegClass }
2699	};
2700
2701	// Table governing the instruction(s) to be emitted.
// Indexed as [isSingleInstr][isThumb2][Bitness][isZExt].
2702	static const struct InstructionTable {
2703	uint32_t Opc : `16`;
2704	uint32_t hasS : `1`; // Some instructions have an S bit, always set it to 0.
2705	uint32_t Shift : `7`; // For shift operand addressing mode, used by MOVsi.
2706	uint32_t Imm : `8`; // All instructions have either a shift or a mask.
2707	} IT[`2`][`2`][`3`][`2`] = {
2708	{ // Two instructions (first is left shift, second is in this table).
2709	{ // ARM Opc S Shift Imm
2710	/ 1 bit sext / { { .Opc: ARM::MOVsi , .hasS: `1`, .Shift: ARM_AM::asr , .Imm: `31` },
2711	/ 1 bit zext / { .Opc: ARM::MOVsi , .hasS: `1`, .Shift: ARM_AM::lsr , .Imm: `31` } },
2712	/ 8 bit sext / { { .Opc: ARM::MOVsi , .hasS: `1`, .Shift: ARM_AM::asr , .Imm: `24` },
2713	/ 8 bit zext / { .Opc: ARM::MOVsi , .hasS: `1`, .Shift: ARM_AM::lsr , .Imm: `24` } },
2714	/ 16 bit sext / { { .Opc: ARM::MOVsi , .hasS: `1`, .Shift: ARM_AM::asr , .Imm: `16` },
2715	/ 16 bit zext / { .Opc: ARM::MOVsi , .hasS: `1`, .Shift: ARM_AM::lsr , .Imm: `16` } }
2716	},
2717	{ // Thumb Opc S Shift Imm
2718	/ 1 bit sext / { { .Opc: ARM::tASRri , .hasS: `0`, .Shift: ARM_AM::no_shift, .Imm: `31` },
2719	/ 1 bit zext / { .Opc: ARM::tLSRri , .hasS: `0`, .Shift: ARM_AM::no_shift, .Imm: `31` } },
2720	/ 8 bit sext / { { .Opc: ARM::tASRri , .hasS: `0`, .Shift: ARM_AM::no_shift, .Imm: `24` },
2721	/ 8 bit zext / { .Opc: ARM::tLSRri , .hasS: `0`, .Shift: ARM_AM::no_shift, .Imm: `24` } },
2722	/ 16 bit sext / { { .Opc: ARM::tASRri , .hasS: `0`, .Shift: ARM_AM::no_shift, .Imm: `16` },
2723	/ 16 bit zext / { .Opc: ARM::tLSRri , .hasS: `0`, .Shift: ARM_AM::no_shift, .Imm: `16` } }
2724	}
2725	},
2726	{ // Single instruction.
2727	{ // ARM Opc S Shift Imm
2728	/ 1 bit sext / { { .Opc: ARM::KILL , .hasS: `0`, .Shift: ARM_AM::no_shift, .Imm: `0` },
2729	/ 1 bit zext / { .Opc: ARM::ANDri , .hasS: `1`, .Shift: ARM_AM::no_shift, .Imm: `1` } },
2730	/ 8 bit sext / { { .Opc: ARM::SXTB , .hasS: `0`, .Shift: ARM_AM::no_shift, .Imm: `0` },
2731	/ 8 bit zext / { .Opc: ARM::ANDri , .hasS: `1`, .Shift: ARM_AM::no_shift, .Imm: `255` } },
2732	/ 16 bit sext / { { .Opc: ARM::SXTH , .hasS: `0`, .Shift: ARM_AM::no_shift, .Imm: `0` },
2733	/ 16 bit zext / { .Opc: ARM::UXTH , .hasS: `0`, .Shift: ARM_AM::no_shift, .Imm: `0` } }
2734	},
2735	{ // Thumb Opc S Shift Imm
2736	/ 1 bit sext / { { .Opc: ARM::KILL , .hasS: `0`, .Shift: ARM_AM::no_shift, .Imm: `0` },
2737	/ 1 bit zext / { .Opc: ARM::t2ANDri, .hasS: `1`, .Shift: ARM_AM::no_shift, .Imm: `1` } },
2738	/ 8 bit sext / { { .Opc: ARM::t2SXTB , .hasS: `0`, .Shift: ARM_AM::no_shift, .Imm: `0` },
2739	/ 8 bit zext / { .Opc: ARM::t2ANDri, .hasS: `1`, .Shift: ARM_AM::no_shift, .Imm: `255` } },
2740	/ 16 bit sext / { { .Opc: ARM::t2SXTH , .hasS: `0`, .Shift: ARM_AM::no_shift, .Imm: `0` },
2741	/ 16 bit zext / { .Opc: ARM::t2UXTH , .hasS: `0`, .Shift: ARM_AM::no_shift, .Imm: `0` } }
2742	}
2743	}
2744	};
2745
2746	unsigned SrcBits = SrcVT.getSizeInBits();
2747	unsigned DestBits = DestVT.getSizeInBits();
// DestBits is only read by the asserts below; the cast silences the
// unused-variable warning when asserts are compiled out.
2748	(void) DestBits;
2749	assert((SrcBits < DestBits) && "can only extend to larger types");
2750	assert((DestBits == `32` \|\| DestBits == `16` \|\| DestBits == `8`) &&
2751	"other sizes unimplemented");
2752	assert((SrcBits == `16` \|\| SrcBits == `8` \|\| SrcBits == `1`) &&
2753	"other sizes unimplemented");
2754
2755	bool hasV6Ops = Subtarget->hasV6Ops();
2756	unsigned Bitness = SrcBits / `8`; // {1,8,16}=>{0,1,2}
2757	assert((Bitness < `3`) && "sanity-check table bounds");
2758
2759	bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
2760	const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
2761	const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
2762	unsigned Opc = ITP->Opc;
// ARM::KILL fills the single-instruction 1-bit sext slots, which
// isSingleInstrTbl never selects; reaching one means the tables disagree.
2763	assert(ARM::KILL != Opc && "Invalid table entry");
2764	unsigned hasS = ITP->hasS;
2765	ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
2766	assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
2767	"only MOVsi has shift operand addressing mode");
2768	unsigned Imm = ITP->Imm;
2769
2770	// 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
2771	bool setsCPSR = &ARM::tGPRRegClass == RC;
2772	unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;
2773	Register ResultReg;
2774	// MOVsi encodes shift and immediate in shift operand addressing mode.
2775	// The following condition has the same value when emitting two
2776	// instruction sequences: both are shifts.
2777	bool ImmIsSO = (Shift != ARM_AM::no_shift);
2778
2779	// Either one or two instructions are emitted.
2780	// They're always of the form:
2781	// dst = in OP imm
2782	// CPSR is set only by 16-bit Thumb instructions.
2783	// Predicate, if any, is AL.
2784	// S bit, if available, is always 0.
2785	// When two are emitted the first's result will feed as the second's input,
2786	// that value is then dead.
2787	unsigned NumInstrsEmitted = isSingleInstr ? `1` : `2`;
2788	for (unsigned Instr = `0`; Instr != NumInstrsEmitted; ++Instr) {
2789	ResultReg = createResultReg(RC);
// The first instruction of a two-instruction sequence is the left shift;
// every other instruction uses the opcode and shift taken from the table.
2790	bool isLsl = (`0` == Instr) && !isSingleInstr;
2791	unsigned Opcode = isLsl ? LSLOpc : Opc;
2792	ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;
2793	unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShOp: ShiftAM, Imm) : Imm;
// Only the second instruction kills its input, which is the temporary
// produced by the first.
2794	bool isKill = `1` == Instr;
2795	MachineInstrBuilder MIB = BuildMI(
2796	BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode), DestReg: ResultReg);
2797	if (setsCPSR)
2798	MIB.addReg(RegNo: ARM::CPSR, flags: RegState::Define);
// The source is operand 1, or operand 2 when the CPSR def was added above.
2799	SrcReg = constrainOperandRegClass(II: TII.get(Opcode), Op: SrcReg, OpNum: `1` + setsCPSR);
2800	MIB.addReg(RegNo: SrcReg, flags: isKill * RegState::Kill)
2801	.addImm(Val: ImmEnc)
2802	.add(MOs: predOps(Pred: ARMCC::AL));
2803	if (hasS)
2804	MIB.add(MO: condCodeOp());
2805	// Second instruction consumes the first's result.
2806	SrcReg = ResultReg;
2807	}
2808
2809	return ResultReg;
2810	}
2811
2812	bool ARMFastISel::SelectIntExt(const Instruction *I) {
2813	// On ARM, in general, integer casts don't involve legal types; this code
2814	// handles promotable integers.
2815	Type *DestTy = I->getType();
2816	Value *Src = I->getOperand(i: `0`);
2817	Type *SrcTy = Src->getType();
2818
2819	bool isZExt = isa<ZExtInst>(Val: I);
2820	Register SrcReg = getRegForValue(V: Src);
2821	if (!SrcReg) return false;
2822
2823	EVT SrcEVT, DestEVT;
2824	SrcEVT = TLI.getValueType(DL, Ty: SrcTy, AllowUnknown: true);
2825	DestEVT = TLI.getValueType(DL, Ty: DestTy, AllowUnknown: true);
2826	if (!SrcEVT.isSimple()) return false;
2827	if (!DestEVT.isSimple()) return false;
2828
2829	MVT SrcVT = SrcEVT.getSimpleVT();
2830	MVT DestVT = DestEVT.getSimpleVT();
2831	Register ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2832	if (!ResultReg)
2833	return false;
2834	updateValueMap(I, Reg: ResultReg);
2835	return true;
2836	}
2837
2838	bool ARMFastISel::SelectShift(const Instruction *I,
2839	ARM_AM::ShiftOpc ShiftTy) {
2840	// We handle thumb2 mode by target independent selector
2841	// or SelectionDAG ISel.
2842	if (isThumb2)
2843	return false;
2844
2845	// Only handle i32 now.
2846	EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
2847	if (DestVT != MVT::i32)
2848	return false;
2849
2850	unsigned Opc = ARM::MOVsr;
2851	unsigned ShiftImm;
2852	Value *Src2Value = I->getOperand(i: `1`);
2853	if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Src2Value)) {
2854	ShiftImm = CI->getZExtValue();
2855
2856	// Fall back to selection DAG isel if the shift amount
2857	// is zero or greater than the width of the value type.
2858	if (ShiftImm == `0` \|\| ShiftImm >=`32`)
2859	return false;
2860
2861	Opc = ARM::MOVsi;
2862	}
2863
2864	Value *Src1Value = I->getOperand(i: `0`);
2865	Register Reg1 = getRegForValue(V: Src1Value);
2866	if (!Reg1)
2867	return false;
2868
2869	Register Reg2;
2870	if (Opc == ARM::MOVsr) {
2871	Reg2 = getRegForValue(V: Src2Value);
2872	if (!Reg2)
2873	return false;
2874	}
2875
2876	Register ResultReg = createResultReg(RC: &ARM::GPRnopcRegClass);
2877	if (!ResultReg)
2878	return false;
2879
2880	MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2881	MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
2882	.addReg(RegNo: Reg1);
2883
2884	if (Opc == ARM::MOVsi)
2885	MIB.addImm(Val: ARM_AM::getSORegOpc(ShOp: ShiftTy, Imm: ShiftImm));
2886	else if (Opc == ARM::MOVsr) {
2887	MIB.addReg(RegNo: Reg2);
2888	MIB.addImm(Val: ARM_AM::getSORegOpc(ShOp: ShiftTy, Imm: `0`));
2889	}
2890
2891	AddOptionalDefs(MIB);
2892	updateValueMap(I, Reg: ResultReg);
2893	return true;
2894	}
2895
2896	// TODO: SoftFP support.
2897	bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
2898	switch (I->getOpcode()) {
2899	case Instruction::Load:
2900	return SelectLoad(I);
2901	case Instruction::Store:
2902	return SelectStore(I);
2903	case Instruction::Br:
2904	return SelectBranch(I);
2905	case Instruction::IndirectBr:
2906	return SelectIndirectBr(I);
2907	case Instruction::ICmp:
2908	case Instruction::FCmp:
2909	return SelectCmp(I);
2910	case Instruction::FPExt:
2911	return SelectFPExt(I);
2912	case Instruction::FPTrunc:
2913	return SelectFPTrunc(I);
2914	case Instruction::SIToFP:
2915	return SelectIToFP(I, /isSigned/ true);
2916	case Instruction::UIToFP:
2917	return SelectIToFP(I, /isSigned/ false);
2918	case Instruction::FPToSI:
2919	return SelectFPToI(I, /isSigned/ true);
2920	case Instruction::FPToUI:
2921	return SelectFPToI(I, /isSigned/ false);
2922	case Instruction::Add:
2923	return SelectBinaryIntOp(I, ISDOpcode: ISD::ADD);
2924	case Instruction::Or:
2925	return SelectBinaryIntOp(I, ISDOpcode: ISD::OR);
2926	case Instruction::Sub:
2927	return SelectBinaryIntOp(I, ISDOpcode: ISD::SUB);
2928	case Instruction::FAdd:
2929	return SelectBinaryFPOp(I, ISDOpcode: ISD::FADD);
2930	case Instruction::FSub:
2931	return SelectBinaryFPOp(I, ISDOpcode: ISD::FSUB);
2932	case Instruction::FMul:
2933	return SelectBinaryFPOp(I, ISDOpcode: ISD::FMUL);
2934	case Instruction::SDiv:
2935	return SelectDiv(I, /isSigned/ true);
2936	case Instruction::UDiv:
2937	return SelectDiv(I, /isSigned/ false);
2938	case Instruction::SRem:
2939	return SelectRem(I, /isSigned/ true);
2940	case Instruction::URem:
2941	return SelectRem(I, /isSigned/ false);
2942	case Instruction::Call:
2943	if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I))
2944	return SelectIntrinsicCall(I: *II);
2945	return SelectCall(I);
2946	case Instruction::Select:
2947	return SelectSelect(I);
2948	case Instruction::Ret:
2949	return SelectRet(I);
2950	case Instruction::Trunc:
2951	return SelectTrunc(I);
2952	case Instruction::ZExt:
2953	case Instruction::SExt:
2954	return SelectIntExt(I);
2955	case Instruction::Shl:
2956	return SelectShift(I, ShiftTy: ARM_AM::lsl);
2957	case Instruction::LShr:
2958	return SelectShift(I, ShiftTy: ARM_AM::lsr);
2959	case Instruction::AShr:
2960	return SelectShift(I, ShiftTy: ARM_AM::asr);
2961	default: break;
2962	}
2963	return false;
2964	}
2965
2966	// This table describes sign- and zero-extend instructions which can be
2967	// folded into a preceding load. All of these extends have an immediate
2968	// (sometimes a mask and sometimes a shift) that's applied after
2969	// extension.
//
// Field meaning, as consumed by tryToFoldLoadIntoMI:
//  - Opc:         extend opcode, indexed by isThumb2 (0 = ARM, 1 = Thumb).
//  - ExpectedImm: value the extend's immediate operand (operand 2) must hold.
//  - isZExt:      1 for a zero-extend, 0 for a sign-extend.
//  - ExpectedVT:  MVT::SimpleValueType of the load the extend pairs with,
//                 stored in 7 bits.
2970	static const struct FoldableLoadExtendsStruct {
2971	uint16_t Opc[`2`]; // ARM, Thumb.
2972	uint8_t ExpectedImm;
2973	uint8_t isZExt : `1`;
2974	uint8_t ExpectedVT : `7`;
2975	} FoldableLoadExtends[] = {
2976	{ .Opc: { ARM::SXTH, ARM::t2SXTH }, .ExpectedImm: `0`, .isZExt: `0`, .ExpectedVT: MVT::i16 },
2977	{ .Opc: { ARM::UXTH, ARM::t2UXTH }, .ExpectedImm: `0`, .isZExt: `1`, .ExpectedVT: MVT::i16 },
2978	{ .Opc: { ARM::ANDri, ARM::t2ANDri }, .ExpectedImm: `255`, .isZExt: `1`, .ExpectedVT: MVT::i8 },
2979	{ .Opc: { ARM::SXTB, ARM::t2SXTB }, .ExpectedImm: `0`, .isZExt: `0`, .ExpectedVT: MVT::i8 },
2980	{ .Opc: { ARM::UXTB, ARM::t2UXTB }, .ExpectedImm: `0`, .isZExt: `1`, .ExpectedVT: MVT::i8 }
2981	};
2982
2983	/// The specified machine instr operand is a vreg, and that
2984	/// vreg is being provided by the specified load instruction. If possible,
2985	/// try to fold the load as an operand to the instruction, returning true if
2986	/// successful.
2987	bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr MI, unsigned* OpNo,
2988	const LoadInst *LI) {
2989	// Verify we have a legal type before going any further.
2990	MVT VT;
2991	if (!isLoadTypeLegal(Ty: LI->getType(), VT))
2992	return false;
2993
2994	// Combine load followed by zero- or sign-extend.
2995	// ldrb r1, [r0] ldrb r1, [r0]
2996	// uxtb r2, r1 =>
2997	// mov r3, r2 mov r3, r1
2998	if (MI->getNumOperands() < `3` \|\| !MI->getOperand(i: `2`).isImm())
2999	return false;
3000	const uint64_t Imm = MI->getOperand(i: `2`).getImm();
3001
3002	bool Found = false;
3003	bool isZExt;
3004	for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) {
3005	if (FLE.Opc[isThumb2] == MI->getOpcode() &&
3006	(uint64_t)FLE.ExpectedImm == Imm &&
3007	MVT ((MVT::SimpleValueType)FLE.ExpectedVT) == VT) {
3008	Found = true;
3009	isZExt = FLE.isZExt;
3010	}
3011	}
3012	if (!Found) return false;
3013
3014	// See if we can handle this address.
3015	Address Addr;
3016	if (!ARMComputeAddress(Obj: LI->getOperand(i_nocapture: `0`), Addr)) return false;
3017
3018	Register ResultReg = MI->getOperand(i: `0`).getReg();
3019	if (!ARMEmitLoad(VT, ResultReg, Addr, Alignment: LI->getAlign(), isZExt, allocReg: false))
3020	return false;
3021	MachineBasicBlock::iterator I(MI);
3022	removeDeadCode(I, E: std::next(x: I));
3023	return true;
3024	}
3025
3026	Register ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, MVT VT) {
3027	bool UseGOT_PREL = !GV->isDSOLocal();
3028	LLVMContext *Context = &MF->getFunction().getContext();
3029	unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3030	unsigned PCAdj = Subtarget->isThumb() ? `4` : `8`;
3031	ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
3032	C: GV, ID: ARMPCLabelIndex, Kind: ARMCP::CPValue, PCAdj,
3033	Modifier: UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
3034	/AddCurrentAddress=/UseGOT_PREL);
3035
3036	Align ConstAlign =
3037	MF->getDataLayout().getPrefTypeAlign(Ty: PointerType::get(C&: *Context, AddressSpace: `0`));
3038	unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(V: CPV, Alignment: ConstAlign);
3039	MachineMemOperand *CPMMO =
3040	MF->getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF&: *MF),
3041	F: MachineMemOperand::MOLoad, Size: `4`, BaseAlignment: Align (`4`));
3042
3043	Register TempReg = MF->getRegInfo().createVirtualRegister(RegClass: &ARM::rGPRRegClass);
3044	unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
3045	MachineInstrBuilder MIB =
3046	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: TempReg)
3047	.addConstantPoolIndex(Idx)
3048	.addMemOperand(MMO: CPMMO);
3049	if (Opc == ARM::LDRcp)
3050	MIB.addImm(Val: `0`);
3051	MIB.add(MOs: predOps(Pred: ARMCC::AL));
3052
3053	// Fix the address by adding pc.
3054	Register DestReg = createResultReg(RC: TLI.getRegClassFor(VT));
3055	Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR
3056	: ARM::PICADD;
3057	DestReg = constrainOperandRegClass(II: TII.get(Opcode: Opc), Op: DestReg, OpNum: `0`);
3058	MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
3059	.addReg(RegNo: TempReg)
3060	.addImm(Val: ARMPCLabelIndex);
3061
3062	if (!Subtarget->isThumb())
3063	MIB.add(MOs: predOps(Pred: ARMCC::AL));
3064
3065	if (UseGOT_PREL && Subtarget->isThumb()) {
3066	Register NewDestReg = createResultReg(RC: TLI.getRegClassFor(VT));
3067	MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3068	MCID: TII.get(Opcode: ARM::t2LDRi12), DestReg: NewDestReg)
3069	.addReg(RegNo: DestReg)
3070	.addImm(Val: `0`);
3071	DestReg = NewDestReg;
3072	AddOptionalDefs(MIB);
3073	}
3074	return DestReg;
3075	}
3076
3077	bool ARMFastISel::fastLowerArguments() {
3078	if (!FuncInfo.CanLowerReturn)
3079	return false;
3080
3081	const Function *F = FuncInfo.Fn;
3082	if (F->isVarArg())
3083	return false;
3084
3085	CallingConv::ID CC = F->getCallingConv();
3086	switch (CC) {
3087	default:
3088	return false;
3089	case CallingConv::Fast:
3090	case CallingConv::C:
3091	case CallingConv::ARM_AAPCS_VFP:
3092	case CallingConv::ARM_AAPCS:
3093	case CallingConv::ARM_APCS:
3094	case CallingConv::Swift:
3095	case CallingConv::SwiftTail:
3096	break;
3097	}
3098
3099	// Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments
3100	// which are passed in r0 - r3.
3101	for (const Argument &Arg : F->args()) {
3102	if (Arg.getArgNo() >= `4`)
3103	return false;
3104
3105	if (Arg.hasAttribute(Kind: Attribute::InReg) \|\|
3106	Arg.hasAttribute(Kind: Attribute::StructRet) \|\|
3107	Arg.hasAttribute(Kind: Attribute::SwiftSelf) \|\|
3108	Arg.hasAttribute(Kind: Attribute::SwiftError) \|\|
3109	Arg.hasAttribute(Kind: Attribute::ByVal))
3110	return false;
3111
3112	Type *ArgTy = Arg.getType();
3113	if (ArgTy->isStructTy() \|\| ArgTy->isArrayTy() \|\| ArgTy->isVectorTy())
3114	return false;
3115
3116	EVT ArgVT = TLI.getValueType(DL, Ty: ArgTy);
3117	if (!ArgVT.isSimple()) return false;
3118	switch (ArgVT.getSimpleVT().SimpleTy) {
3119	case MVT::i8:
3120	case MVT::i16:
3121	case MVT::i32:
3122	break;
3123	default:
3124	return false;
3125	}
3126	}
3127
3128	static const MCPhysReg GPRArgRegs[] = {
3129	ARM::R0, ARM::R1, ARM::R2, ARM::R3
3130	};
3131
3132	const TargetRegisterClass *RC = &ARM::rGPRRegClass;
3133	for (const Argument &Arg : F->args()) {
3134	unsigned ArgNo = Arg.getArgNo();
3135	MCRegister SrcReg = GPRArgRegs[ArgNo];
3136	Register DstReg = FuncInfo.MF->addLiveIn(PReg: SrcReg, RC);
3137	// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3138	// Without this, EmitLiveInCopies may eliminate the livein if its only
3139	// use is a bitcast (which isn't turned into an instruction).
3140	Register ResultReg = createResultReg(RC);
3141	BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3142	MCID: TII.get(Opcode: TargetOpcode::COPY),
3143	DestReg: ResultReg).addReg(RegNo: DstReg, flags: getKillRegState(B: true));
3144	updateValueMap(I: &Arg, Reg: ResultReg);
3145	}
3146
3147	return true;
3148	}
3149
3150	namespace llvm {
3151
3152	FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
3153	const TargetLibraryInfo *libInfo) {
3154	if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())
3155	return new ARMFastISel (funcInfo, libInfo);
3156
3157	return nullptr;
3158	}
3159
3160	} // end namespace llvm
3161

Browse the source code of llvm_projects/llvm/lib/Target/ARM/ARMFastISel.cpp