1//===- ARMFastISel.cpp - ARM FastISel implementation ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the ARM-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// ARMGenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "ARM.h"
16#include "ARMBaseInstrInfo.h"
17#include "ARMBaseRegisterInfo.h"
18#include "ARMCallingConv.h"
19#include "ARMConstantPoolValue.h"
20#include "ARMISelLowering.h"
21#include "ARMMachineFunctionInfo.h"
22#include "ARMSubtarget.h"
23#include "ARMTargetMachine.h"
24#include "MCTargetDesc/ARMAddressingModes.h"
25#include "MCTargetDesc/ARMBaseInfo.h"
26#include "Utils/ARMBaseInfo.h"
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/APInt.h"
29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/CodeGen/CallingConvLower.h"
32#include "llvm/CodeGen/FastISel.h"
33#include "llvm/CodeGen/FunctionLoweringInfo.h"
34#include "llvm/CodeGen/ISDOpcodes.h"
35#include "llvm/CodeGen/MachineBasicBlock.h"
36#include "llvm/CodeGen/MachineConstantPool.h"
37#include "llvm/CodeGen/MachineFrameInfo.h"
38#include "llvm/CodeGen/MachineFunction.h"
39#include "llvm/CodeGen/MachineInstr.h"
40#include "llvm/CodeGen/MachineInstrBuilder.h"
41#include "llvm/CodeGen/MachineMemOperand.h"
42#include "llvm/CodeGen/MachineOperand.h"
43#include "llvm/CodeGen/MachineRegisterInfo.h"
44#include "llvm/CodeGen/TargetInstrInfo.h"
45#include "llvm/CodeGen/TargetLowering.h"
46#include "llvm/CodeGen/TargetOpcodes.h"
47#include "llvm/CodeGen/TargetRegisterInfo.h"
48#include "llvm/CodeGen/ValueTypes.h"
49#include "llvm/CodeGenTypes/MachineValueType.h"
50#include "llvm/IR/Argument.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/CallingConv.h"
53#include "llvm/IR/Constant.h"
54#include "llvm/IR/Constants.h"
55#include "llvm/IR/DataLayout.h"
56#include "llvm/IR/DerivedTypes.h"
57#include "llvm/IR/Function.h"
58#include "llvm/IR/GetElementPtrTypeIterator.h"
59#include "llvm/IR/GlobalValue.h"
60#include "llvm/IR/GlobalVariable.h"
61#include "llvm/IR/InstrTypes.h"
62#include "llvm/IR/Instruction.h"
63#include "llvm/IR/Instructions.h"
64#include "llvm/IR/IntrinsicInst.h"
65#include "llvm/IR/Intrinsics.h"
66#include "llvm/IR/Module.h"
67#include "llvm/IR/Operator.h"
68#include "llvm/IR/Type.h"
69#include "llvm/IR/User.h"
70#include "llvm/IR/Value.h"
71#include "llvm/MC/MCInstrDesc.h"
72#include "llvm/Support/Casting.h"
73#include "llvm/Support/Compiler.h"
74#include "llvm/Support/ErrorHandling.h"
75#include "llvm/Support/MathExtras.h"
76#include "llvm/Target/TargetMachine.h"
77#include "llvm/Target/TargetOptions.h"
78#include <cassert>
79#include <cstdint>
80#include <utility>
81
82using namespace llvm;
83
84namespace {
85
86 // All possible address modes, plus some.
87class Address {
88public:
89 enum BaseKind { RegBase, FrameIndexBase };
90
91private:
92 BaseKind Kind = RegBase;
93 union {
94 unsigned Reg;
95 int FI;
96 } Base;
97
98 int Offset = 0;
99
100public:
101 // Innocuous defaults for our address.
102 Address() { Base.Reg = 0; }
103
104 void setKind(BaseKind K) { Kind = K; }
105 BaseKind getKind() const { return Kind; }
106 bool isRegBase() const { return Kind == RegBase; }
107 bool isFIBase() const { return Kind == FrameIndexBase; }
108
109 void setReg(Register Reg) {
110 assert(isRegBase() && "Invalid base register access!");
111 Base.Reg = Reg.id();
112 }
113
114 Register getReg() const {
115 assert(isRegBase() && "Invalid base register access!");
116 return Base.Reg;
117 }
118
119 void setFI(int FI) {
120 assert(isFIBase() && "Invalid base frame index access!");
121 Base.FI = FI;
122 }
123
124 int getFI() const {
125 assert(isFIBase() && "Invalid base frame index access!");
126 return Base.FI;
127 }
128
129 void setOffset(int O) { Offset = O; }
130 int getOffset() { return Offset; }
131};
132
// ARMFastISel - the ARM implementation of the FastISel interface.  Declares
// one Select* routine per IR instruction kind plus ARM-specific emission
// helpers; definitions follow below and in ARMGenFastISel.inc.
class ARMFastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  Module &M;
  const ARMBaseInstrInfo &TII;
  const ARMTargetLowering &TLI;
  const ARMBaseTargetMachine &TM;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb2;
  LLVMContext *Context;

  public:
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo,
                         const LibcallLoweringInfo *libcallLowering)
        : FastISel(funcInfo, libInfo, libcallLowering),
          Subtarget(&funcInfo.MF->getSubtarget<ARMSubtarget>()),
          M(const_cast<Module &>(*funcInfo.Fn->getParent())),
          TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
          TM(TLI.getTM()) {
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
      isThumb2 = AFI->isThumbFunction();
      Context = &funcInfo.Fn->getContext();
    }

  private:
    // Code from FastISel.cpp.
    // Overridden emitters so optional predicate/CC operands get appended
    // (see AddOptionalDefs below).

    Register fastEmitInst_r(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC, Register Op0);
    Register fastEmitInst_rr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, Register Op0,
                             Register Op1);
    Register fastEmitInst_ri(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, Register Op0,
                             uint64_t Imm);
    Register fastEmitInst_i(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC, uint64_t Imm);

    // Backend specific FastISel code.

    bool fastSelectInstruction(const Instruction *I) override;
    Register fastMaterializeConstant(const Constant *C) override;
    Register fastMaterializeAlloca(const AllocaInst *AI) override;
    bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                             const LoadInst *LI) override;
    bool fastLowerArguments() override;

  // Tablegen-generated fastEmit_* helpers.
#include "ARMGenFastISel.inc"

    // Instruction selection routines.

    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
    bool SelectCmp(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectIToFP(const Instruction *I, bool isSigned);
    bool SelectFPToI(const Instruction *I, bool isSigned);
    bool SelectDiv(const Instruction *I, bool isSigned);
    bool SelectRem(const Instruction *I, bool isSigned);
    bool SelectCall(const Instruction *I, const char *IntrMemName);
    bool SelectIntrinsicCall(const IntrinsicInst &I);
    bool SelectSelect(const Instruction *I);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);
    bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);

    // Utility routines.

    bool isPositionIndependent() const;
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                    bool isZExt);
    bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                     MaybeAlign Alignment = std::nullopt, bool isZExt = true,
                     bool allocReg = true);
    bool ARMEmitStore(MVT VT, Register SrcReg, Address &Addr,
                      MaybeAlign Alignment = std::nullopt);
    bool ARMComputeAddress(const Value *Obj, Address &Addr);
    void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
    bool ARMIsMemCpySmall(uint64_t Len);
    bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                               MaybeAlign Alignment);
    Register ARMEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, bool isZExt);
    Register ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
    Register ARMMaterializeInt(const Constant *C, MVT VT);
    Register ARMMaterializeGV(const GlobalValue *GV, MVT VT);
    Register ARMMoveToFPReg(MVT VT, Register SrcReg);
    Register ARMMoveToIntReg(MVT VT, Register SrcReg);
    unsigned ARMSelectCallOp(bool UseReg);
    Register ARMLowerPICELF(const GlobalValue *GV, MVT VT);

    const TargetLowering *getTargetLowering() { return &TLI; }

    // Call handling routines.

    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
                                  bool Return,
                                  bool isVarArg);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<Register> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<Register> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes,
                         bool isVarArg);
    Register getLibcallReg(const Twine &Name);
    bool FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes, bool isVarArg);
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

    // OptionalDef handling routines.

    bool isARMNEONPred(const MachineInstr *MI);
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
    void AddLoadStoreOperands(MVT VT, Address &Addr,
                              const MachineInstrBuilder &MIB,
                              MachineMemOperand::Flags Flags, bool useAM3);
};
265
266} // end anonymous namespace
267
268// DefinesOptionalPredicate - This is different from DefinesPredicate in that
269// we don't care about implicit defs here, just places we'll need to add a
270// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
271bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
272 if (!MI->hasOptionalDef())
273 return false;
274
275 // Look to see if our OptionalDef is defining CPSR or CCR.
276 for (const MachineOperand &MO : MI->operands()) {
277 if (!MO.isReg() || !MO.isDef()) continue;
278 if (MO.getReg() == ARM::CPSR)
279 *CPSR = true;
280 }
281 return true;
282}
283
284bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
285 const MCInstrDesc &MCID = MI->getDesc();
286
287 // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
288 if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
289 AFI->isThumb2Function())
290 return MI->isPredicable();
291
292 for (const MCOperandInfo &opInfo : MCID.operands())
293 if (opInfo.isPredicate())
294 return true;
295
296 return false;
297}
298
// If the machine is predicable go ahead and add the predicate operands, if
// it needs default CC operands add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
// CPSR defs that need to be added before the remaining operands. See s_cc_out
// for descriptions why.
//
// Returns the same builder so the call can be chained around BuildMI().
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? or...
  // Are we NEON in ARM mode and have a predicate operand? If so, I know
  // we're not predicable but add it anyways.
  if (isARMNEONPred(MI))
    MIB.add(MOs: predOps(Pred: ARMCC::AL));

  // Do we optionally set a predicate? Preds is size > 0 iff the predicate
  // defines CPSR. All other OptionalDefines in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, CPSR: &CPSR))
    MIB.add(MO: CPSR ? t1CondCodeOp() : condCodeOp());
  return MIB;
}
321
322Register ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
323 const TargetRegisterClass *RC,
324 Register Op0) {
325 Register ResultReg = createResultReg(RC);
326 const MCInstrDesc &II = TII.get(Opcode: MachineInstOpcode);
327
328 // Make sure the input operand is sufficiently constrained to be legal
329 // for this instruction.
330 Op0 = constrainOperandRegClass(II, Op: Op0, OpNum: 1);
331 if (II.getNumDefs() >= 1) {
332 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II,
333 DestReg: ResultReg).addReg(RegNo: Op0));
334 } else {
335 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
336 .addReg(RegNo: Op0));
337 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
338 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
339 .addReg(RegNo: II.implicit_defs()[0]));
340 }
341 return ResultReg;
342}
343
344Register ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
345 const TargetRegisterClass *RC,
346 Register Op0, Register Op1) {
347 Register ResultReg = createResultReg(RC);
348 const MCInstrDesc &II = TII.get(Opcode: MachineInstOpcode);
349
350 // Make sure the input operands are sufficiently constrained to be legal
351 // for this instruction.
352 Op0 = constrainOperandRegClass(II, Op: Op0, OpNum: 1);
353 Op1 = constrainOperandRegClass(II, Op: Op1, OpNum: 2);
354
355 if (II.getNumDefs() >= 1) {
356 AddOptionalDefs(
357 MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
358 .addReg(RegNo: Op0)
359 .addReg(RegNo: Op1));
360 } else {
361 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
362 .addReg(RegNo: Op0)
363 .addReg(RegNo: Op1));
364 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
365 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
366 .addReg(RegNo: II.implicit_defs()[0]));
367 }
368 return ResultReg;
369}
370
371Register ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
372 const TargetRegisterClass *RC,
373 Register Op0, uint64_t Imm) {
374 Register ResultReg = createResultReg(RC);
375 const MCInstrDesc &II = TII.get(Opcode: MachineInstOpcode);
376
377 // Make sure the input operand is sufficiently constrained to be legal
378 // for this instruction.
379 Op0 = constrainOperandRegClass(II, Op: Op0, OpNum: 1);
380 if (II.getNumDefs() >= 1) {
381 AddOptionalDefs(
382 MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
383 .addReg(RegNo: Op0)
384 .addImm(Val: Imm));
385 } else {
386 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
387 .addReg(RegNo: Op0)
388 .addImm(Val: Imm));
389 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
390 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
391 .addReg(RegNo: II.implicit_defs()[0]));
392 }
393 return ResultReg;
394}
395
396Register ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
397 const TargetRegisterClass *RC,
398 uint64_t Imm) {
399 Register ResultReg = createResultReg(RC);
400 const MCInstrDesc &II = TII.get(Opcode: MachineInstOpcode);
401
402 if (II.getNumDefs() >= 1) {
403 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II,
404 DestReg: ResultReg).addImm(Val: Imm));
405 } else {
406 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
407 .addImm(Val: Imm));
408 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
409 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
410 .addReg(RegNo: II.implicit_defs()[0]));
411 }
412 return ResultReg;
413}
414
415// TODO: Don't worry about 64-bit now, but when this is fixed remove the
416// checks from the various callers.
417Register ARMFastISel::ARMMoveToFPReg(MVT VT, Register SrcReg) {
418 if (VT == MVT::f64)
419 return Register();
420
421 Register MoveReg = createResultReg(RC: TLI.getRegClassFor(VT));
422 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
423 MCID: TII.get(Opcode: ARM::VMOVSR), DestReg: MoveReg)
424 .addReg(RegNo: SrcReg));
425 return MoveReg;
426}
427
428Register ARMFastISel::ARMMoveToIntReg(MVT VT, Register SrcReg) {
429 if (VT == MVT::i64)
430 return Register();
431
432 Register MoveReg = createResultReg(RC: TLI.getRegClassFor(VT));
433 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
434 MCID: TII.get(Opcode: ARM::VMOVRS), DestReg: MoveReg)
435 .addReg(RegNo: SrcReg));
436 return MoveReg;
437}
438
439// For double width floating point we need to materialize two constants
440// (the high and the low) into integer registers then use a move to get
441// the combined constant into an FP reg.
442Register ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
443 if (VT != MVT::f32 && VT != MVT::f64)
444 return Register();
445
446 const APFloat Val = CFP->getValueAPF();
447 bool is64bit = VT == MVT::f64;
448
449 // This checks to see if we can use VFP3 instructions to materialize
450 // a constant, otherwise we have to go through the constant pool.
451 if (TLI.isFPImmLegal(Imm: Val, VT)) {
452 int Imm;
453 unsigned Opc;
454 if (is64bit) {
455 Imm = ARM_AM::getFP64Imm(FPImm: Val);
456 Opc = ARM::FCONSTD;
457 } else {
458 Imm = ARM_AM::getFP32Imm(FPImm: Val);
459 Opc = ARM::FCONSTS;
460 }
461 Register DestReg = createResultReg(RC: TLI.getRegClassFor(VT));
462 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
463 MCID: TII.get(Opcode: Opc), DestReg).addImm(Val: Imm));
464 return DestReg;
465 }
466
467 // Require VFP2 for loading fp constants.
468 if (!Subtarget->hasVFP2Base()) return false;
469
470 // MachineConstantPool wants an explicit alignment.
471 Align Alignment = DL.getPrefTypeAlign(Ty: CFP->getType());
472 unsigned Idx = MCP.getConstantPoolIndex(C: cast<Constant>(Val: CFP), Alignment);
473 Register DestReg = createResultReg(RC: TLI.getRegClassFor(VT));
474 unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
475
476 // The extra reg is for addrmode5.
477 AddOptionalDefs(
478 MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
479 .addConstantPoolIndex(Idx)
480 .addReg(RegNo: 0));
481 return DestReg;
482}
483
// ARMMaterializeInt - Materialize an integer constant into a register.
// Tries, in order: a single MOVi16, an MVN of the complement, the generic
// tablegen movt-based path, and finally a constant-pool load (i32 only).
// Returns an invalid Register on failure.
Register ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
    return Register();

  // If we can do this in a single instruction without a constant pool entry
  // do so now.
  const ConstantInt *CI = cast<ConstantInt>(Val: C);
  if (Subtarget->hasV6T2Ops() && isUInt<16>(x: CI->getZExtValue())) {
    unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
    const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
                                    &ARM::GPRRegClass;
    Register ImmReg = createResultReg(RC);
    AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                            MCID: TII.get(Opcode: Opc), DestReg: ImmReg)
                    .addImm(Val: CI->getZExtValue()));
    return ImmReg;
  }

  // Use MVN to emit negative constants.
  // The bitwise complement of a negative value may be a valid modified
  // immediate even when the value itself is not.
  if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
    unsigned Imm = (unsigned)~(CI->getSExtValue());
    bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Arg: Imm) != -1) :
      (ARM_AM::getSOImmVal(Arg: Imm) != -1);
    if (UseImm) {
      unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
      const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
                                      &ARM::GPRRegClass;
      Register ImmReg = createResultReg(RC);
      AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                              MCID: TII.get(Opcode: Opc), DestReg: ImmReg)
                      .addImm(Val: Imm));
      return ImmReg;
    }
  }

  // Try the generic tablegen-generated constant emitter (movw/movt pair).
  Register ResultReg;
  if (Subtarget->useMovt())
    ResultReg = fastEmit_i(VT, RetVT: VT, Opcode: ISD::Constant, imm0: CI->getZExtValue());

  if (ResultReg)
    return ResultReg;

  // Load from constant pool. For now 32-bit only.
  if (VT != MVT::i32)
    return Register();

  // MachineConstantPool wants an explicit alignment.
  Align Alignment = DL.getPrefTypeAlign(Ty: C->getType());
  unsigned Idx = MCP.getConstantPoolIndex(C, Alignment);
  ResultReg = createResultReg(RC: TLI.getRegClassFor(VT));
  if (isThumb2)
    AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                            MCID: TII.get(Opcode: ARM::t2LDRpci), DestReg: ResultReg)
                    .addConstantPoolIndex(Idx));
  else {
    // The extra immediate is for addrmode2.
    ResultReg = constrainOperandRegClass(II: TII.get(Opcode: ARM::LDRcp), Op: ResultReg, OpNum: 0);
    AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                            MCID: TII.get(Opcode: ARM::LDRcp), DestReg: ResultReg)
                    .addConstantPoolIndex(Idx)
                    .addImm(Val: 0));
  }
  return ResultReg;
}
548
// isPositionIndependent - Forward to the target lowering's PIC query; used
// below to choose between movw/movt and constant-pool materialization.
bool ARMFastISel::isPositionIndependent() const {
  return TLI.isPositionIndependent();
}
552
// ARMMaterializeGV - Materialize the address of a global value into a
// register, via movw/movt when available or a constant-pool load otherwise,
// adding the PIC/indirect (GOT / non-lazy-pointer) loads where required.
// Returns an invalid Register for cases left to SelectionDAG.
Register ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32 || GV->isThreadLocal())
    return Register();

  // ROPI/RWPI not currently supported.
  if (Subtarget->isROPI() || Subtarget->isRWPI())
    return Register();

  bool IsIndirect = Subtarget->isGVIndirectSymbol(GV);
  const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
                                           : &ARM::GPRRegClass;
  Register DestReg = createResultReg(RC);

  // FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(Val: GV);
  bool IsThreadLocal = GVar && GVar->isThreadLocal();
  if (!Subtarget->isTargetMachO() && IsThreadLocal)
    return Register();

  bool IsPositionIndependent = isPositionIndependent();
  // Use movw+movt when possible, it avoids constant pool entries.
  // Non-darwin targets only support static movt relocations in FastISel.
  if (Subtarget->useMovt() &&
      (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
    unsigned Opc;
    unsigned char TF = 0;
    if (Subtarget->isTargetMachO())
      TF = ARMII::MO_NONLAZY;

    if (IsPositionIndependent)
      Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
    else
      Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
    AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                            MCID: TII.get(Opcode: Opc), DestReg).addGlobalAddress(GV, Offset: 0, TargetFlags: TF));
  } else {
    // MachineConstantPool wants an explicit alignment.
    Align Alignment = DL.getPrefTypeAlign(Ty: GV->getType());

    if (Subtarget->isTargetELF() && IsPositionIndependent)
      return ARMLowerPICELF(GV, VT);

    // Grab index.
    // PCAdj accounts for the pipeline offset of the PC-relative fixup
    // (4 in Thumb, 8 in ARM mode).
    unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
    unsigned Id = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(C: GV, ID: Id,
                                                                Kind: ARMCP::CPValue,
                                                                PCAdj);
    unsigned Idx = MCP.getConstantPoolIndex(V: CPV, Alignment);

    // Load value.
    MachineInstrBuilder MIB;
    if (isThumb2) {
      unsigned Opc = IsPositionIndependent ? ARM::t2LDRpci_pic : ARM::t2LDRpci;
      MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc),
                    DestReg).addConstantPoolIndex(Idx);
      if (IsPositionIndependent)
        MIB.addImm(Val: Id);
      AddOptionalDefs(MIB);
    } else {
      // The extra immediate is for addrmode2.
      DestReg = constrainOperandRegClass(II: TII.get(Opcode: ARM::LDRcp), Op: DestReg, OpNum: 0);
      MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                    MCID: TII.get(Opcode: ARM::LDRcp), DestReg)
                .addConstantPoolIndex(Idx)
                .addImm(Val: 0);
      AddOptionalDefs(MIB);

      if (IsPositionIndependent) {
        // ARM-mode PIC: add (or load through) the PC to the loaded offset.
        unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
        Register NewDestReg = createResultReg(RC: TLI.getRegClassFor(VT));

        MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt,
                                          MIMD, MCID: TII.get(Opcode: Opc), DestReg: NewDestReg)
                                      .addReg(RegNo: DestReg)
                                      .addImm(Val: Id);
        AddOptionalDefs(MIB);
        return NewDestReg;
      }
    }
  }

  // Indirect symbols (GOT entries / non-lazy pointers) hold the address of
  // the real global; add one more load to resolve it.
  if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) ||
      (Subtarget->isTargetMachO() && IsIndirect)) {
    MachineInstrBuilder MIB;
    Register NewDestReg = createResultReg(RC: TLI.getRegClassFor(VT));
    if (isThumb2)
      MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                    MCID: TII.get(Opcode: ARM::t2LDRi12), DestReg: NewDestReg)
                .addReg(RegNo: DestReg)
                .addImm(Val: 0);
    else
      MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                    MCID: TII.get(Opcode: ARM::LDRi12), DestReg: NewDestReg)
                .addReg(RegNo: DestReg)
                .addImm(Val: 0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }

  return DestReg;
}
656
657Register ARMFastISel::fastMaterializeConstant(const Constant *C) {
658 EVT CEVT = TLI.getValueType(DL, Ty: C->getType(), AllowUnknown: true);
659
660 // Only handle simple types.
661 if (!CEVT.isSimple())
662 return Register();
663 MVT VT = CEVT.getSimpleVT();
664
665 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C))
666 return ARMMaterializeFP(CFP, VT);
667 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C))
668 return ARMMaterializeGV(GV, VT);
669 else if (isa<ConstantInt>(Val: C))
670 return ARMMaterializeInt(C, VT);
671
672 return Register();
673}
674
675// TODO: Register ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
676
677Register ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
678 // Don't handle dynamic allocas.
679 if (!FuncInfo.StaticAllocaMap.count(Val: AI))
680 return Register();
681
682 MVT VT;
683 if (!isLoadTypeLegal(Ty: AI->getType(), VT))
684 return Register();
685
686 DenseMap<const AllocaInst*, int>::iterator SI =
687 FuncInfo.StaticAllocaMap.find(Val: AI);
688
689 // This will get lowered later into the correct offsets and registers
690 // via rewriteXFrameIndex.
691 if (SI != FuncInfo.StaticAllocaMap.end()) {
692 unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
693 const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
694 Register ResultReg = createResultReg(RC);
695 ResultReg = constrainOperandRegClass(II: TII.get(Opcode: Opc), Op: ResultReg, OpNum: 0);
696
697 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
698 MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
699 .addFrameIndex(Idx: SI->second)
700 .addImm(Val: 0));
701 return ResultReg;
702 }
703
704 return Register();
705}
706
707bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
708 EVT evt = TLI.getValueType(DL, Ty, AllowUnknown: true);
709
710 // Only handle simple types.
711 if (evt == MVT::Other || !evt.isSimple()) return false;
712 VT = evt.getSimpleVT();
713
714 // Handle all legal types, i.e. a register that will directly hold this
715 // value.
716 return TLI.isTypeLegal(VT);
717}
718
719bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
720 if (isTypeLegal(Ty, VT)) return true;
721
722 // If this is a type than can be sign or zero-extended to a basic operation
723 // go ahead and accept it now.
724 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
725 return true;
726
727 return false;
728}
729
// Computes the address to get to an object.
// Walks through bitcasts, no-op int<->ptr casts, GEPs (folding constant
// offsets), and static allocas, filling in Addr.  Falls back to putting the
// whole object in a register.  Returns true on success.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Val: Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(Val: static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Val: Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(Val: Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
    break;
    case Instruction::BitCast:
      // Look through bitcasts.
      return ARMComputeAddress(Obj: U->getOperand(i: 0), Addr);
    case Instruction::IntToPtr:
      // Look past no-op inttoptrs.
      if (TLI.getValueType(DL, Ty: U->getOperand(i: 0)->getType()) ==
          TLI.getPointerTy(DL))
        return ARMComputeAddress(Obj: U->getOperand(i: 0), Addr);
      break;
    case Instruction::PtrToInt:
      // Look past no-op ptrtoints.
      if (TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL))
        return ARMComputeAddress(Obj: U->getOperand(i: 0), Addr);
      break;
    case Instruction::GetElementPtr: {
      // Save state so we can roll back if any index is unsupported.
      Address SavedAddr = Addr;
      int TmpOffset = Addr.getOffset();

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(GEP: U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (StructType *STy = GTI.getStructTypeOrNull()) {
          // Struct index: add the field's fixed byte offset.
          const StructLayout *SL = DL.getStructLayout(Ty: STy);
          unsigned Idx = cast<ConstantInt>(Val: Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          // Array/pointer index: scale by the element stride.
          uint64_t S = GTI.getSequentialElementStride(DL);
          while (true) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (canFoldAddIntoGEP(GEP: U, Add: Op)) {
              // A compatible add with a constant operand. Fold the constant.
              ConstantInt *CI =
                cast<ConstantInt>(Val: cast<AddOperator>(Val: Op)->getOperand(i_nocapture: 1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Val: Op)->getOperand(i_nocapture: 0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.setOffset(TmpOffset);
      if (ARMComputeAddress(Obj: U->getOperand(i: 0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Val: Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(Val: AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        // Static alloca: address is a frame index, resolved later.
        Addr.setKind(Address::FrameIndexBase);
        Addr.setFI(SI->second);
        return true;
      }
      break;
    }
  }

  // Try to get this in a register if nothing else has worked.
  if (!Addr.getReg())
    Addr.setReg(getRegForValue(V: Obj));
  return Addr.getReg();
}
836
// ARMSimplifyAddress - Rewrite Addr so its offset fits the encoding range
// of the load/store that will use it (12-bit for integer AM2, +/-255 for
// AM3, 8-bit scaled for FP), materializing reg+offset into a register when
// it does not.
void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
  bool needsLowering = false;
  switch (VT.SimpleTy) {
    default: llvm_unreachable("Unhandled load/store type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (!useAM3) {
        // Integer loads/stores handle 12-bit offsets.
        needsLowering = ((Addr.getOffset() & 0xfff) != Addr.getOffset());
        // Handle negative offsets.
        if (needsLowering && isThumb2)
          needsLowering = !(Subtarget->hasV6T2Ops() && Addr.getOffset() < 0 &&
                            Addr.getOffset() > -256);
      } else {
        // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
        needsLowering = (Addr.getOffset() > 255 || Addr.getOffset() < -255);
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // Floating point operands handle 8-bit offsets.
      needsLowering = ((Addr.getOffset() & 0xff) != Addr.getOffset());
      break;
  }

  // If this is a stack pointer and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (needsLowering && Addr.isFIBase()) {
    const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
                                             : &ARM::GPRRegClass;
    Register ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(
        MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
            .addFrameIndex(Idx: Addr.getFI())
            .addImm(Val: 0));
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  // Since the offset is too large for the load/store instruction
  // get the reg+offset into a register.
  if (needsLowering) {
    Addr.setReg(fastEmit_ri_(VT: MVT::i32, Opcode: ISD::ADD, Op0: Addr.getReg(),
                             Imm: Addr.getOffset(), ImmType: MVT::i32));
    Addr.setOffset(0);
  }
}
888
889void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
890 const MachineInstrBuilder &MIB,
891 MachineMemOperand::Flags Flags,
892 bool useAM3) {
893 // addrmode5 output depends on the selection dag addressing dividing the
894 // offset by 4 that it then later multiplies. Do this here as well.
895 if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
896 Addr.setOffset(Addr.getOffset() / 4);
897
898 // Frame base works a bit differently. Handle it separately.
899 if (Addr.isFIBase()) {
900 int FI = Addr.getFI();
901 int Offset = Addr.getOffset();
902 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
903 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI, Offset), F: Flags,
904 Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
905 // Now add the rest of the operands.
906 MIB.addFrameIndex(Idx: FI);
907
908 // ARM halfword load/stores and signed byte loads need an additional
909 // operand.
910 if (useAM3) {
911 int Imm = (Addr.getOffset() < 0) ? (0x100 | -Addr.getOffset())
912 : Addr.getOffset();
913 MIB.addReg(RegNo: 0);
914 MIB.addImm(Val: Imm);
915 } else {
916 MIB.addImm(Val: Addr.getOffset());
917 }
918 MIB.addMemOperand(MMO);
919 } else {
920 // Now add the rest of the operands.
921 MIB.addReg(RegNo: Addr.getReg());
922
923 // ARM halfword load/stores and signed byte loads need an additional
924 // operand.
925 if (useAM3) {
926 int Imm = (Addr.getOffset() < 0) ? (0x100 | -Addr.getOffset())
927 : Addr.getOffset();
928 MIB.addReg(RegNo: 0);
929 MIB.addImm(Val: Imm);
930 } else {
931 MIB.addImm(Val: Addr.getOffset());
932 }
933 }
934 AddOptionalDefs(MIB);
935}
936
// Emit a load of VT from Addr into ResultReg, choosing the opcode by type,
// offset range, and subtarget (Thumb2 vs. ARM, VFP availability). Sub-word
// integer values are zero- or sign-extended per isZExt. When allocReg is
// true a fresh virtual register is created for the result; otherwise the
// caller-provided ResultReg is defined. Returns false to defer this load to
// the default instruction selector.
bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                              MaybeAlign Alignment, bool isZExt,
                              bool allocReg) {
  unsigned Opc;
  bool useAM3 = false;  // addrmode3 (halfword / signed byte) operand form.
  bool needVMOV = false;  // Loaded into a GPR; must VMOV to an FP reg after.
  const TargetRegisterClass *RC;
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
  case MVT::i1:
  case MVT::i8:
    if (isThumb2) {
      // Prefer the imm8 form when the offset is a small negative value.
      if (Addr.getOffset() < 0 && Addr.getOffset() > -256 &&
          Subtarget->hasV6T2Ops())
        Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
      else
        Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
    } else {
      if (isZExt) {
        Opc = ARM::LDRBi12;
      } else {
        // Signed byte loads only exist in addrmode3 form on ARM.
        Opc = ARM::LDRSB;
        useAM3 = true;
      }
    }
    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
    break;
  case MVT::i16:
    // Bail on under-aligned halfword loads if unaligned access is illegal.
    if (Alignment && *Alignment < Align(2) &&
        !Subtarget->allowsUnalignedMem())
      return false;

    if (isThumb2) {
      if (Addr.getOffset() < 0 && Addr.getOffset() > -256 &&
          Subtarget->hasV6T2Ops())
        Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
      else
        Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
    } else {
      Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
      useAM3 = true;
    }
    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
    break;
  case MVT::i32:
    if (Alignment && *Alignment < Align(4) &&
        !Subtarget->allowsUnalignedMem())
      return false;

    if (isThumb2) {
      if (Addr.getOffset() < 0 && Addr.getOffset() > -256 &&
          Subtarget->hasV6T2Ops())
        Opc = ARM::t2LDRi8;
      else
        Opc = ARM::t2LDRi12;
    } else {
      Opc = ARM::LDRi12;
    }
    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
    break;
  case MVT::f32:
    if (!Subtarget->hasVFP2Base()) return false;
    // Unaligned loads need special handling. Floats require word-alignment.
    // Do an integer load into a GPR, then move to an S-register (see the
    // needVMOV fixup at the bottom).
    if (Alignment && *Alignment < Align(4)) {
      needVMOV = true;
      VT = MVT::i32;
      Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
    } else {
      Opc = ARM::VLDRS;
      RC = TLI.getRegClassFor(VT);
    }
    break;
  case MVT::f64:
    // Can load and store double precision even without FeatureFP64
    if (!Subtarget->hasVFP2Base()) return false;
    // FIXME: Unaligned loads need special handling. Doublewords require
    // word-alignment.
    if (Alignment && *Alignment < Align(4))
      return false;

    Opc = ARM::VLDRD;
    RC = TLI.getRegClassFor(VT);
    break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  if (allocReg)
    ResultReg = createResultReg(RC);
  assert(ResultReg.isVirtual() && "Expected an allocated virtual register.");
  MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                                    MCID: TII.get(Opcode: Opc), DestReg: ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB, Flags: MachineMemOperand::MOLoad, useAM3);

  // If we had an unaligned load of a float we've converted it to a regular
  // load. Now we must move from the GPR to the FP register.
  if (needVMOV) {
    Register MoveReg = createResultReg(RC: TLI.getRegClassFor(VT: MVT::f32));
    AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                            MCID: TII.get(Opcode: ARM::VMOVSR), DestReg: MoveReg)
                    .addReg(RegNo: ResultReg));
    ResultReg = MoveReg;
  }
  return true;
}
1045
1046bool ARMFastISel::SelectLoad(const Instruction *I) {
1047 // Atomic loads need special handling.
1048 if (cast<LoadInst>(Val: I)->isAtomic())
1049 return false;
1050
1051 const Value *SV = I->getOperand(i: 0);
1052 if (TLI.supportSwiftError()) {
1053 // Swifterror values can come from either a function parameter with
1054 // swifterror attribute or an alloca with swifterror attribute.
1055 if (const Argument *Arg = dyn_cast<Argument>(Val: SV)) {
1056 if (Arg->hasSwiftErrorAttr())
1057 return false;
1058 }
1059
1060 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: SV)) {
1061 if (Alloca->isSwiftError())
1062 return false;
1063 }
1064 }
1065
1066 // Verify we have a legal type before going any further.
1067 MVT VT;
1068 if (!isLoadTypeLegal(Ty: I->getType(), VT))
1069 return false;
1070
1071 // See if we can handle this address.
1072 Address Addr;
1073 if (!ARMComputeAddress(Obj: I->getOperand(i: 0), Addr)) return false;
1074
1075 Register ResultReg;
1076 if (!ARMEmitLoad(VT, ResultReg, Addr, Alignment: cast<LoadInst>(Val: I)->getAlign()))
1077 return false;
1078 updateValueMap(I, Reg: ResultReg);
1079 return true;
1080}
1081
// Emit a store of SrcReg (holding a value of type VT) to Addr, selecting the
// store opcode by type, offset range, and subtarget. i1 values are masked to
// their low bit first; under-aligned f32 values are moved to a GPR and
// stored as i32. Returns false to defer to the default instruction selector.
bool ARMFastISel::ARMEmitStore(MVT VT, Register SrcReg, Address &Addr,
                               MaybeAlign Alignment) {
  unsigned StrOpc;
  bool useAM3 = false;  // addrmode3 (halfword) operand form.
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1: {
      // Mask the value down to bit 0, then fall through to the i8 store.
      Register Res = createResultReg(RC: isThumb2 ? &ARM::tGPRRegClass
                                            : &ARM::GPRRegClass);
      unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
      SrcReg = constrainOperandRegClass(II: TII.get(Opcode: Opc), Op: SrcReg, OpNum: 1);
      AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                              MCID: TII.get(Opcode: Opc), DestReg: Res)
                      .addReg(RegNo: SrcReg).addImm(Val: 1));
      SrcReg = Res;
      [[fallthrough]];
    }
    case MVT::i8:
      if (isThumb2) {
        // Prefer the imm8 form for small negative offsets when available.
        if (Addr.getOffset() < 0 && Addr.getOffset() > -256 &&
            Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRBi8;
        else
          StrOpc = ARM::t2STRBi12;
      } else {
        StrOpc = ARM::STRBi12;
      }
      break;
    case MVT::i16:
      // Bail on under-aligned halfword stores if unaligned access is illegal.
      if (Alignment && *Alignment < Align(2) &&
          !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.getOffset() < 0 && Addr.getOffset() > -256 &&
            Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRHi8;
        else
          StrOpc = ARM::t2STRHi12;
      } else {
        StrOpc = ARM::STRH;
        useAM3 = true;
      }
      break;
    case MVT::i32:
      if (Alignment && *Alignment < Align(4) &&
          !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.getOffset() < 0 && Addr.getOffset() > -256 &&
            Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRi8;
        else
          StrOpc = ARM::t2STRi12;
      } else {
        StrOpc = ARM::STRi12;
      }
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2Base()) return false;
      // Unaligned stores need special handling. Floats require word-alignment.
      // Move the value to a GPR and store it as an i32 instead.
      if (Alignment && *Alignment < Align(4)) {
        Register MoveReg = createResultReg(RC: TLI.getRegClassFor(VT: MVT::i32));
        AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                                MCID: TII.get(Opcode: ARM::VMOVRS), DestReg: MoveReg)
                        .addReg(RegNo: SrcReg));
        SrcReg = MoveReg;
        VT = MVT::i32;
        StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
      } else {
        StrOpc = ARM::VSTRS;
      }
      break;
    case MVT::f64:
      // Can load and store double precision even without FeatureFP64
      if (!Subtarget->hasVFP2Base()) return false;
      // FIXME: Unaligned stores need special handling. Doublewords require
      // word-alignment.
      if (Alignment && *Alignment < Align(4))
        return false;

      StrOpc = ARM::VSTRD;
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  SrcReg = constrainOperandRegClass(II: TII.get(Opcode: StrOpc), Op: SrcReg, OpNum: 0);
  MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                                    MCID: TII.get(Opcode: StrOpc))
                            .addReg(RegNo: SrcReg);
  AddLoadStoreOperands(VT, Addr, MIB, Flags: MachineMemOperand::MOStore, useAM3);
  return true;
}
1179
1180bool ARMFastISel::SelectStore(const Instruction *I) {
1181 Value *Op0 = I->getOperand(i: 0);
1182 Register SrcReg;
1183
1184 // Atomic stores need special handling.
1185 if (cast<StoreInst>(Val: I)->isAtomic())
1186 return false;
1187
1188 const Value *PtrV = I->getOperand(i: 1);
1189 if (TLI.supportSwiftError()) {
1190 // Swifterror values can come from either a function parameter with
1191 // swifterror attribute or an alloca with swifterror attribute.
1192 if (const Argument *Arg = dyn_cast<Argument>(Val: PtrV)) {
1193 if (Arg->hasSwiftErrorAttr())
1194 return false;
1195 }
1196
1197 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: PtrV)) {
1198 if (Alloca->isSwiftError())
1199 return false;
1200 }
1201 }
1202
1203 // Verify we have a legal type before going any further.
1204 MVT VT;
1205 if (!isLoadTypeLegal(Ty: I->getOperand(i: 0)->getType(), VT))
1206 return false;
1207
1208 // Get the value to be stored into a register.
1209 SrcReg = getRegForValue(V: Op0);
1210 if (!SrcReg)
1211 return false;
1212
1213 // See if we can handle this address.
1214 Address Addr;
1215 if (!ARMComputeAddress(Obj: I->getOperand(i: 1), Addr))
1216 return false;
1217
1218 if (!ARMEmitStore(VT, SrcReg, Addr, Alignment: cast<StoreInst>(Val: I)->getAlign()))
1219 return false;
1220 return true;
1221}
1222
1223static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
1224 switch (Pred) {
1225 // Needs two compares...
1226 case CmpInst::FCMP_ONE:
1227 case CmpInst::FCMP_UEQ:
1228 default:
1229 // AL is our "false" for now. The other two need more compares.
1230 return ARMCC::AL;
1231 case CmpInst::ICMP_EQ:
1232 case CmpInst::FCMP_OEQ:
1233 return ARMCC::EQ;
1234 case CmpInst::ICMP_SGT:
1235 case CmpInst::FCMP_OGT:
1236 return ARMCC::GT;
1237 case CmpInst::ICMP_SGE:
1238 case CmpInst::FCMP_OGE:
1239 return ARMCC::GE;
1240 case CmpInst::ICMP_UGT:
1241 case CmpInst::FCMP_UGT:
1242 return ARMCC::HI;
1243 case CmpInst::FCMP_OLT:
1244 return ARMCC::MI;
1245 case CmpInst::ICMP_ULE:
1246 case CmpInst::FCMP_OLE:
1247 return ARMCC::LS;
1248 case CmpInst::FCMP_ORD:
1249 return ARMCC::VC;
1250 case CmpInst::FCMP_UNO:
1251 return ARMCC::VS;
1252 case CmpInst::FCMP_UGE:
1253 return ARMCC::PL;
1254 case CmpInst::ICMP_SLT:
1255 case CmpInst::FCMP_ULT:
1256 return ARMCC::LT;
1257 case CmpInst::ICMP_SLE:
1258 case CmpInst::FCMP_ULE:
1259 return ARMCC::LE;
1260 case CmpInst::FCMP_UNE:
1261 case CmpInst::ICMP_NE:
1262 return ARMCC::NE;
1263 case CmpInst::ICMP_UGE:
1264 return ARMCC::HS;
1265 case CmpInst::ICMP_ULT:
1266 return ARMCC::LO;
1267 }
1268}
1269
1270bool ARMFastISel::SelectBranch(const Instruction *I) {
1271 const CondBrInst *BI = cast<CondBrInst>(Val: I);
1272 MachineBasicBlock *TBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: 0));
1273 MachineBasicBlock *FBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: 1));
1274
1275 // Simple branch support.
1276
1277 // If we can, avoid recomputing the compare - redoing it could lead to wonky
1278 // behavior.
1279 if (const CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition())) {
1280 if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
1281 // Get the compare predicate.
1282 // Try to take advantage of fallthrough opportunities.
1283 CmpInst::Predicate Predicate = CI->getPredicate();
1284 if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
1285 std::swap(a&: TBB, b&: FBB);
1286 Predicate = CmpInst::getInversePredicate(pred: Predicate);
1287 }
1288
1289 ARMCC::CondCodes ARMPred = getComparePred(Pred: Predicate);
1290
1291 // We may not handle every CC for now.
1292 if (ARMPred == ARMCC::AL) return false;
1293
1294 // Emit the compare.
1295 if (!ARMEmitCmp(Src1Value: CI->getOperand(i_nocapture: 0), Src2Value: CI->getOperand(i_nocapture: 1), isZExt: CI->isUnsigned()))
1296 return false;
1297
1298 unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1299 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: BrOpc))
1300 .addMBB(MBB: TBB).addImm(Val: ARMPred).addReg(RegNo: ARM::CPSR);
1301 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
1302 return true;
1303 }
1304 } else if (TruncInst *TI = dyn_cast<TruncInst>(Val: BI->getCondition())) {
1305 MVT SourceVT;
1306 if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1307 (isLoadTypeLegal(Ty: TI->getOperand(i_nocapture: 0)->getType(), VT&: SourceVT))) {
1308 unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
1309 Register OpReg = getRegForValue(V: TI->getOperand(i_nocapture: 0));
1310 OpReg = constrainOperandRegClass(II: TII.get(Opcode: TstOpc), Op: OpReg, OpNum: 0);
1311 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1312 MCID: TII.get(Opcode: TstOpc))
1313 .addReg(RegNo: OpReg).addImm(Val: 1));
1314
1315 unsigned CCMode = ARMCC::NE;
1316 if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
1317 std::swap(a&: TBB, b&: FBB);
1318 CCMode = ARMCC::EQ;
1319 }
1320
1321 unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1322 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: BrOpc))
1323 .addMBB(MBB: TBB).addImm(Val: CCMode).addReg(RegNo: ARM::CPSR);
1324
1325 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
1326 return true;
1327 }
1328 } else if (const ConstantInt *CI =
1329 dyn_cast<ConstantInt>(Val: BI->getCondition())) {
1330 uint64_t Imm = CI->getZExtValue();
1331 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
1332 fastEmitBranch(MSucc: Target, DbgLoc: MIMD.getDL());
1333 return true;
1334 }
1335
1336 Register CmpReg = getRegForValue(V: BI->getCondition());
1337 if (!CmpReg)
1338 return false;
1339
1340 // We've been divorced from our compare! Our block was split, and
1341 // now our compare lives in a predecessor block. We musn't
1342 // re-compare here, as the children of the compare aren't guaranteed
1343 // live across the block boundary (we *could* check for this).
1344 // Regardless, the compare has been done in the predecessor block,
1345 // and it left a value for us in a virtual register. Ergo, we test
1346 // the one-bit value left in the virtual register.
1347 unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
1348 CmpReg = constrainOperandRegClass(II: TII.get(Opcode: TstOpc), Op: CmpReg, OpNum: 0);
1349 AddOptionalDefs(
1350 MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: TstOpc))
1351 .addReg(RegNo: CmpReg)
1352 .addImm(Val: 1));
1353
1354 unsigned CCMode = ARMCC::NE;
1355 if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
1356 std::swap(a&: TBB, b&: FBB);
1357 CCMode = ARMCC::EQ;
1358 }
1359
1360 unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1361 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: BrOpc))
1362 .addMBB(MBB: TBB).addImm(Val: CCMode).addReg(RegNo: ARM::CPSR);
1363 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
1364 return true;
1365}
1366
1367bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
1368 Register AddrReg = getRegForValue(V: I->getOperand(i: 0));
1369 if (!AddrReg)
1370 return false;
1371
1372 unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
1373 assert(isThumb2 || Subtarget->hasV4TOps());
1374
1375 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1376 MCID: TII.get(Opcode: Opc)).addReg(RegNo: AddrReg));
1377
1378 const IndirectBrInst *IB = cast<IndirectBrInst>(Val: I);
1379 for (const BasicBlock *SuccBB : IB->successors())
1380 FuncInfo.MBB->addSuccessor(Succ: FuncInfo.getMBB(BB: SuccBB));
1381
1382 return true;
1383}
1384
// Emit a compare of Src1Value against Src2Value that sets CPSR (integer
// CMP/CMN, or VCMP followed by FMSTAT for floating point). Sub-word integer
// operands are extended to i32 first, zero- vs. sign-extended per isZExt.
// Encodes Src2 as an immediate when possible (CMN for negatable negatives,
// VCMPZ against +0.0). Returns false if this compare cannot be handled here.
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                             bool isZExt) {
  Type *Ty = Src1Value->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, AllowUnknown: true);
  if (!SrcEVT.isSimple()) return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
    return false;

  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  // FIXME: At -O0 we don't have anything that canonicalizes operand order.
  // Thus, Src1Value may be a ConstantInt, but we're missing it.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: Src2Value)) {
    if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
        SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
      // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
      // then a cmn, because there is no way to represent 2147483648 as a
      // signed 32-bit int.
      if (Imm < 0 && Imm != (int)0x80000000) {
        // Negate so a CMN (compare-negative) can be used with the positive
        // magnitude (see the opcode selection below).
        isNegativeImm = true;
        Imm = -Imm;
      }
      UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Arg: Imm) != -1) :
        (ARM_AM::getSOImmVal(Arg: Imm) != -1);
    }
  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Val: Src2Value)) {
    // VCMPZ only compares against +0.0.
    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
      if (ConstFP->isZero() && !ConstFP->isNegative())
        UseImm = true;
  }

  unsigned CmpOpc;
  bool isICmp = true;
  bool needsExt = false;
  switch (SrcVT.SimpleTy) {
  default: return false;
  // TODO: Verify compares.
  case MVT::f32:
    isICmp = false;
    CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
    break;
  case MVT::f64:
    isICmp = false;
    CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    needsExt = true;
    [[fallthrough]];
  case MVT::i32:
    if (isThumb2) {
      if (!UseImm)
        CmpOpc = ARM::t2CMPrr;
      else
        CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
    } else {
      if (!UseImm)
        CmpOpc = ARM::CMPrr;
      else
        CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
    }
    break;
  }

  Register SrcReg1 = getRegForValue(V: Src1Value);
  if (!SrcReg1)
    return false;

  Register SrcReg2;
  if (!UseImm) {
    SrcReg2 = getRegForValue(V: Src2Value);
    if (!SrcReg2)
      return false;
  }

  // We have i1, i8, or i16, we need to either zero extend or sign extend.
  if (needsExt) {
    SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg: SrcReg1, DestVT: MVT::i32, isZExt);
    if (!SrcReg1)
      return false;
    if (!UseImm) {
      SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg: SrcReg2, DestVT: MVT::i32, isZExt);
      if (!SrcReg2)
        return false;
    }
  }

  const MCInstrDesc &II = TII.get(Opcode: CmpOpc);
  SrcReg1 = constrainOperandRegClass(II, Op: SrcReg1, OpNum: 0);
  if (!UseImm) {
    SrcReg2 = constrainOperandRegClass(II, Op: SrcReg2, OpNum: 1);
    AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
                    .addReg(RegNo: SrcReg1).addReg(RegNo: SrcReg2));
  } else {
    MachineInstrBuilder MIB;
    MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
      .addReg(RegNo: SrcReg1);

    // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
    if (isICmp)
      MIB.addImm(Val: Imm);
    AddOptionalDefs(MIB);
  }

  // For floating point we need to move the result to a comparison register
  // that we can then use for branches.
  if (Ty->isFloatTy() || Ty->isDoubleTy())
    AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                            MCID: TII.get(Opcode: ARM::FMSTAT)));
  return true;
}
1507
1508bool ARMFastISel::SelectCmp(const Instruction *I) {
1509 const CmpInst *CI = cast<CmpInst>(Val: I);
1510
1511 // Get the compare predicate.
1512 ARMCC::CondCodes ARMPred = getComparePred(Pred: CI->getPredicate());
1513
1514 // We may not handle every CC for now.
1515 if (ARMPred == ARMCC::AL) return false;
1516
1517 // Emit the compare.
1518 if (!ARMEmitCmp(Src1Value: CI->getOperand(i_nocapture: 0), Src2Value: CI->getOperand(i_nocapture: 1), isZExt: CI->isUnsigned()))
1519 return false;
1520
1521 // Now set a register based on the comparison. Explicitly set the predicates
1522 // here.
1523 unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1524 const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
1525 : &ARM::GPRRegClass;
1526 Register DestReg = createResultReg(RC);
1527 Constant *Zero = ConstantInt::get(Ty: Type::getInt32Ty(C&: *Context), V: 0);
1528 Register ZeroReg = fastMaterializeConstant(C: Zero);
1529 // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
1530 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: MovCCOpc), DestReg)
1531 .addReg(RegNo: ZeroReg).addImm(Val: 1)
1532 .addImm(Val: ARMPred).addReg(RegNo: ARM::CPSR);
1533
1534 updateValueMap(I, Reg: DestReg);
1535 return true;
1536}
1537
1538bool ARMFastISel::SelectFPExt(const Instruction *I) {
1539 // Make sure we have VFP and that we're extending float to double.
1540 if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
1541
1542 Value *V = I->getOperand(i: 0);
1543 if (!I->getType()->isDoubleTy() ||
1544 !V->getType()->isFloatTy()) return false;
1545
1546 Register Op = getRegForValue(V);
1547 if (!Op)
1548 return false;
1549
1550 Register Result = createResultReg(RC: &ARM::DPRRegClass);
1551 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1552 MCID: TII.get(Opcode: ARM::VCVTDS), DestReg: Result)
1553 .addReg(RegNo: Op));
1554 updateValueMap(I, Reg: Result);
1555 return true;
1556}
1557
1558bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
1559 // Make sure we have VFP and that we're truncating double to float.
1560 if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
1561
1562 Value *V = I->getOperand(i: 0);
1563 if (!(I->getType()->isFloatTy() &&
1564 V->getType()->isDoubleTy())) return false;
1565
1566 Register Op = getRegForValue(V);
1567 if (!Op)
1568 return false;
1569
1570 Register Result = createResultReg(RC: &ARM::SPRRegClass);
1571 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1572 MCID: TII.get(Opcode: ARM::VCVTSD), DestReg: Result)
1573 .addReg(RegNo: Op));
1574 updateValueMap(I, Reg: Result);
1575 return true;
1576}
1577
// Select sitofp/uitofp: extend the integer source to i32 if needed, move it
// into an FP register, then convert in place with VSITO*/VUITO*. isSigned
// distinguishes sitofp (true) from uitofp (false). Returns false when VFP is
// unavailable or the types are not supported here.
bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2Base()) return false;

  MVT DstVT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT&: DstVT))
    return false;

  Value *Src = I->getOperand(i: 0);
  EVT SrcEVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();
  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
    return false;

  Register SrcReg = getRegForValue(V: Src);
  if (!SrcReg)
    return false;

  // Handle sign-extension.
  // (Zero-extend for uitofp, sign-extend for sitofp.)
  if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
    SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT: MVT::i32,
                            /*isZExt*/!isSigned);
    if (!SrcReg)
      return false;
  }

  // The conversion routine works on fp-reg to fp-reg and the operand above
  // was an integer, move it to the fp registers if possible.
  Register FP = ARMMoveToFPReg(VT: MVT::f32, SrcReg);
  if (!FP)
    return false;

  unsigned Opc;
  if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
  else if (Ty->isDoubleTy() && Subtarget->hasFP64())
    Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
  else return false;

  Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT: DstVT));
  AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                          MCID: TII.get(Opcode: Opc), DestReg: ResultReg).addReg(RegNo: FP));
  updateValueMap(I, Reg: ResultReg);
  return true;
}
1625
// Select fptosi/fptoui: convert with VTOSIZ*/VTOUIZ* (round toward zero)
// into an FP register, then move the bits to an integer register. isSigned
// distinguishes fptosi (true) from fptoui (false). Returns false when VFP is
// unavailable or the types are not supported here.
bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2Base()) return false;

  MVT DstVT;
  Type *RetTy = I->getType();
  if (!isTypeLegal(Ty: RetTy, VT&: DstVT))
    return false;

  Register Op = getRegForValue(V: I->getOperand(i: 0));
  if (!Op)
    return false;

  unsigned Opc;
  Type *OpTy = I->getOperand(i: 0)->getType();
  if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
  else if (OpTy->isDoubleTy() && Subtarget->hasFP64())
    Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
  else return false;

  // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
  Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT: MVT::f32));
  AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                          MCID: TII.get(Opcode: Opc), DestReg: ResultReg).addReg(RegNo: Op));

  // This result needs to be in an integer register, but the conversion only
  // takes place in fp-regs.
  Register IntReg = ARMMoveToIntReg(VT: DstVT, SrcReg: ResultReg);
  if (!IntReg)
    return false;

  updateValueMap(I, Reg: IntReg);
  return true;
}
1660
// Select an i32 select: TST the i1 condition against 1, then emit a
// predicated move (MOVCC/MVNCC) choosing between the two values. The false
// value is encoded as an immediate when it fits an ARM/Thumb2 modified
// immediate (negatives via MVNCC with the bitwise complement).
bool ARMFastISel::SelectSelect(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(Ty: I->getType(), VT))
    return false;

  // Things need to be register sized for register moves.
  if (VT != MVT::i32) return false;

  Register CondReg = getRegForValue(V: I->getOperand(i: 0));
  if (!CondReg)
    return false;
  Register Op1Reg = getRegForValue(V: I->getOperand(i: 1));
  if (!Op1Reg)
    return false;

  // Check to see if we can use an immediate in the conditional move.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: I->getOperand(i: 2))) {
    assert(VT == MVT::i32 && "Expecting an i32.");
    Imm = (int)ConstInt->getValue().getZExtValue();
    if (Imm < 0) {
      // MVNCC moves the complement, so store ~Imm and remember to use it.
      isNegativeImm = true;
      Imm = ~Imm;
    }
    UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Arg: Imm) != -1) :
      (ARM_AM::getSOImmVal(Arg: Imm) != -1);
  }

  Register Op2Reg;
  if (!UseImm) {
    Op2Reg = getRegForValue(V: I->getOperand(i: 2));
    if (!Op2Reg)
      return false;
  }

  // Test bit 0 of the condition so the predicated move can key off CPSR.
  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  CondReg = constrainOperandRegClass(II: TII.get(Opcode: TstOpc), Op: CondReg, OpNum: 0);
  AddOptionalDefs(
      MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: TstOpc))
          .addReg(RegNo: CondReg)
          .addImm(Val: 1));

  unsigned MovCCOpc;
  const TargetRegisterClass *RC;
  if (!UseImm) {
    RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
    MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
  } else {
    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
    if (!isNegativeImm)
      MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
    else
      MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
  }
  Register ResultReg = createResultReg(RC);
  if (!UseImm) {
    // Register form: start from the false value (Op2) and conditionally
    // (NE: condition bit was set) move in the true value (Op1).
    Op2Reg = constrainOperandRegClass(II: TII.get(Opcode: MovCCOpc), Op: Op2Reg, OpNum: 1);
    Op1Reg = constrainOperandRegClass(II: TII.get(Opcode: MovCCOpc), Op: Op1Reg, OpNum: 2);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: MovCCOpc),
            DestReg: ResultReg)
        .addReg(RegNo: Op2Reg)
        .addReg(RegNo: Op1Reg)
        .addImm(Val: ARMCC::NE)
        .addReg(RegNo: ARM::CPSR);
  } else {
    // Immediate form: start from the true value (Op1) and conditionally
    // (EQ: condition bit was clear) move in the immediate false value.
    Op1Reg = constrainOperandRegClass(II: TII.get(Opcode: MovCCOpc), Op: Op1Reg, OpNum: 1);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: MovCCOpc),
            DestReg: ResultReg)
        .addReg(RegNo: Op1Reg)
        .addImm(Val: Imm)
        .addImm(Val: ARMCC::EQ)
        .addReg(RegNo: ARM::CPSR);
  }
  updateValueMap(I, Reg: ResultReg);
  return true;
}
1739
1740bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
1741 MVT VT;
1742 Type *Ty = I->getType();
1743 if (!isTypeLegal(Ty, VT))
1744 return false;
1745
1746 // If we have integer div support we should have selected this automagically.
1747 // In case we have a real miss go ahead and return false and we'll pick
1748 // it up later.
1749 if (Subtarget->hasDivideInThumbMode())
1750 return false;
1751
1752 // Otherwise emit a libcall.
1753 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1754 if (VT == MVT::i8)
1755 LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
1756 else if (VT == MVT::i16)
1757 LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
1758 else if (VT == MVT::i32)
1759 LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
1760 else if (VT == MVT::i64)
1761 LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
1762 else if (VT == MVT::i128)
1763 LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
1764 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
1765
1766 return ARMEmitLibcall(I, Call: LC);
1767}
1768
1769bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
1770 MVT VT;
1771 Type *Ty = I->getType();
1772 if (!isTypeLegal(Ty, VT))
1773 return false;
1774
1775 // Many ABIs do not provide a libcall for standalone remainder, so we need to
1776 // use divrem (see the RTABI 4.3.1). Since FastISel can't handle non-double
1777 // multi-reg returns, we'll have to bail out.
1778 if (!TLI.hasStandaloneRem(VT)) {
1779 return false;
1780 }
1781
1782 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1783 if (VT == MVT::i8)
1784 LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
1785 else if (VT == MVT::i16)
1786 LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
1787 else if (VT == MVT::i32)
1788 LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
1789 else if (VT == MVT::i64)
1790 LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
1791 else if (VT == MVT::i128)
1792 LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
1793 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
1794
1795 return ARMEmitLibcall(I, Call: LC);
1796}
1797
// Select an integer add/or/sub whose result type is narrower than i32
// (i1/i8/i16). These illegal types are rejected by the target-independent
// selector, so emit the full-width register-register form here; callers of
// the result only rely on the low bits.
bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
    return false;

  // Map the generic ISD opcode to the matching ARM or Thumb2 machine opcode.
  unsigned Opc;
  switch (ISDOpcode) {
  default: return false;
  case ISD::ADD:
    Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
    break;
  case ISD::OR:
    Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
    break;
  case ISD::SUB:
    Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
    break;
  }

  Register SrcReg1 = getRegForValue(V: I->getOperand(i: 0));
  if (!SrcReg1)
    return false;

  // TODO: Often the 2nd operand is an immediate, which can be encoded directly
  // in the instruction, rather then materializing the value in a register.
  Register SrcReg2 = getRegForValue(V: I->getOperand(i: 1));
  if (!SrcReg2)
    return false;

  // Constrain both sources to the register classes the chosen opcode
  // requires before building the instruction.
  Register ResultReg = createResultReg(RC: &ARM::GPRnopcRegClass);
  SrcReg1 = constrainOperandRegClass(II: TII.get(Opcode: Opc), Op: SrcReg1, OpNum: 1);
  SrcReg2 = constrainOperandRegClass(II: TII.get(Opcode: Opc), Op: SrcReg2, OpNum: 2);
  AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                          MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
                  .addReg(RegNo: SrcReg1).addReg(RegNo: SrcReg2));
  updateValueMap(I, Reg: ResultReg);
  return true;
}
1839
// Select a scalar floating-point add/sub/mul using the VFP instructions.
// Returns false (deferring to the normal selector) for vectors, unsupported
// FP configurations, or any other ISD opcode.
bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
  EVT FPVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
  if (!FPVT.isSimple()) return false;
  MVT VT = FPVT.getSimpleVT();

  // FIXME: Support vector types where possible.
  if (VT.isVector())
    return false;

  // We can get here in the case when we want to use NEON for our fp
  // operations, but can't figure out how to. Just use the vfp instructions
  // if we have them.
  // FIXME: It'd be nice to use NEON instructions.
  Type *Ty = I->getType();
  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
    return false;
  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
    return false;

  // Choose between the double- and single-precision VFP opcode.
  // NOTE(review): the MVT::i64 clause looks unreachable for an FP binop —
  // presumably historical; confirm before relying on it.
  unsigned Opc;
  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
  switch (ISDOpcode) {
  default: return false;
  case ISD::FADD:
    Opc = is64bit ? ARM::VADDD : ARM::VADDS;
    break;
  case ISD::FSUB:
    Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
    break;
  case ISD::FMUL:
    Opc = is64bit ? ARM::VMULD : ARM::VMULS;
    break;
  }
  Register Op1 = getRegForValue(V: I->getOperand(i: 0));
  if (!Op1)
    return false;

  Register Op2 = getRegForValue(V: I->getOperand(i: 1));
  if (!Op2)
    return false;

  Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT: VT.SimpleTy));
  AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                          MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
                  .addReg(RegNo: Op1).addReg(RegNo: Op2));
  updateValueMap(I, Reg: ResultReg);
  return true;
}
1888
1889// Call Handling Code
1890
1891// This is largely taken directly from CCAssignFnForNode
1892// TODO: We may not support all of this.
// Map an IR calling convention to the CCAssignFn used for analyzing either
// arguments (Return == false) or return values (Return == true). Variadic
// calls never use the hard-float (VFP-register) variants.
CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
                                           bool Return,
                                           bool isVarArg) {
  switch (CC) {
  default:
    report_fatal_error(reason: "Unsupported calling convention");
  case CallingConv::Fast:
    // fastcc gets VFP registers when FP registers exist and the call is not
    // variadic; otherwise it behaves like the C convention (fallthrough).
    if (Subtarget->hasFPRegs() && !isVarArg) {
      if (!TM.isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use VFP variant of the calling convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    [[fallthrough]];
  case CallingConv::C:
  case CallingConv::CXX_FAST_TLS:
    // Use target triple & subtarget features to do actual dispatch.
    if (TM.isAAPCS_ABI()) {
      if (Subtarget->hasFPRegs() &&
          TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
    } else {
        return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
    }
  case CallingConv::ARM_AAPCS_VFP:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
    if (!isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
    // Fall through to soft float variant, variadic functions don't
    // use hard floating point ABI.
    [[fallthrough]];
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  case CallingConv::GHC:
    // GHC functions never return through the normal convention.
    if (Return)
      report_fatal_error(reason: "Can't return in GHC call convention");
    else
      return CC_ARM_APCS_GHC;
  case CallingConv::CFGuard_Check:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
  }
}
1940
// Assign call arguments to locations, then emit CALLSEQ_START and the
// copies/stores that place each argument. Runs a validation-only pass first
// so that no instructions are emitted if any argument is unsupported.
// On success, RegArgs holds the physical registers carrying arguments and
// NumBytes the stack space consumed.
bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<Register> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<Register> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes,
                                  bool isVarArg) {
  SmallVector<CCValAssign, 16> ArgLocs;
  SmallVector<Type *, 16> OrigTys;
  for (Value *Arg : Args)
    OrigTys.push_back(Elt: Arg->getType());
  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs, Flags&: ArgFlags, OrigTys,
                             Fn: CCAssignFnForCall(CC, Return: false, isVarArg));

  // Check that we can handle all of the arguments. If we can't, then bail out
  // now before we add code to the MBB.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // We don't handle NEON/vector parameters yet.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
      return false;

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      continue;
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      // Note: ++i here consumes the second half of the f64 register pair.
      if (VA.getLocVT() != MVT::f64 ||
          // TODO: Only handle register args for now.
          !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
        return false;
    } else {
      switch (ArgVT.SimpleTy) {
      default:
        return false;
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
        break;
      case MVT::f32:
        if (!Subtarget->hasVFP2Base())
          return false;
        break;
      case MVT::f64:
        if (!Subtarget->hasVFP2Base())
          return false;
        break;
      }
    }
  }

  // At the point, we are able to handle the call's arguments in fast isel.

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getStackSize();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                          MCID: TII.get(Opcode: AdjStackDown))
                  .addImm(Val: NumBytes).addImm(Val: 0));

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const Value *ArgVal = Args[VA.getValNo()];
    Register Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
           "We don't handle NEON/vector parameters yet.");

    // Handle arg promotion, etc.
    switch (VA.getLocInfo()) {
      case CCValAssign::Full: break;
      case CCValAssign::SExt: {
        MVT DestVT = VA.getLocVT();
        Arg = ARMEmitIntExt(SrcVT: ArgVT, SrcReg: Arg, DestVT, /*isZExt*/false);
        assert(Arg && "Failed to emit a sext");
        ArgVT = DestVT;
        break;
      }
      case CCValAssign::AExt:
      // Intentional fall-through.  Handle AExt and ZExt.
      case CCValAssign::ZExt: {
        MVT DestVT = VA.getLocVT();
        Arg = ARMEmitIntExt(SrcVT: ArgVT, SrcReg: Arg, DestVT, /*isZExt*/true);
        assert(Arg && "Failed to emit a zext");
        ArgVT = DestVT;
        break;
      }
      case CCValAssign::BCvt: {
        Register BC = fastEmit_r(VT: ArgVT, RetVT: VA.getLocVT(), Opcode: ISD::BITCAST, Op0: Arg);
        assert(BC && "Failed to emit a bitcast!");
        Arg = BC;
        ArgVT = VA.getLocVT();
        break;
      }
      default: llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      // Plain register argument: copy into its assigned physreg.
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
              MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: VA.getLocReg()).addReg(RegNo: Arg);
      RegArgs.push_back(Elt: VA.getLocReg());
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      assert(VA.getLocVT() == MVT::f64 &&
             "Custom lowering for v2f64 args not available");

      // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size()
      CCValAssign &NextVA = ArgLocs[++i];

      assert(VA.isRegLoc() && NextVA.isRegLoc() &&
             "We only handle register args!");

      // Split the double into the GPR pair (VA, NextVA) with VMOVRRD.
      AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                              MCID: TII.get(Opcode: ARM::VMOVRRD), DestReg: VA.getLocReg())
                      .addReg(RegNo: NextVA.getLocReg(), Flags: RegState::Define)
                      .addReg(RegNo: Arg));
      RegArgs.push_back(Elt: VA.getLocReg());
      RegArgs.push_back(Elt: NextVA.getLocReg());
    } else {
      assert(VA.isMemLoc());
      // Need to store on the stack.

      // Don't emit stores for undef values.
      if (isa<UndefValue>(Val: ArgVal))
        continue;

      // Store relative to SP at the assigned stack-slot offset.
      Address Addr;
      Addr.setKind(Address::RegBase);
      Addr.setReg(ARM::SP);
      Addr.setOffset(VA.getLocMemOffset());

      bool EmitRet = ARMEmitStore(VT: ArgVT, SrcReg: Arg, Addr); (void)EmitRet;
      assert(EmitRet && "Could not emit a store for argument!");
    }
  }

  return true;
}
2089
// Finish a call: emit CALLSEQ_END and copy the return value (if any) out of
// its physical register(s) into a fresh virtual register, recording the
// consumed physregs in UsedRegs so the caller can mark the rest dead.
bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
                             const Instruction *I, CallingConv::ID CC,
                             unsigned &NumBytes, bool isVarArg) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                          MCID: TII.get(Opcode: AdjStackUp))
                  .addImm(Val: NumBytes).addImm(Val: -1ULL));

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(VT: RetVT, OrigTy: I->getType(),
                             Fn: CCAssignFnForCall(CC, Return: true, isVarArg));

    // Copy all of the result registers out of their specified physreg.
    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
      // For this move we copy into two registers and then move into the
      // double fp reg we want.
      MVT DestVT = RVLocs[0].getValVT();
      const TargetRegisterClass* DstRC = TLI.getRegClassFor(VT: DestVT);
      Register ResultReg = createResultReg(RC: DstRC);
      AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                              MCID: TII.get(Opcode: ARM::VMOVDRR), DestReg: ResultReg)
                      .addReg(RegNo: RVLocs[0].getLocReg())
                      .addReg(RegNo: RVLocs[1].getLocReg()));

      UsedRegs.push_back(Elt: RVLocs[0].getLocReg());
      UsedRegs.push_back(Elt: RVLocs[1].getLocReg());

      // Finally update the result.
      updateValueMap(I, Reg: ResultReg);
    } else {
      assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
      MVT CopyVT = RVLocs[0].getValVT();

      // Special handling for extended integers.
      // Sub-i32 results arrive widened to i32, so copy the full register.
      if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
        CopyVT = MVT::i32;

      const TargetRegisterClass* DstRC = TLI.getRegClassFor(VT: CopyVT);

      Register ResultReg = createResultReg(RC: DstRC);
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
              MCID: TII.get(Opcode: TargetOpcode::COPY),
              DestReg: ResultReg).addReg(RegNo: RVLocs[0].getLocReg());
      UsedRegs.push_back(Elt: RVLocs[0].getLocReg());

      // Finally update the result.
      updateValueMap(I, Reg: ResultReg);
    }
  }

  return true;
}
2146
// Lower a 'ret' instruction. Handles void returns and a single register
// return value (with sext/zext of sub-i32 integers); bails out for swifterror,
// split-CSR, multi-location or memory returns so the DAG selector handles them.
bool ARMFastISel::SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(Val: I);
  const Function &F = *I->getParent()->getParent();
  const bool IsCmseNSEntry = F.hasFnAttribute(Kind: "cmse_nonsecure_entry");

  if (!FuncInfo.CanLowerReturn)
    return false;

  // swifterror returns need special DAG handling.
  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError))
    return false;

  if (TLI.supportSplitCSR(MF: FuncInfo.MF))
    return false;

  // Build a list of return value registers.
  SmallVector<Register, 4> RetRegs;

  CallingConv::ID CC = F.getCallingConv();
  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, ReturnType: F.getReturnType(), attr: F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, Fn: CCAssignFnForCall(CC, Return: true /* is Ret */,
                                                 isVarArg: F.isVarArg()));

    const Value *RV = Ret->getOperand(i_nocapture: 0);
    Register Reg = getRegForValue(V: RV);
    if (!Reg)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    Register SrcReg = Reg + VA.getValNo();
    EVT RVEVT = TLI.getValueType(DL, Ty: RV->getType());
    if (!RVEVT.isSimple()) return false;
    MVT RVVT = RVEVT.getSimpleVT();
    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      assert(DestVT == MVT::i32 && "ARM should always ext to i32");

      // Perform extension if flagged as either zext or sext.  Otherwise, do
      // nothing.
      if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
        SrcReg = ARMEmitIntExt(SrcVT: RVVT, SrcReg, DestVT, isZExt: Outs[0].Flags.isZExt());
        if (!SrcReg)
          return false;
      }
    }

    // Make the copy.
    Register DstReg = VA.getLocReg();
    const TargetRegisterClass* SrcRC = MRI.getRegClass(Reg: SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(Reg: DstReg))
      return false;
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: DstReg).addReg(RegNo: SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(Elt: VA.getLocReg());
  }

  // CMSE non-secure entry functions must return with tBXNS_RET to clear
  // secure state; this is only defined for Thumb.
  unsigned RetOpc;
  if (IsCmseNSEntry)
    if (isThumb2)
      RetOpc = ARM::tBXNS_RET;
    else
      llvm_unreachable("CMSE not valid for non-Thumb targets");
  else
    RetOpc = Subtarget->getReturnOpcode();

  // Emit the return, listing the value registers as implicit uses so they
  // stay live up to the return.
  MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                                    MCID: TII.get(Opcode: RetOpc));
  AddOptionalDefs(MIB);
  for (Register R : RetRegs)
    MIB.addReg(RegNo: R, Flags: RegState::Implicit);
  return true;
}
2244
2245unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
2246 if (UseReg)
2247 return isThumb2 ? gettBLXrOpcode(MF: *MF) : getBLXOpcode(MF: *MF);
2248 else
2249 return isThumb2 ? ARM::tBL : ARM::BL;
2250}
2251
2252Register ARMFastISel::getLibcallReg(const Twine &Name) {
2253 // Manually compute the global's type to avoid building it when unnecessary.
2254 Type *GVTy = PointerType::get(C&: *Context, /*AS=*/AddressSpace: 0);
2255 EVT LCREVT = TLI.getValueType(DL, Ty: GVTy);
2256 if (!LCREVT.isSimple())
2257 return Register();
2258
2259 GlobalValue *GV = M.getNamedGlobal(Name: Name.str());
2260 if (!GV)
2261 GV = new GlobalVariable(M, Type::getInt32Ty(C&: *Context), false,
2262 GlobalValue::ExternalLinkage, nullptr, Name);
2263
2264 return ARMMaterializeGV(GV, VT: LCREVT.getSimpleVT());
2265}
2266
2267// A quick function that will emit a call for a named libcall in F with the
2268// vector of passed arguments for the Instruction in I. We can assume that we
2269// can emit a call for any libcall we can produce. This is an abridged version
2270// of the full call infrastructure since we won't need to worry about things
2271// like computed function pointers or strange arguments at call sites.
2272// TODO: Try to unify this and the normal call bits for ARM, then try to unify
2273// with X86.
// Emit a call to the runtime-library routine 'Call' whose arguments are the
// operands of instruction I. Assumes a simple call shape: legal scalar
// argument/return types, no varargs, no special attributes.
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  // Resolve the libcall to a concrete implementation for this target.
  RTLIB::LibcallImpl LCImpl = LibcallLowering->getLibcallImpl(Call);
  if (LCImpl == RTLIB::Unsupported)
    return false;

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(Ty: RetTy, VT&: RetVT))
    return false;

  CallingConv::ID CC = LibcallLowering->getLibcallImplCallingConv(Call: LCImpl);

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(VT: RetVT, OrigTy: RetTy, Fn: CCAssignFnForCall(CC, Return: true, isVarArg: false));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<Register, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(N: I->getNumOperands());
  ArgRegs.reserve(N: I->getNumOperands());
  ArgVTs.reserve(N: I->getNumOperands());
  ArgFlags.reserve(N: I->getNumOperands());
  for (Value *Op : I->operands()) {
    Register Arg = getRegForValue(V: Op);
    if (!Arg)
      return false;

    Type *ArgTy = Op->getType();
    MVT ArgVT;
    if (!isTypeLegal(Ty: ArgTy, VT&: ArgVT)) return false;

    ISD::ArgFlagsTy Flags;
    Flags.setOrigAlign(DL.getABITypeAlign(Ty: ArgTy));

    Args.push_back(Elt: Op);
    ArgRegs.push_back(Elt: Arg);
    ArgVTs.push_back(Elt: ArgVT);
    ArgFlags.push_back(Elt: Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<Register, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, isVarArg: false))
    return false;

  StringRef FuncName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(CallImpl: LCImpl);

  // Under -mlong-calls the callee address must be materialized in a
  // register rather than referenced symbolically.
  Register CalleeReg;
  if (Subtarget->genLongCalls()) {
    CalleeReg = getLibcallReg(Name: FuncName);
    if (!CalleeReg)
      return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(UseReg: Subtarget->genLongCalls());
  MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt,
                                    MIMD, MCID: TII.get(Opcode: CallOpc));
  // BL / BLX don't take a predicate, but tBL / tBLX do.
  if (isThumb2)
    MIB.add(MOs: predOps(Pred: ARMCC::AL));
  if (Subtarget->genLongCalls()) {
    CalleeReg =
        constrainOperandRegClass(II: TII.get(Opcode: CallOpc), Op: CalleeReg, OpNum: isThumb2 ? 2 : 0);
    MIB.addReg(RegNo: CalleeReg);
  } else
    MIB.addExternalSymbol(FnName: FuncName.data());

  // Add implicit physical register uses to the call.
  for (Register R : RegArgs)
    MIB.addReg(RegNo: R, Flags: RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC));

  // Finish off the call including any return values.
  SmallVector<Register, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg: false)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}
2372
// Lower a call instruction. When IntrMemName is non-null we are lowering a
// memory intrinsic (memcpy/memmove/memset) as a libcall: the intrinsic's
// trailing argument is dropped and the callee is the named runtime routine.
bool ARMFastISel::SelectCall(const Instruction *I,
                             const char *IntrMemName = nullptr) {
  const CallInst *CI = cast<CallInst>(Val: I);
  const Value *Callee = CI->getCalledOperand();

  // Can't handle inline asm.
  if (isa<InlineAsm>(Val: Callee)) return false;

  // Allow SelectionDAG isel to handle tail calls.
  if (CI->isTailCall()) return false;

  // Check the calling convention.
  CallingConv::ID CC = CI->getCallingConv();

  // TODO: Avoid some calling conventions?

  FunctionType *FTy = CI->getFunctionType();
  bool isVarArg = FTy->isVarArg();

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(Ty: RetTy, VT&: RetVT) && RetVT != MVT::i16 &&
           RetVT != MVT::i8  && RetVT != MVT::i1)
    return false;

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
      RetVT != MVT::i16 && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(VT: RetVT, OrigTy: RetTy,
                             Fn: CCAssignFnForCall(CC, Return: true, isVarArg));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<Register, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  unsigned arg_size = CI->arg_size();
  Args.reserve(N: arg_size);
  ArgRegs.reserve(N: arg_size);
  ArgVTs.reserve(N: arg_size);
  ArgFlags.reserve(N: arg_size);
  for (auto ArgI = CI->arg_begin(), ArgE = CI->arg_end(); ArgI != ArgE; ++ArgI) {
    // If we're lowering a memory intrinsic instead of a regular call, skip the
    // last argument, which shouldn't be passed to the underlying function.
    if (IntrMemName && ArgE - ArgI <= 1)
      break;

    // Record sext/zext attributes so ProcessCallArgs can widen correctly.
    ISD::ArgFlagsTy Flags;
    unsigned ArgIdx = ArgI - CI->arg_begin();
    if (CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SExt))
      Flags.setSExt();
    if (CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::InReg) ||
        CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::StructRet) ||
        CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftSelf) ||
        CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftError) ||
        CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Nest) ||
        CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ByVal))
      return false;

    Type *ArgTy = (*ArgI)->getType();
    MVT ArgVT;
    if (!isTypeLegal(Ty: ArgTy, VT&: ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
        ArgVT != MVT::i1)
      return false;

    Register Arg = getRegForValue(V: *ArgI);
    if (!Arg.isValid())
      return false;

    Flags.setOrigAlign(DL.getABITypeAlign(Ty: ArgTy));

    Args.push_back(Elt: *ArgI);
    ArgRegs.push_back(Elt: Arg);
    ArgVTs.push_back(Elt: ArgVT);
    ArgFlags.push_back(Elt: Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<Register, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, isVarArg))
    return false;

  // Use a register-indirect call for non-global callees or -mlong-calls.
  bool UseReg = false;
  const GlobalValue *GV = dyn_cast<GlobalValue>(Val: Callee);
  if (!GV || Subtarget->genLongCalls()) UseReg = true;

  Register CalleeReg;
  if (UseReg) {
    if (IntrMemName)
      CalleeReg = getLibcallReg(Name: IntrMemName);
    else
      CalleeReg = getRegForValue(V: Callee);

    if (!CalleeReg)
      return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(UseReg);
  MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt,
                                    MIMD, MCID: TII.get(Opcode: CallOpc));

  // ARM calls don't take a predicate, but tBL / tBLX do.
  if(isThumb2)
    MIB.add(MOs: predOps(Pred: ARMCC::AL));
  if (UseReg) {
    CalleeReg =
        constrainOperandRegClass(II: TII.get(Opcode: CallOpc), Op: CalleeReg, OpNum: isThumb2 ? 2 : 0);
    MIB.addReg(RegNo: CalleeReg);
  } else if (!IntrMemName)
    MIB.addGlobalAddress(GV, Offset: 0, TargetFlags: 0);
  else
    MIB.addExternalSymbol(FnName: IntrMemName, TargetFlags: 0);

  // Add implicit physical register uses to the call.
  for (Register R : RegArgs)
    MIB.addReg(RegNo: R, Flags: RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC));

  // Finish off the call including any return values.
  SmallVector<Register, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
    return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  // Warn if the callee carries a "dontcall" attribute.
  diagnoseDontCall(CI: *CI);
  return true;
}
2520
2521bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
2522 return Len <= 16;
2523}
2524
// Expand a small memcpy inline as a sequence of widest-possible load/store
// pairs, walking Dest and Src forward by the chunk size each iteration.
// Alignment (when known) caps the access width. Returns false only when Len
// is too large to inline.
bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                                        MaybeAlign Alignment) {
  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!ARMIsMemCpySmall(Len))
    return false;

  while (Len) {
    // Pick the widest chunk allowed by the remaining length and alignment.
    MVT VT;
    if (!Alignment || *Alignment >= 4) {
      if (Len >= 4)
        VT = MVT::i32;
      else if (Len >= 2)
        VT = MVT::i16;
      else {
        assert(Len == 1 && "Expected a length of 1!");
        VT = MVT::i8;
      }
    } else {
      assert(Alignment && "Alignment is set in this branch");
      // Bound based on alignment.
      if (Len >= 2 && *Alignment == 2)
        VT = MVT::i16;
      else {
        VT = MVT::i8;
      }
    }

    // Emit the load from Src then the store to Dest; both are expected to
    // succeed since the chunk types chosen above are always supported.
    bool RV;
    Register ResultReg;
    RV = ARMEmitLoad(VT, ResultReg, Addr&: Src);
    assert(RV && "Should be able to handle this load.");
    RV = ARMEmitStore(VT, SrcReg: ResultReg, Addr&: Dest);
    assert(RV && "Should be able to handle this store.");
    (void)RV;

    // Advance both addresses past the chunk just copied.
    unsigned Size = VT.getSizeInBits()/8;
    Len -= Size;
    Dest.setOffset(Dest.getOffset() + Size);
    Src.setOffset(Src.getOffset() + Size);
  }

  return true;
}
2568
// Lower the handful of intrinsics FastISel supports directly:
// frameaddress, memcpy/memmove/memset (inline when small, libcall
// otherwise), and trap. Anything else falls back to the DAG selector.
bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::frameaddress: {
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
    MFI.setFrameAddressIsTaken(true);

    unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
    const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
                                             : &ARM::GPRRegClass;

    const ARMBaseRegisterInfo *RegInfo = Subtarget->getRegisterInfo();
    Register FramePtr = RegInfo->getFrameRegister(MF: *(FuncInfo.MF));
    Register SrcReg = FramePtr;

    // Recursively load frame address
    // ldr r0 [fp]
    // ldr r0 [r0]
    // ldr r0 [r0]
    // ...
    // Depth 0 returns the frame pointer itself (the loop body never runs).
    Register DestReg;
    unsigned Depth = cast<ConstantInt>(Val: I.getOperand(i_nocapture: 0))->getZExtValue();
    while (Depth--) {
      DestReg = createResultReg(RC);
      AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                              MCID: TII.get(Opcode: LdrOpc), DestReg)
                      .addReg(RegNo: SrcReg).addImm(Val: 0));
      SrcReg = DestReg;
    }
    updateValueMap(I: &I, Reg: SrcReg);
    return true;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const MemTransferInst &MTI = cast<MemTransferInst>(Val: I);
    // Don't handle volatile.
    if (MTI.isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(Val: MTI.getLength()) && isMemCpy) {
      // Small memcpy's are common enough that we want to do them without a call
      // if possible.
      uint64_t Len = cast<ConstantInt>(Val: MTI.getLength())->getZExtValue();
      if (ARMIsMemCpySmall(Len)) {
        Address Dest, Src;
        if (!ARMComputeAddress(Obj: MTI.getRawDest(), Addr&: Dest) ||
            !ARMComputeAddress(Obj: MTI.getRawSource(), Addr&: Src))
          return false;
        // Use the stricter of the two alignments for the inline expansion.
        MaybeAlign Alignment;
        if (MTI.getDestAlign() || MTI.getSourceAlign())
          Alignment = std::min(a: MTI.getDestAlign().valueOrOne(),
                               b: MTI.getSourceAlign().valueOrOne());
        if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
          return true;
      }
    }

    // The libcall takes an i32 length; wider lengths can't be lowered here.
    if (!MTI.getLength()->getType()->isIntegerTy(Bitwidth: 32))
      return false;

    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
      return false;

    // Fall back to a call to the runtime routine.
    const char *IntrMemName = isa<MemCpyInst>(Val: I) ? "memcpy" : "memmove";
    return SelectCall(I: &I, IntrMemName);
  }
  case Intrinsic::memset: {
    const MemSetInst &MSI = cast<MemSetInst>(Val: I);
    // Don't handle volatile.
    if (MSI.isVolatile())
      return false;

    // The libcall takes an i32 length; wider lengths can't be lowered here.
    if (!MSI.getLength()->getType()->isIntegerTy(Bitwidth: 32))
      return false;

    if (MSI.getDestAddressSpace() > 255)
      return false;

    return SelectCall(I: &I, IntrMemName: "memset");
  }
  case Intrinsic::trap: {
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
    return true;
  }
  }
}
2660
2661bool ARMFastISel::SelectTrunc(const Instruction *I) {
2662 // The high bits for a type smaller than the register size are assumed to be
2663 // undefined.
2664 Value *Op = I->getOperand(i: 0);
2665
2666 EVT SrcVT, DestVT;
2667 SrcVT = TLI.getValueType(DL, Ty: Op->getType(), AllowUnknown: true);
2668 DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
2669
2670 if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
2671 return false;
2672 if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
2673 return false;
2674
2675 Register SrcReg = getRegForValue(V: Op);
2676 if (!SrcReg) return false;
2677
2678 // Because the high bits are undefined, a truncate doesn't generate
2679 // any code.
2680 updateValueMap(I, Reg: SrcReg);
2681 return true;
2682}
2683
/// ARMEmitIntExt - Emit a sign- or zero-extension of SrcReg (whose type is
/// SrcVT) to DestVT, returning the register holding the extended value, or
/// an invalid Register if the size combination is unsupported. Depending on
/// the subtarget, Thumb-ness, and source width this emits either a single
/// native extend instruction or a shift-left / shift-right pair.
Register ARMFastISel::ARMEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
                                    bool isZExt) {
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
    return Register();
  if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
    return Register();

  // Table of which combinations can be emitted as a single instruction,
  // and which will require two.
  static const uint8_t isSingleInstrTbl[3][2][2][2] = {
    //            ARM                     Thumb
    //           !hasV6Ops   hasV6Ops     !hasV6Ops   hasV6Ops
    // ext:     s  z      s  z          s  z      s  z
    /*  1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
    /*  8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
    /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
  };

  // Target registers for:
  //  - For ARM can never be PC.
  //  - For 16-bit Thumb are restricted to lower 8 registers.
  //  - For 32-bit Thumb are restricted to non-SP and non-PC.
  static const TargetRegisterClass *RCTbl[2][2] = {
    // Instructions: Two                     Single
    /* ARM      */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
    /* Thumb    */ { &ARM::tGPRRegClass,    &ARM::rGPRRegClass    }
  };

  // Table governing the instruction(s) to be emitted.
  // Indexed by [isSingleInstr][isThumb2][Bitness][isZExt].
  static const struct InstructionTable {
    uint32_t Opc   : 16;
    uint32_t hasS  :  1; // Some instructions have an S bit, always set it to 0.
    uint32_t Shift :  7; // For shift operand addressing mode, used by MOVsi.
    uint32_t Imm   :  8; // All instructions have either a shift or a mask.
  } IT[2][2][3][2] = {
    { // Two instructions (first is left shift, second is in this table).
      { // ARM                Opc           S  Shift             Imm
        /*  1 bit sext */ { { .Opc: ARM::MOVsi  , .hasS: 1, .Shift: ARM_AM::asr     , .Imm: 31 },
        /*  1 bit zext */   { .Opc: ARM::MOVsi  , .hasS: 1, .Shift: ARM_AM::lsr     , .Imm: 31 } },
        /*  8 bit sext */ { { .Opc: ARM::MOVsi  , .hasS: 1, .Shift: ARM_AM::asr     , .Imm: 24 },
        /*  8 bit zext */   { .Opc: ARM::MOVsi  , .hasS: 1, .Shift: ARM_AM::lsr     , .Imm: 24 } },
        /* 16 bit sext */ { { .Opc: ARM::MOVsi  , .hasS: 1, .Shift: ARM_AM::asr     , .Imm: 16 },
        /* 16 bit zext */   { .Opc: ARM::MOVsi  , .hasS: 1, .Shift: ARM_AM::lsr     , .Imm: 16 } }
      },
      { // Thumb              Opc           S  Shift             Imm
        /*  1 bit sext */ { { .Opc: ARM::tASRri , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm: 31 },
        /*  1 bit zext */   { .Opc: ARM::tLSRri , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm: 31 } },
        /*  8 bit sext */ { { .Opc: ARM::tASRri , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm: 24 },
        /*  8 bit zext */   { .Opc: ARM::tLSRri , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm: 24 } },
        /* 16 bit sext */ { { .Opc: ARM::tASRri , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm: 16 },
        /* 16 bit zext */   { .Opc: ARM::tLSRri , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm: 16 } }
      }
    },
    { // Single instruction.
      { // ARM                Opc           S  Shift             Imm
        /*  1 bit sext */ { { .Opc: ARM::KILL   , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm:  0 },
        /*  1 bit zext */   { .Opc: ARM::ANDri  , .hasS: 1, .Shift: ARM_AM::no_shift, .Imm:  1 } },
        /*  8 bit sext */ { { .Opc: ARM::SXTB   , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm:  0 },
        /*  8 bit zext */   { .Opc: ARM::ANDri  , .hasS: 1, .Shift: ARM_AM::no_shift, .Imm: 255 } },
        /* 16 bit sext */ { { .Opc: ARM::SXTH   , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm:  0 },
        /* 16 bit zext */   { .Opc: ARM::UXTH   , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm:  0 } }
      },
      { // Thumb              Opc           S  Shift             Imm
        /*  1 bit sext */ { { .Opc: ARM::KILL   , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm:  0 },
        /*  1 bit zext */   { .Opc: ARM::t2ANDri, .hasS: 1, .Shift: ARM_AM::no_shift, .Imm:  1 } },
        /*  8 bit sext */ { { .Opc: ARM::t2SXTB , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm:  0 },
        /*  8 bit zext */   { .Opc: ARM::t2ANDri, .hasS: 1, .Shift: ARM_AM::no_shift, .Imm: 255 } },
        /* 16 bit sext */ { { .Opc: ARM::t2SXTH , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm:  0 },
        /* 16 bit zext */   { .Opc: ARM::t2UXTH , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm:  0 } }
      }
    }
  };

  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DestVT.getSizeInBits();
  (void) DestBits;
  assert((SrcBits < DestBits) && "can only extend to larger types");
  assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
         "other sizes unimplemented");
  assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
         "other sizes unimplemented");

  bool hasV6Ops = Subtarget->hasV6Ops();
  unsigned Bitness = SrcBits / 8;  // {1,8,16}=>{0,1,2}
  assert((Bitness < 3) && "sanity-check table bounds");

  // Look up what to emit and which register class the result must live in.
  bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
  const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
  const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
  unsigned Opc = ITP->Opc;
  assert(ARM::KILL != Opc && "Invalid table entry");
  unsigned hasS = ITP->hasS;
  ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
  assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
         "only MOVsi has shift operand addressing mode");
  unsigned Imm = ITP->Imm;

  // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
  bool setsCPSR = &ARM::tGPRRegClass == RC;
  unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;
  Register ResultReg;
  // MOVsi encodes shift and immediate in shift operand addressing mode.
  // The following condition has the same value when emitting two
  // instruction sequences: both are shifts.
  bool ImmIsSO = (Shift != ARM_AM::no_shift);

  // Either one or two instructions are emitted.
  // They're always of the form:
  //   dst = in OP imm
  // CPSR is set only by 16-bit Thumb instructions.
  // Predicate, if any, is AL.
  // S bit, if available, is always 0.
  // When two are emitted the first's result will feed as the second's input,
  // that value is then dead.
  unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
  for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
    ResultReg = createResultReg(RC);
    // In the two-instruction case, the first instruction is the left shift.
    bool isLsl = (0 == Instr) && !isSingleInstr;
    unsigned Opcode = isLsl ? LSLOpc : Opc;
    ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;
    unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShOp: ShiftAM, Imm) : Imm;
    // The intermediate value of a two-instruction sequence dies here.
    bool isKill = 1 == Instr;
    MachineInstrBuilder MIB = BuildMI(
        BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode), DestReg: ResultReg);
    if (setsCPSR)
      MIB.addReg(RegNo: ARM::CPSR, Flags: RegState::Define);
    SrcReg = constrainOperandRegClass(II: TII.get(Opcode), Op: SrcReg, OpNum: 1 + setsCPSR);
    MIB.addReg(RegNo: SrcReg, Flags: getKillRegState(B: isKill))
        .addImm(Val: ImmEnc)
        .add(MOs: predOps(Pred: ARMCC::AL));
    if (hasS)
      MIB.add(MO: condCodeOp());
    // Second instruction consumes the first's result.
    SrcReg = ResultReg;
  }

  // ResultReg holds the output of the final emitted instruction.
  return ResultReg;
}
2822
2823bool ARMFastISel::SelectIntExt(const Instruction *I) {
2824 // On ARM, in general, integer casts don't involve legal types; this code
2825 // handles promotable integers.
2826 Type *DestTy = I->getType();
2827 Value *Src = I->getOperand(i: 0);
2828 Type *SrcTy = Src->getType();
2829
2830 bool isZExt = isa<ZExtInst>(Val: I);
2831 Register SrcReg = getRegForValue(V: Src);
2832 if (!SrcReg) return false;
2833
2834 EVT SrcEVT, DestEVT;
2835 SrcEVT = TLI.getValueType(DL, Ty: SrcTy, AllowUnknown: true);
2836 DestEVT = TLI.getValueType(DL, Ty: DestTy, AllowUnknown: true);
2837 if (!SrcEVT.isSimple()) return false;
2838 if (!DestEVT.isSimple()) return false;
2839
2840 MVT SrcVT = SrcEVT.getSimpleVT();
2841 MVT DestVT = DestEVT.getSimpleVT();
2842 Register ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2843 if (!ResultReg)
2844 return false;
2845 updateValueMap(I, Reg: ResultReg);
2846 return true;
2847}
2848
2849bool ARMFastISel::SelectShift(const Instruction *I,
2850 ARM_AM::ShiftOpc ShiftTy) {
2851 // We handle thumb2 mode by target independent selector
2852 // or SelectionDAG ISel.
2853 if (isThumb2)
2854 return false;
2855
2856 // Only handle i32 now.
2857 EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
2858 if (DestVT != MVT::i32)
2859 return false;
2860
2861 unsigned Opc = ARM::MOVsr;
2862 unsigned ShiftImm;
2863 Value *Src2Value = I->getOperand(i: 1);
2864 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Src2Value)) {
2865 ShiftImm = CI->getZExtValue();
2866
2867 // Fall back to selection DAG isel if the shift amount
2868 // is zero or greater than the width of the value type.
2869 if (ShiftImm == 0 || ShiftImm >=32)
2870 return false;
2871
2872 Opc = ARM::MOVsi;
2873 }
2874
2875 Value *Src1Value = I->getOperand(i: 0);
2876 Register Reg1 = getRegForValue(V: Src1Value);
2877 if (!Reg1)
2878 return false;
2879
2880 Register Reg2;
2881 if (Opc == ARM::MOVsr) {
2882 Reg2 = getRegForValue(V: Src2Value);
2883 if (!Reg2)
2884 return false;
2885 }
2886
2887 Register ResultReg = createResultReg(RC: &ARM::GPRnopcRegClass);
2888 if (!ResultReg)
2889 return false;
2890
2891 MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2892 MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
2893 .addReg(RegNo: Reg1);
2894
2895 if (Opc == ARM::MOVsi)
2896 MIB.addImm(Val: ARM_AM::getSORegOpc(ShOp: ShiftTy, Imm: ShiftImm));
2897 else if (Opc == ARM::MOVsr) {
2898 MIB.addReg(RegNo: Reg2);
2899 MIB.addImm(Val: ARM_AM::getSORegOpc(ShOp: ShiftTy, Imm: 0));
2900 }
2901
2902 AddOptionalDefs(MIB);
2903 updateValueMap(I, Reg: ResultReg);
2904 return true;
2905}
2906
2907// TODO: SoftFP support.
2908bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
2909 switch (I->getOpcode()) {
2910 case Instruction::Load:
2911 return SelectLoad(I);
2912 case Instruction::Store:
2913 return SelectStore(I);
2914 case Instruction::CondBr:
2915 return SelectBranch(I);
2916 case Instruction::IndirectBr:
2917 return SelectIndirectBr(I);
2918 case Instruction::ICmp:
2919 case Instruction::FCmp:
2920 return SelectCmp(I);
2921 case Instruction::FPExt:
2922 return SelectFPExt(I);
2923 case Instruction::FPTrunc:
2924 return SelectFPTrunc(I);
2925 case Instruction::SIToFP:
2926 return SelectIToFP(I, /*isSigned*/ true);
2927 case Instruction::UIToFP:
2928 return SelectIToFP(I, /*isSigned*/ false);
2929 case Instruction::FPToSI:
2930 return SelectFPToI(I, /*isSigned*/ true);
2931 case Instruction::FPToUI:
2932 return SelectFPToI(I, /*isSigned*/ false);
2933 case Instruction::Add:
2934 return SelectBinaryIntOp(I, ISDOpcode: ISD::ADD);
2935 case Instruction::Or:
2936 return SelectBinaryIntOp(I, ISDOpcode: ISD::OR);
2937 case Instruction::Sub:
2938 return SelectBinaryIntOp(I, ISDOpcode: ISD::SUB);
2939 case Instruction::FAdd:
2940 return SelectBinaryFPOp(I, ISDOpcode: ISD::FADD);
2941 case Instruction::FSub:
2942 return SelectBinaryFPOp(I, ISDOpcode: ISD::FSUB);
2943 case Instruction::FMul:
2944 return SelectBinaryFPOp(I, ISDOpcode: ISD::FMUL);
2945 case Instruction::SDiv:
2946 return SelectDiv(I, /*isSigned*/ true);
2947 case Instruction::UDiv:
2948 return SelectDiv(I, /*isSigned*/ false);
2949 case Instruction::SRem:
2950 return SelectRem(I, /*isSigned*/ true);
2951 case Instruction::URem:
2952 return SelectRem(I, /*isSigned*/ false);
2953 case Instruction::Call:
2954 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I))
2955 return SelectIntrinsicCall(I: *II);
2956 return SelectCall(I);
2957 case Instruction::Select:
2958 return SelectSelect(I);
2959 case Instruction::Ret:
2960 return SelectRet(I);
2961 case Instruction::Trunc:
2962 return SelectTrunc(I);
2963 case Instruction::ZExt:
2964 case Instruction::SExt:
2965 return SelectIntExt(I);
2966 case Instruction::Shl:
2967 return SelectShift(I, ShiftTy: ARM_AM::lsl);
2968 case Instruction::LShr:
2969 return SelectShift(I, ShiftTy: ARM_AM::lsr);
2970 case Instruction::AShr:
2971 return SelectShift(I, ShiftTy: ARM_AM::asr);
2972 default: break;
2973 }
2974 return false;
2975}
2976
// This table describes sign- and zero-extend instructions which can be
// folded into a preceding load. All of these extends have an immediate
// (sometimes a mask and sometimes a shift) that's applied after
// extension.
static const struct FoldableLoadExtendsStruct {
  uint16_t Opc[2];  // Extend opcode, indexed by isThumb2: [ARM, Thumb].
  uint8_t ExpectedImm;     // Immediate the extend must carry (mask, or 0).
  uint8_t isZExt : 1;      // Whether the folded load should zero-extend.
  uint8_t ExpectedVT : 7;  // MVT::SimpleValueType the load must produce.
} FoldableLoadExtends[] = {
  { .Opc: { ARM::SXTH,  ARM::t2SXTH  }, .ExpectedImm: 0, .isZExt: 0, .ExpectedVT: MVT::i16 },
  { .Opc: { ARM::UXTH,  ARM::t2UXTH  }, .ExpectedImm: 0, .isZExt: 1, .ExpectedVT: MVT::i16 },
  { .Opc: { ARM::ANDri, ARM::t2ANDri }, .ExpectedImm: 255, .isZExt: 1, .ExpectedVT: MVT::i8 },
  { .Opc: { ARM::SXTB,  ARM::t2SXTB  }, .ExpectedImm: 0, .isZExt: 0, .ExpectedVT: MVT::i8 },
  { .Opc: { ARM::UXTB,  ARM::t2UXTB  }, .ExpectedImm: 0, .isZExt: 1, .ExpectedVT: MVT::i8 }
};
2993
2994/// The specified machine instr operand is a vreg, and that
2995/// vreg is being provided by the specified load instruction. If possible,
2996/// try to fold the load as an operand to the instruction, returning true if
2997/// successful.
2998bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
2999 const LoadInst *LI) {
3000 // Verify we have a legal type before going any further.
3001 MVT VT;
3002 if (!isLoadTypeLegal(Ty: LI->getType(), VT))
3003 return false;
3004
3005 // Combine load followed by zero- or sign-extend.
3006 // ldrb r1, [r0] ldrb r1, [r0]
3007 // uxtb r2, r1 =>
3008 // mov r3, r2 mov r3, r1
3009 if (MI->getNumOperands() < 3 || !MI->getOperand(i: 2).isImm())
3010 return false;
3011 const uint64_t Imm = MI->getOperand(i: 2).getImm();
3012
3013 bool Found = false;
3014 bool isZExt;
3015 for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) {
3016 if (FLE.Opc[isThumb2] == MI->getOpcode() &&
3017 (uint64_t)FLE.ExpectedImm == Imm &&
3018 MVT((MVT::SimpleValueType)FLE.ExpectedVT) == VT) {
3019 Found = true;
3020 isZExt = FLE.isZExt;
3021 }
3022 }
3023 if (!Found) return false;
3024
3025 // See if we can handle this address.
3026 Address Addr;
3027 if (!ARMComputeAddress(Obj: LI->getOperand(i_nocapture: 0), Addr)) return false;
3028
3029 Register ResultReg = MI->getOperand(i: 0).getReg();
3030 if (!ARMEmitLoad(VT, ResultReg, Addr, Alignment: LI->getAlign(), isZExt, allocReg: false))
3031 return false;
3032 MachineBasicBlock::iterator I(MI);
3033 removeDeadCode(I, E: std::next(x: I));
3034 return true;
3035}
3036
/// ARMLowerPICELF - Materialize the address of GlobalValue GV for PIC/ELF:
/// load a PC-relative constant-pool entry and add the PC to it. When GV is
/// not DSO-local, the pool entry is GOT_PREL and an extra load through the
/// GOT slot is emitted (folded into PICLDR for ARM mode). Returns the
/// register holding the final address.
Register ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, MVT VT) {
  bool UseGOT_PREL = !GV->isDSOLocal();
  LLVMContext *Context = &MF->getFunction().getContext();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  // PC bias of the pc-relative fixup: 4 in Thumb mode, 8 in ARM mode.
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
      C: GV, ID: ARMPCLabelIndex, Kind: ARMCP::CPValue, PCAdj,
      Modifier: UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
      /*AddCurrentAddress=*/UseGOT_PREL);

  // Create the constant-pool entry with pointer-preferred alignment.
  Align ConstAlign =
      MF->getDataLayout().getPrefTypeAlign(Ty: PointerType::get(C&: *Context, AddressSpace: 0));
  unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(V: CPV, Alignment: ConstAlign);
  MachineMemOperand *CPMMO =
      MF->getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF&: *MF),
                               F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(4));

  // Load the constant-pool entry into a temporary register.
  Register TempReg = MF->getRegInfo().createVirtualRegister(RegClass: &ARM::rGPRRegClass);
  unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
  MachineInstrBuilder MIB =
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: TempReg)
          .addConstantPoolIndex(Idx)
          .addMemOperand(MMO: CPMMO);
  if (Opc == ARM::LDRcp)
    MIB.addImm(Val: 0); // LDRcp takes an extra zero offset operand.
  MIB.add(MOs: predOps(Pred: ARMCC::AL));

  // Fix the address by adding pc.
  Register DestReg = createResultReg(RC: TLI.getRegClassFor(VT));
  // In ARM mode with GOT_PREL, PICLDR folds the pc-add and the GOT load.
  Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR
                                                          : ARM::PICADD;
  DestReg = constrainOperandRegClass(II: TII.get(Opcode: Opc), Op: DestReg, OpNum: 0);
  MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
            .addReg(RegNo: TempReg)
            .addImm(Val: ARMPCLabelIndex);

  if (!Subtarget->isThumb())
    MIB.add(MOs: predOps(Pred: ARMCC::AL));

  // Thumb mode has no PICLDR equivalent, so the GOT indirection needs a
  // separate load.
  if (UseGOT_PREL && Subtarget->isThumb()) {
    Register NewDestReg = createResultReg(RC: TLI.getRegClassFor(VT));
    MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                  MCID: TII.get(Opcode: ARM::t2LDRi12), DestReg: NewDestReg)
              .addReg(RegNo: DestReg)
              .addImm(Val: 0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }
  return DestReg;
}
3087
3088bool ARMFastISel::fastLowerArguments() {
3089 if (!FuncInfo.CanLowerReturn)
3090 return false;
3091
3092 const Function *F = FuncInfo.Fn;
3093 if (F->isVarArg())
3094 return false;
3095
3096 CallingConv::ID CC = F->getCallingConv();
3097 switch (CC) {
3098 default:
3099 return false;
3100 case CallingConv::Fast:
3101 case CallingConv::C:
3102 case CallingConv::ARM_AAPCS_VFP:
3103 case CallingConv::ARM_AAPCS:
3104 case CallingConv::ARM_APCS:
3105 case CallingConv::Swift:
3106 case CallingConv::SwiftTail:
3107 break;
3108 }
3109
3110 // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments
3111 // which are passed in r0 - r3.
3112 for (const Argument &Arg : F->args()) {
3113 if (Arg.getArgNo() >= 4)
3114 return false;
3115
3116 if (Arg.hasAttribute(Kind: Attribute::InReg) ||
3117 Arg.hasAttribute(Kind: Attribute::StructRet) ||
3118 Arg.hasAttribute(Kind: Attribute::SwiftSelf) ||
3119 Arg.hasAttribute(Kind: Attribute::SwiftError) ||
3120 Arg.hasAttribute(Kind: Attribute::ByVal))
3121 return false;
3122
3123 Type *ArgTy = Arg.getType();
3124 if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3125 return false;
3126
3127 EVT ArgVT = TLI.getValueType(DL, Ty: ArgTy);
3128 if (!ArgVT.isSimple()) return false;
3129 switch (ArgVT.getSimpleVT().SimpleTy) {
3130 case MVT::i8:
3131 case MVT::i16:
3132 case MVT::i32:
3133 break;
3134 default:
3135 return false;
3136 }
3137 }
3138
3139 static const MCPhysReg GPRArgRegs[] = {
3140 ARM::R0, ARM::R1, ARM::R2, ARM::R3
3141 };
3142
3143 const TargetRegisterClass *RC = &ARM::rGPRRegClass;
3144 for (const Argument &Arg : F->args()) {
3145 unsigned ArgNo = Arg.getArgNo();
3146 MCRegister SrcReg = GPRArgRegs[ArgNo];
3147 Register DstReg = FuncInfo.MF->addLiveIn(PReg: SrcReg, RC);
3148 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3149 // Without this, EmitLiveInCopies may eliminate the livein if its only
3150 // use is a bitcast (which isn't turned into an instruction).
3151 Register ResultReg = createResultReg(RC);
3152 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3153 MCID: TII.get(Opcode: TargetOpcode::COPY),
3154 DestReg: ResultReg).addReg(RegNo: DstReg, Flags: getKillRegState(B: true));
3155 updateValueMap(I: &Arg, Reg: ResultReg);
3156 }
3157
3158 return true;
3159}
3160
3161namespace llvm {
3162
3163FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
3164 const TargetLibraryInfo *libInfo,
3165 const LibcallLoweringInfo *libcallLowering) {
3166 if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())
3167 return new ARMFastISel(funcInfo, libInfo, libcallLowering);
3168
3169 return nullptr;
3170}
3171
3172} // end namespace llvm
3173