1//===- ARMFastISel.cpp - ARM FastISel implementation ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the ARM-specific support for the FastISel class. Some
10// of the target-specific code is generated by tablegen in the file
11// ARMGenFastISel.inc, which is #included here.
12//
13//===----------------------------------------------------------------------===//
14
15#include "ARM.h"
16#include "ARMBaseInstrInfo.h"
17#include "ARMBaseRegisterInfo.h"
18#include "ARMCallingConv.h"
19#include "ARMConstantPoolValue.h"
20#include "ARMISelLowering.h"
21#include "ARMMachineFunctionInfo.h"
22#include "ARMSubtarget.h"
23#include "ARMTargetMachine.h"
24#include "MCTargetDesc/ARMAddressingModes.h"
25#include "MCTargetDesc/ARMBaseInfo.h"
26#include "Utils/ARMBaseInfo.h"
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/APInt.h"
29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/SmallVector.h"
31#include "llvm/CodeGen/CallingConvLower.h"
32#include "llvm/CodeGen/FastISel.h"
33#include "llvm/CodeGen/FunctionLoweringInfo.h"
34#include "llvm/CodeGen/ISDOpcodes.h"
35#include "llvm/CodeGen/MachineBasicBlock.h"
36#include "llvm/CodeGen/MachineConstantPool.h"
37#include "llvm/CodeGen/MachineFrameInfo.h"
38#include "llvm/CodeGen/MachineFunction.h"
39#include "llvm/CodeGen/MachineInstr.h"
40#include "llvm/CodeGen/MachineInstrBuilder.h"
41#include "llvm/CodeGen/MachineMemOperand.h"
42#include "llvm/CodeGen/MachineOperand.h"
43#include "llvm/CodeGen/MachineRegisterInfo.h"
44#include "llvm/CodeGen/TargetInstrInfo.h"
45#include "llvm/CodeGen/TargetLowering.h"
46#include "llvm/CodeGen/TargetOpcodes.h"
47#include "llvm/CodeGen/TargetRegisterInfo.h"
48#include "llvm/CodeGen/ValueTypes.h"
49#include "llvm/CodeGenTypes/MachineValueType.h"
50#include "llvm/IR/Argument.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/CallingConv.h"
53#include "llvm/IR/Constant.h"
54#include "llvm/IR/Constants.h"
55#include "llvm/IR/DataLayout.h"
56#include "llvm/IR/DerivedTypes.h"
57#include "llvm/IR/Function.h"
58#include "llvm/IR/GetElementPtrTypeIterator.h"
59#include "llvm/IR/GlobalValue.h"
60#include "llvm/IR/GlobalVariable.h"
61#include "llvm/IR/InstrTypes.h"
62#include "llvm/IR/Instruction.h"
63#include "llvm/IR/Instructions.h"
64#include "llvm/IR/IntrinsicInst.h"
65#include "llvm/IR/Intrinsics.h"
66#include "llvm/IR/Module.h"
67#include "llvm/IR/Operator.h"
68#include "llvm/IR/Type.h"
69#include "llvm/IR/User.h"
70#include "llvm/IR/Value.h"
71#include "llvm/MC/MCInstrDesc.h"
72#include "llvm/Support/Casting.h"
73#include "llvm/Support/Compiler.h"
74#include "llvm/Support/ErrorHandling.h"
75#include "llvm/Support/MathExtras.h"
76#include "llvm/Target/TargetMachine.h"
77#include "llvm/Target/TargetOptions.h"
78#include <cassert>
79#include <cstdint>
80#include <utility>
81
82using namespace llvm;
83
84namespace {
85
86 // All possible address modes, plus some.
87class Address {
88public:
89 enum BaseKind { RegBase, FrameIndexBase };
90
91private:
92 BaseKind Kind = RegBase;
93 union {
94 unsigned Reg;
95 int FI;
96 } Base;
97
98 int Offset = 0;
99
100public:
101 // Innocuous defaults for our address.
102 Address() { Base.Reg = 0; }
103
104 void setKind(BaseKind K) { Kind = K; }
105 BaseKind getKind() const { return Kind; }
106 bool isRegBase() const { return Kind == RegBase; }
107 bool isFIBase() const { return Kind == FrameIndexBase; }
108
109 void setReg(Register Reg) {
110 assert(isRegBase() && "Invalid base register access!");
111 Base.Reg = Reg.id();
112 }
113
114 Register getReg() const {
115 assert(isRegBase() && "Invalid base register access!");
116 return Base.Reg;
117 }
118
119 void setFI(int FI) {
120 assert(isFIBase() && "Invalid base frame index access!");
121 Base.FI = FI;
122 }
123
124 int getFI() const {
125 assert(isFIBase() && "Invalid base frame index access!");
126 return Base.FI;
127 }
128
129 void setOffset(int O) { Offset = O; }
130 int getOffset() { return Offset; }
131};
132
// ARMFastISel - the ARM implementation of the FastISel interface.  Declares
// one Select* routine per IR instruction kind plus ARM-specific emission
// helpers; definitions follow below and in ARMGenFastISel.inc.
class ARMFastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  Module &M;
  const ARMBaseInstrInfo &TII;
  const ARMTargetLowering &TLI;
  const ARMBaseTargetMachine &TM;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb2;
  LLVMContext *Context;

  public:
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo,
                         const LibcallLoweringInfo *libcallLowering)
        : FastISel(funcInfo, libInfo, libcallLowering),
          Subtarget(&funcInfo.MF->getSubtarget<ARMSubtarget>()),
          M(const_cast<Module &>(*funcInfo.Fn->getParent())),
          TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
          TM(TLI.getTM()) {
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
      isThumb2 = AFI->isThumbFunction();
      Context = &funcInfo.Fn->getContext();
    }

  private:
    // Code from FastISel.cpp.
    // Overridden emitters so optional predicate/CC operands get appended
    // (see AddOptionalDefs below).

    Register fastEmitInst_r(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC, Register Op0);
    Register fastEmitInst_rr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, Register Op0,
                             Register Op1);
    Register fastEmitInst_ri(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC, Register Op0,
                             uint64_t Imm);
    Register fastEmitInst_i(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC, uint64_t Imm);

    // Backend specific FastISel code.

    bool fastSelectInstruction(const Instruction *I) override;
    Register fastMaterializeConstant(const Constant *C) override;
    Register fastMaterializeAlloca(const AllocaInst *AI) override;
    bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                             const LoadInst *LI) override;
    bool fastLowerArguments() override;

  // Tablegen-generated fastEmit_* helpers.
#include "ARMGenFastISel.inc"

    // Instruction selection routines.

    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
    bool SelectCmp(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectIToFP(const Instruction *I, bool isSigned);
    bool SelectFPToI(const Instruction *I, bool isSigned);
    bool SelectDiv(const Instruction *I, bool isSigned);
    bool SelectRem(const Instruction *I, bool isSigned);
    bool SelectCall(const Instruction *I, const char *IntrMemName);
    bool SelectIntrinsicCall(const IntrinsicInst &I);
    bool SelectSelect(const Instruction *I);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);
    bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);

    // Utility routines.

    bool isPositionIndependent() const;
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                    bool isZExt);
    bool ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                     MaybeAlign Alignment = std::nullopt, bool isZExt = true,
                     bool allocReg = true);
    bool ARMEmitStore(MVT VT, Register SrcReg, Address &Addr,
                      MaybeAlign Alignment = std::nullopt);
    bool ARMComputeAddress(const Value *Obj, Address &Addr);
    void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
    bool ARMIsMemCpySmall(uint64_t Len);
    bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                               MaybeAlign Alignment);
    Register ARMEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, bool isZExt);
    Register ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
    Register ARMMaterializeInt(const Constant *C, MVT VT);
    Register ARMMaterializeGV(const GlobalValue *GV, MVT VT);
    Register ARMMoveToFPReg(MVT VT, Register SrcReg);
    Register ARMMoveToIntReg(MVT VT, Register SrcReg);
    unsigned ARMSelectCallOp(bool UseReg);
    Register ARMLowerPICELF(const GlobalValue *GV, MVT VT);

    const TargetLowering *getTargetLowering() { return &TLI; }

    // Call handling routines.

    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
                                  bool Return,
                                  bool isVarArg);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<Register> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<Register> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes,
                         bool isVarArg);
    Register getLibcallReg(const Twine &Name);
    bool FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes, bool isVarArg);
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

    // OptionalDef handling routines.

    bool isARMNEONPred(const MachineInstr *MI);
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
    void AddLoadStoreOperands(MVT VT, Address &Addr,
                              const MachineInstrBuilder &MIB,
                              MachineMemOperand::Flags Flags, bool useAM3);
};
265
266} // end anonymous namespace
267
268// DefinesOptionalPredicate - This is different from DefinesPredicate in that
269// we don't care about implicit defs here, just places we'll need to add a
270// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
271bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
272 if (!MI->hasOptionalDef())
273 return false;
274
275 // Look to see if our OptionalDef is defining CPSR or CCR.
276 for (const MachineOperand &MO : MI->operands()) {
277 if (!MO.isReg() || !MO.isDef()) continue;
278 if (MO.getReg() == ARM::CPSR)
279 *CPSR = true;
280 }
281 return true;
282}
283
284bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
285 const MCInstrDesc &MCID = MI->getDesc();
286
287 // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
288 if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
289 AFI->isThumb2Function())
290 return MI->isPredicable();
291
292 for (const MCOperandInfo &opInfo : MCID.operands())
293 if (opInfo.isPredicate())
294 return true;
295
296 return false;
297}
298
// If the machine is predicable go ahead and add the predicate operands, if
// it needs default CC operands add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
// CPSR defs that need to be added before the remaining operands. See s_cc_out
// for descriptions why.
//
// Returns the same builder so the call can be chained around BuildMI().
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? or...
  // Are we NEON in ARM mode and have a predicate operand? If so, I know
  // we're not predicable but add it anyways.
  if (isARMNEONPred(MI))
    MIB.add(MOs: predOps(Pred: ARMCC::AL));

  // Do we optionally set a predicate? Preds is size > 0 iff the predicate
  // defines CPSR. All other OptionalDefines in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, CPSR: &CPSR))
    MIB.add(MO: CPSR ? t1CondCodeOp() : condCodeOp());
  return MIB;
}
321
322Register ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
323 const TargetRegisterClass *RC,
324 Register Op0) {
325 Register ResultReg = createResultReg(RC);
326 const MCInstrDesc &II = TII.get(Opcode: MachineInstOpcode);
327
328 // Make sure the input operand is sufficiently constrained to be legal
329 // for this instruction.
330 Op0 = constrainOperandRegClass(II, Op: Op0, OpNum: 1);
331 if (II.getNumDefs() >= 1) {
332 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II,
333 DestReg: ResultReg).addReg(RegNo: Op0));
334 } else {
335 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
336 .addReg(RegNo: Op0));
337 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
338 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
339 .addReg(RegNo: II.implicit_defs()[0]));
340 }
341 return ResultReg;
342}
343
344Register ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
345 const TargetRegisterClass *RC,
346 Register Op0, Register Op1) {
347 Register ResultReg = createResultReg(RC);
348 const MCInstrDesc &II = TII.get(Opcode: MachineInstOpcode);
349
350 // Make sure the input operands are sufficiently constrained to be legal
351 // for this instruction.
352 Op0 = constrainOperandRegClass(II, Op: Op0, OpNum: 1);
353 Op1 = constrainOperandRegClass(II, Op: Op1, OpNum: 2);
354
355 if (II.getNumDefs() >= 1) {
356 AddOptionalDefs(
357 MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
358 .addReg(RegNo: Op0)
359 .addReg(RegNo: Op1));
360 } else {
361 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
362 .addReg(RegNo: Op0)
363 .addReg(RegNo: Op1));
364 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
365 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
366 .addReg(RegNo: II.implicit_defs()[0]));
367 }
368 return ResultReg;
369}
370
371Register ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
372 const TargetRegisterClass *RC,
373 Register Op0, uint64_t Imm) {
374 Register ResultReg = createResultReg(RC);
375 const MCInstrDesc &II = TII.get(Opcode: MachineInstOpcode);
376
377 // Make sure the input operand is sufficiently constrained to be legal
378 // for this instruction.
379 Op0 = constrainOperandRegClass(II, Op: Op0, OpNum: 1);
380 if (II.getNumDefs() >= 1) {
381 AddOptionalDefs(
382 MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
383 .addReg(RegNo: Op0)
384 .addImm(Val: Imm));
385 } else {
386 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
387 .addReg(RegNo: Op0)
388 .addImm(Val: Imm));
389 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
390 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
391 .addReg(RegNo: II.implicit_defs()[0]));
392 }
393 return ResultReg;
394}
395
396Register ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
397 const TargetRegisterClass *RC,
398 uint64_t Imm) {
399 Register ResultReg = createResultReg(RC);
400 const MCInstrDesc &II = TII.get(Opcode: MachineInstOpcode);
401
402 if (II.getNumDefs() >= 1) {
403 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II,
404 DestReg: ResultReg).addImm(Val: Imm));
405 } else {
406 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
407 .addImm(Val: Imm));
408 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
409 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
410 .addReg(RegNo: II.implicit_defs()[0]));
411 }
412 return ResultReg;
413}
414
415// TODO: Don't worry about 64-bit now, but when this is fixed remove the
416// checks from the various callers.
417Register ARMFastISel::ARMMoveToFPReg(MVT VT, Register SrcReg) {
418 if (VT == MVT::f64)
419 return Register();
420
421 Register MoveReg = createResultReg(RC: TLI.getRegClassFor(VT));
422 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
423 MCID: TII.get(Opcode: ARM::VMOVSR), DestReg: MoveReg)
424 .addReg(RegNo: SrcReg));
425 return MoveReg;
426}
427
428Register ARMFastISel::ARMMoveToIntReg(MVT VT, Register SrcReg) {
429 if (VT == MVT::i64)
430 return Register();
431
432 Register MoveReg = createResultReg(RC: TLI.getRegClassFor(VT));
433 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
434 MCID: TII.get(Opcode: ARM::VMOVRS), DestReg: MoveReg)
435 .addReg(RegNo: SrcReg));
436 return MoveReg;
437}
438
439// For double width floating point we need to materialize two constants
440// (the high and the low) into integer registers then use a move to get
441// the combined constant into an FP reg.
442Register ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
443 if (VT != MVT::f32 && VT != MVT::f64)
444 return Register();
445
446 const APFloat Val = CFP->getValueAPF();
447 bool is64bit = VT == MVT::f64;
448
449 // This checks to see if we can use VFP3 instructions to materialize
450 // a constant, otherwise we have to go through the constant pool.
451 if (TLI.isFPImmLegal(Imm: Val, VT)) {
452 int Imm;
453 unsigned Opc;
454 if (is64bit) {
455 Imm = ARM_AM::getFP64Imm(FPImm: Val);
456 Opc = ARM::FCONSTD;
457 } else {
458 Imm = ARM_AM::getFP32Imm(FPImm: Val);
459 Opc = ARM::FCONSTS;
460 }
461 Register DestReg = createResultReg(RC: TLI.getRegClassFor(VT));
462 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
463 MCID: TII.get(Opcode: Opc), DestReg).addImm(Val: Imm));
464 return DestReg;
465 }
466
467 // Require VFP2 for loading fp constants.
468 if (!Subtarget->hasVFP2Base()) return false;
469
470 // MachineConstantPool wants an explicit alignment.
471 Align Alignment = DL.getPrefTypeAlign(Ty: CFP->getType());
472 unsigned Idx = MCP.getConstantPoolIndex(C: cast<Constant>(Val: CFP), Alignment);
473 Register DestReg = createResultReg(RC: TLI.getRegClassFor(VT));
474 unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
475
476 // The extra reg is for addrmode5.
477 AddOptionalDefs(
478 MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
479 .addConstantPoolIndex(Idx)
480 .addReg(RegNo: 0));
481 return DestReg;
482}
483
// ARMMaterializeInt - Materialize an integer constant into a register.
// Tries, in order: a single MOVi16, an MVN of the complement, the generic
// tablegen movt-based path, and finally a constant-pool load (i32 only).
// Returns an invalid Register on failure.
Register ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
    return Register();

  // If we can do this in a single instruction without a constant pool entry
  // do so now.
  const ConstantInt *CI = cast<ConstantInt>(Val: C);
  if (Subtarget->hasV6T2Ops() && isUInt<16>(x: CI->getZExtValue())) {
    unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
    const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
                                    &ARM::GPRRegClass;
    Register ImmReg = createResultReg(RC);
    AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                            MCID: TII.get(Opcode: Opc), DestReg: ImmReg)
                    .addImm(Val: CI->getZExtValue()));
    return ImmReg;
  }

  // Use MVN to emit negative constants.
  // The bitwise complement of a negative value may be a valid modified
  // immediate even when the value itself is not.
  if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
    unsigned Imm = (unsigned)~(CI->getSExtValue());
    bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Arg: Imm) != -1) :
      (ARM_AM::getSOImmVal(Arg: Imm) != -1);
    if (UseImm) {
      unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
      const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
                                      &ARM::GPRRegClass;
      Register ImmReg = createResultReg(RC);
      AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                              MCID: TII.get(Opcode: Opc), DestReg: ImmReg)
                      .addImm(Val: Imm));
      return ImmReg;
    }
  }

  // Try the generic tablegen-generated constant emitter (movw/movt pair).
  Register ResultReg;
  if (Subtarget->useMovt())
    ResultReg = fastEmit_i(VT, RetVT: VT, Opcode: ISD::Constant, imm0: CI->getZExtValue());

  if (ResultReg)
    return ResultReg;

  // Load from constant pool. For now 32-bit only.
  if (VT != MVT::i32)
    return Register();

  // MachineConstantPool wants an explicit alignment.
  Align Alignment = DL.getPrefTypeAlign(Ty: C->getType());
  unsigned Idx = MCP.getConstantPoolIndex(C, Alignment);
  ResultReg = createResultReg(RC: TLI.getRegClassFor(VT));
  if (isThumb2)
    AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                            MCID: TII.get(Opcode: ARM::t2LDRpci), DestReg: ResultReg)
                    .addConstantPoolIndex(Idx));
  else {
    // The extra immediate is for addrmode2.
    ResultReg = constrainOperandRegClass(II: TII.get(Opcode: ARM::LDRcp), Op: ResultReg, OpNum: 0);
    AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                            MCID: TII.get(Opcode: ARM::LDRcp), DestReg: ResultReg)
                    .addConstantPoolIndex(Idx)
                    .addImm(Val: 0));
  }
  return ResultReg;
}
548
// isPositionIndependent - Forward to the target lowering's PIC query; used
// below to choose between movw/movt and constant-pool materialization.
bool ARMFastISel::isPositionIndependent() const {
  return TLI.isPositionIndependent();
}
552
// ARMMaterializeGV - Materialize the address of a global value into a
// register, via movw/movt when available or a constant-pool load otherwise,
// adding the PIC/indirect (GOT / non-lazy-pointer) loads where required.
// Returns an invalid Register for cases left to SelectionDAG.
Register ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32 || GV->isThreadLocal())
    return Register();

  // ROPI/RWPI not currently supported.
  if (Subtarget->isROPI() || Subtarget->isRWPI())
    return Register();

  bool IsIndirect = Subtarget->isGVIndirectSymbol(GV);
  const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
                                           : &ARM::GPRRegClass;
  Register DestReg = createResultReg(RC);

  // FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(Val: GV);
  bool IsThreadLocal = GVar && GVar->isThreadLocal();
  if (!Subtarget->isTargetMachO() && IsThreadLocal)
    return Register();

  bool IsPositionIndependent = isPositionIndependent();
  // Use movw+movt when possible, it avoids constant pool entries.
  // Non-darwin targets only support static movt relocations in FastISel.
  if (Subtarget->useMovt() &&
      (Subtarget->isTargetMachO() || !IsPositionIndependent)) {
    unsigned Opc;
    unsigned char TF = 0;
    if (Subtarget->isTargetMachO())
      TF = ARMII::MO_NONLAZY;

    if (IsPositionIndependent)
      Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
    else
      Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
    AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                            MCID: TII.get(Opcode: Opc), DestReg).addGlobalAddress(GV, Offset: 0, TargetFlags: TF));
  } else {
    // MachineConstantPool wants an explicit alignment.
    Align Alignment = DL.getPrefTypeAlign(Ty: GV->getType());

    if (Subtarget->isTargetELF() && IsPositionIndependent)
      return ARMLowerPICELF(GV, VT);

    // Grab index.
    // PCAdj accounts for the pipeline offset of the PC-relative fixup
    // (4 in Thumb, 8 in ARM mode).
    unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
    unsigned Id = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(C: GV, ID: Id,
                                                                Kind: ARMCP::CPValue,
                                                                PCAdj);
    unsigned Idx = MCP.getConstantPoolIndex(V: CPV, Alignment);

    // Load value.
    MachineInstrBuilder MIB;
    if (isThumb2) {
      unsigned Opc = IsPositionIndependent ? ARM::t2LDRpci_pic : ARM::t2LDRpci;
      MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc),
                    DestReg).addConstantPoolIndex(Idx);
      if (IsPositionIndependent)
        MIB.addImm(Val: Id);
      AddOptionalDefs(MIB);
    } else {
      // The extra immediate is for addrmode2.
      DestReg = constrainOperandRegClass(II: TII.get(Opcode: ARM::LDRcp), Op: DestReg, OpNum: 0);
      MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                    MCID: TII.get(Opcode: ARM::LDRcp), DestReg)
                .addConstantPoolIndex(Idx)
                .addImm(Val: 0);
      AddOptionalDefs(MIB);

      if (IsPositionIndependent) {
        // ARM-mode PIC: add (or load through) the PC to the loaded offset.
        unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
        Register NewDestReg = createResultReg(RC: TLI.getRegClassFor(VT));

        MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt,
                                          MIMD, MCID: TII.get(Opcode: Opc), DestReg: NewDestReg)
                                      .addReg(RegNo: DestReg)
                                      .addImm(Val: Id);
        AddOptionalDefs(MIB);
        return NewDestReg;
      }
    }
  }

  // Indirect symbols (GOT entries / non-lazy pointers) hold the address of
  // the real global; add one more load to resolve it.
  if ((Subtarget->isTargetELF() && Subtarget->isGVInGOT(GV)) ||
      (Subtarget->isTargetMachO() && IsIndirect)) {
    MachineInstrBuilder MIB;
    Register NewDestReg = createResultReg(RC: TLI.getRegClassFor(VT));
    if (isThumb2)
      MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                    MCID: TII.get(Opcode: ARM::t2LDRi12), DestReg: NewDestReg)
                .addReg(RegNo: DestReg)
                .addImm(Val: 0);
    else
      MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                    MCID: TII.get(Opcode: ARM::LDRi12), DestReg: NewDestReg)
                .addReg(RegNo: DestReg)
                .addImm(Val: 0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }

  return DestReg;
}
656
657Register ARMFastISel::fastMaterializeConstant(const Constant *C) {
658 EVT CEVT = TLI.getValueType(DL, Ty: C->getType(), AllowUnknown: true);
659
660 // Only handle simple types.
661 if (!CEVT.isSimple())
662 return Register();
663 MVT VT = CEVT.getSimpleVT();
664
665 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C))
666 return ARMMaterializeFP(CFP, VT);
667 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C))
668 return ARMMaterializeGV(GV, VT);
669 else if (isa<ConstantInt>(Val: C))
670 return ARMMaterializeInt(C, VT);
671
672 return Register();
673}
674
675// TODO: Register ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
676
677Register ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
678 // Don't handle dynamic allocas.
679 if (!FuncInfo.StaticAllocaMap.count(Val: AI))
680 return Register();
681
682 MVT VT;
683 if (!isLoadTypeLegal(Ty: AI->getType(), VT))
684 return Register();
685
686 DenseMap<const AllocaInst*, int>::iterator SI =
687 FuncInfo.StaticAllocaMap.find(Val: AI);
688
689 // This will get lowered later into the correct offsets and registers
690 // via rewriteXFrameIndex.
691 if (SI != FuncInfo.StaticAllocaMap.end()) {
692 unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
693 const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
694 Register ResultReg = createResultReg(RC);
695 ResultReg = constrainOperandRegClass(II: TII.get(Opcode: Opc), Op: ResultReg, OpNum: 0);
696
697 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
698 MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
699 .addFrameIndex(Idx: SI->second)
700 .addImm(Val: 0));
701 return ResultReg;
702 }
703
704 return Register();
705}
706
707bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
708 EVT evt = TLI.getValueType(DL, Ty, AllowUnknown: true);
709
710 // Only handle simple types.
711 if (evt == MVT::Other || !evt.isSimple()) return false;
712 VT = evt.getSimpleVT();
713
714 // Handle all legal types, i.e. a register that will directly hold this
715 // value.
716 return TLI.isTypeLegal(VT);
717}
718
719bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
720 if (isTypeLegal(Ty, VT)) return true;
721
722 // If this is a type than can be sign or zero-extended to a basic operation
723 // go ahead and accept it now.
724 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
725 return true;
726
727 return false;
728}
729
// Computes the address to get to an object.
// Walks through bitcasts, no-op int<->ptr casts, GEPs (folding constant
// offsets), and static allocas, filling in Addr.  Falls back to putting the
// whole object in a register.  Returns true on success.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Val: Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(Val: static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Val: Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(Val: Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
    break;
    case Instruction::BitCast:
      // Look through bitcasts.
      return ARMComputeAddress(Obj: U->getOperand(i: 0), Addr);
    case Instruction::IntToPtr:
      // Look past no-op inttoptrs.
      if (TLI.getValueType(DL, Ty: U->getOperand(i: 0)->getType()) ==
          TLI.getPointerTy(DL))
        return ARMComputeAddress(Obj: U->getOperand(i: 0), Addr);
      break;
    case Instruction::PtrToInt:
      // Look past no-op ptrtoints.
      if (TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL))
        return ARMComputeAddress(Obj: U->getOperand(i: 0), Addr);
      break;
    case Instruction::GetElementPtr: {
      // Save state so we can roll back if any index is unsupported.
      Address SavedAddr = Addr;
      int TmpOffset = Addr.getOffset();

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(GEP: U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (StructType *STy = GTI.getStructTypeOrNull()) {
          // Struct index: add the field's fixed byte offset.
          const StructLayout *SL = DL.getStructLayout(Ty: STy);
          unsigned Idx = cast<ConstantInt>(Val: Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          // Array/pointer index: scale by the element stride.
          uint64_t S = GTI.getSequentialElementStride(DL);
          while (true) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (canFoldAddIntoGEP(GEP: U, Add: Op)) {
              // A compatible add with a constant operand. Fold the constant.
              ConstantInt *CI =
                cast<ConstantInt>(Val: cast<AddOperator>(Val: Op)->getOperand(i_nocapture: 1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Val: Op)->getOperand(i_nocapture: 0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.setOffset(TmpOffset);
      if (ARMComputeAddress(Obj: U->getOperand(i: 0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Val: Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(Val: AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        // Static alloca: address is a frame index, resolved later.
        Addr.setKind(Address::FrameIndexBase);
        Addr.setFI(SI->second);
        return true;
      }
      break;
    }
  }

  // Try to get this in a register if nothing else has worked.
  if (!Addr.getReg())
    Addr.setReg(getRegForValue(V: Obj));
  return Addr.getReg();
}
836
// ARMSimplifyAddress - Rewrite Addr so its offset fits the encoding range
// of the load/store that will use it (12-bit for integer AM2, +/-255 for
// AM3, 8-bit scaled for FP), materializing reg+offset into a register when
// it does not.
void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
  bool needsLowering = false;
  switch (VT.SimpleTy) {
    default: llvm_unreachable("Unhandled load/store type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (!useAM3) {
        // Integer loads/stores handle 12-bit offsets.
        needsLowering = ((Addr.getOffset() & 0xfff) != Addr.getOffset());
        // Handle negative offsets.
        if (needsLowering && isThumb2)
          needsLowering = !(Subtarget->hasV6T2Ops() && Addr.getOffset() < 0 &&
                            Addr.getOffset() > -256);
      } else {
        // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
        needsLowering = (Addr.getOffset() > 255 || Addr.getOffset() < -255);
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // Floating point operands handle 8-bit offsets.
      needsLowering = ((Addr.getOffset() & 0xff) != Addr.getOffset());
      break;
  }

  // If this is a stack pointer and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (needsLowering && Addr.isFIBase()) {
    const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
                                             : &ARM::GPRRegClass;
    Register ResultReg = createResultReg(RC);
    unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(
        MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
            .addFrameIndex(Idx: Addr.getFI())
            .addImm(Val: 0));
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  // Since the offset is too large for the load/store instruction
  // get the reg+offset into a register.
  if (needsLowering) {
    Addr.setReg(fastEmit_ri_(VT: MVT::i32, Opcode: ISD::ADD, Op0: Addr.getReg(),
                             Imm: Addr.getOffset(), ImmType: MVT::i32));
    Addr.setOffset(0);
  }
}
888
889void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
890 const MachineInstrBuilder &MIB,
891 MachineMemOperand::Flags Flags,
892 bool useAM3) {
893 // addrmode5 output depends on the selection dag addressing dividing the
894 // offset by 4 that it then later multiplies. Do this here as well.
895 if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
896 Addr.setOffset(Addr.getOffset() / 4);
897
898 // Frame base works a bit differently. Handle it separately.
899 if (Addr.isFIBase()) {
900 int FI = Addr.getFI();
901 int Offset = Addr.getOffset();
902 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
903 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI, Offset), F: Flags,
904 Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
905 // Now add the rest of the operands.
906 MIB.addFrameIndex(Idx: FI);
907
908 // ARM halfword load/stores and signed byte loads need an additional
909 // operand.
910 if (useAM3) {
911 int Imm = (Addr.getOffset() < 0) ? (0x100 | -Addr.getOffset())
912 : Addr.getOffset();
913 MIB.addReg(RegNo: 0);
914 MIB.addImm(Val: Imm);
915 } else {
916 MIB.addImm(Val: Addr.getOffset());
917 }
918 MIB.addMemOperand(MMO);
919 } else {
920 // Now add the rest of the operands.
921 MIB.addReg(RegNo: Addr.getReg());
922
923 // ARM halfword load/stores and signed byte loads need an additional
924 // operand.
925 if (useAM3) {
926 int Imm = (Addr.getOffset() < 0) ? (0x100 | -Addr.getOffset())
927 : Addr.getOffset();
928 MIB.addReg(RegNo: 0);
929 MIB.addImm(Val: Imm);
930 } else {
931 MIB.addImm(Val: Addr.getOffset());
932 }
933 }
934 AddOptionalDefs(MIB);
935}
936
// Emit a load of VT from Addr into ResultReg, choosing the opcode by type,
// offset range, and subtarget (Thumb2 vs. ARM, VFP availability). Sub-word
// integer values are zero- or sign-extended per isZExt. When allocReg is
// true a fresh virtual register is created for the result; otherwise the
// caller-provided ResultReg is defined. Returns false to defer this load to
// the default instruction selector.
bool ARMFastISel::ARMEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                              MaybeAlign Alignment, bool isZExt,
                              bool allocReg) {
  unsigned Opc;
  bool useAM3 = false;  // addrmode3 (halfword / signed byte) operand form.
  bool needVMOV = false;  // Loaded into a GPR; must VMOV to an FP reg after.
  const TargetRegisterClass *RC;
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
  case MVT::i1:
  case MVT::i8:
    if (isThumb2) {
      // Prefer the imm8 form when the offset is a small negative value.
      if (Addr.getOffset() < 0 && Addr.getOffset() > -256 &&
          Subtarget->hasV6T2Ops())
        Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
      else
        Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
    } else {
      if (isZExt) {
        Opc = ARM::LDRBi12;
      } else {
        // Signed byte loads only exist in addrmode3 form on ARM.
        Opc = ARM::LDRSB;
        useAM3 = true;
      }
    }
    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
    break;
  case MVT::i16:
    // Bail on under-aligned halfword loads if unaligned access is illegal.
    if (Alignment && *Alignment < Align(2) &&
        !Subtarget->allowsUnalignedMem())
      return false;

    if (isThumb2) {
      if (Addr.getOffset() < 0 && Addr.getOffset() > -256 &&
          Subtarget->hasV6T2Ops())
        Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
      else
        Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
    } else {
      Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
      useAM3 = true;
    }
    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
    break;
  case MVT::i32:
    if (Alignment && *Alignment < Align(4) &&
        !Subtarget->allowsUnalignedMem())
      return false;

    if (isThumb2) {
      if (Addr.getOffset() < 0 && Addr.getOffset() > -256 &&
          Subtarget->hasV6T2Ops())
        Opc = ARM::t2LDRi8;
      else
        Opc = ARM::t2LDRi12;
    } else {
      Opc = ARM::LDRi12;
    }
    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
    break;
  case MVT::f32:
    if (!Subtarget->hasVFP2Base()) return false;
    // Unaligned loads need special handling. Floats require word-alignment.
    // Do an integer load into a GPR, then move to an S-register (see the
    // needVMOV fixup at the bottom).
    if (Alignment && *Alignment < Align(4)) {
      needVMOV = true;
      VT = MVT::i32;
      Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
    } else {
      Opc = ARM::VLDRS;
      RC = TLI.getRegClassFor(VT);
    }
    break;
  case MVT::f64:
    // Can load and store double precision even without FeatureFP64
    if (!Subtarget->hasVFP2Base()) return false;
    // FIXME: Unaligned loads need special handling. Doublewords require
    // word-alignment.
    if (Alignment && *Alignment < Align(4))
      return false;

    Opc = ARM::VLDRD;
    RC = TLI.getRegClassFor(VT);
    break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  if (allocReg)
    ResultReg = createResultReg(RC);
  assert(ResultReg.isVirtual() && "Expected an allocated virtual register.");
  MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                                    MCID: TII.get(Opcode: Opc), DestReg: ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB, Flags: MachineMemOperand::MOLoad, useAM3);

  // If we had an unaligned load of a float we've converted it to a regular
  // load. Now we must move from the GPR to the FP register.
  if (needVMOV) {
    Register MoveReg = createResultReg(RC: TLI.getRegClassFor(VT: MVT::f32));
    AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                            MCID: TII.get(Opcode: ARM::VMOVSR), DestReg: MoveReg)
                    .addReg(RegNo: ResultReg));
    ResultReg = MoveReg;
  }
  return true;
}
1045
1046bool ARMFastISel::SelectLoad(const Instruction *I) {
1047 // Atomic loads need special handling.
1048 if (cast<LoadInst>(Val: I)->isAtomic())
1049 return false;
1050
1051 const Value *SV = I->getOperand(i: 0);
1052 if (TLI.supportSwiftError()) {
1053 // Swifterror values can come from either a function parameter with
1054 // swifterror attribute or an alloca with swifterror attribute.
1055 if (const Argument *Arg = dyn_cast<Argument>(Val: SV)) {
1056 if (Arg->hasSwiftErrorAttr())
1057 return false;
1058 }
1059
1060 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: SV)) {
1061 if (Alloca->isSwiftError())
1062 return false;
1063 }
1064 }
1065
1066 // Verify we have a legal type before going any further.
1067 MVT VT;
1068 if (!isLoadTypeLegal(Ty: I->getType(), VT))
1069 return false;
1070
1071 // See if we can handle this address.
1072 Address Addr;
1073 if (!ARMComputeAddress(Obj: I->getOperand(i: 0), Addr)) return false;
1074
1075 Register ResultReg;
1076 if (!ARMEmitLoad(VT, ResultReg, Addr, Alignment: cast<LoadInst>(Val: I)->getAlign()))
1077 return false;
1078 updateValueMap(I, Reg: ResultReg);
1079 return true;
1080}
1081
// Emit a store of SrcReg (holding a value of type VT) to Addr, selecting the
// store opcode by type, offset range, and subtarget. i1 values are masked to
// their low bit first; under-aligned f32 values are moved to a GPR and
// stored as i32. Returns false to defer to the default instruction selector.
bool ARMFastISel::ARMEmitStore(MVT VT, Register SrcReg, Address &Addr,
                               MaybeAlign Alignment) {
  unsigned StrOpc;
  bool useAM3 = false;  // addrmode3 (halfword) operand form.
  switch (VT.SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1: {
      // Mask the value down to bit 0, then fall through to the i8 store.
      Register Res = createResultReg(RC: isThumb2 ? &ARM::tGPRRegClass
                                            : &ARM::GPRRegClass);
      unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
      SrcReg = constrainOperandRegClass(II: TII.get(Opcode: Opc), Op: SrcReg, OpNum: 1);
      AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                              MCID: TII.get(Opcode: Opc), DestReg: Res)
                      .addReg(RegNo: SrcReg).addImm(Val: 1));
      SrcReg = Res;
      [[fallthrough]];
    }
    case MVT::i8:
      if (isThumb2) {
        // Prefer the imm8 form for small negative offsets when available.
        if (Addr.getOffset() < 0 && Addr.getOffset() > -256 &&
            Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRBi8;
        else
          StrOpc = ARM::t2STRBi12;
      } else {
        StrOpc = ARM::STRBi12;
      }
      break;
    case MVT::i16:
      // Bail on under-aligned halfword stores if unaligned access is illegal.
      if (Alignment && *Alignment < Align(2) &&
          !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.getOffset() < 0 && Addr.getOffset() > -256 &&
            Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRHi8;
        else
          StrOpc = ARM::t2STRHi12;
      } else {
        StrOpc = ARM::STRH;
        useAM3 = true;
      }
      break;
    case MVT::i32:
      if (Alignment && *Alignment < Align(4) &&
          !Subtarget->allowsUnalignedMem())
        return false;

      if (isThumb2) {
        if (Addr.getOffset() < 0 && Addr.getOffset() > -256 &&
            Subtarget->hasV6T2Ops())
          StrOpc = ARM::t2STRi8;
        else
          StrOpc = ARM::t2STRi12;
      } else {
        StrOpc = ARM::STRi12;
      }
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2Base()) return false;
      // Unaligned stores need special handling. Floats require word-alignment.
      // Move the value to a GPR and store it as an i32 instead.
      if (Alignment && *Alignment < Align(4)) {
        Register MoveReg = createResultReg(RC: TLI.getRegClassFor(VT: MVT::i32));
        AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                                MCID: TII.get(Opcode: ARM::VMOVRS), DestReg: MoveReg)
                        .addReg(RegNo: SrcReg));
        SrcReg = MoveReg;
        VT = MVT::i32;
        StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
      } else {
        StrOpc = ARM::VSTRS;
      }
      break;
    case MVT::f64:
      // Can load and store double precision even without FeatureFP64
      if (!Subtarget->hasVFP2Base()) return false;
      // FIXME: Unaligned stores need special handling. Doublewords require
      // word-alignment.
      if (Alignment && *Alignment < Align(4))
        return false;

      StrOpc = ARM::VSTRD;
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT, useAM3);

  // Create the base instruction, then add the operands.
  SrcReg = constrainOperandRegClass(II: TII.get(Opcode: StrOpc), Op: SrcReg, OpNum: 0);
  MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                                    MCID: TII.get(Opcode: StrOpc))
                            .addReg(RegNo: SrcReg);
  AddLoadStoreOperands(VT, Addr, MIB, Flags: MachineMemOperand::MOStore, useAM3);
  return true;
}
1179
1180bool ARMFastISel::SelectStore(const Instruction *I) {
1181 Value *Op0 = I->getOperand(i: 0);
1182 Register SrcReg;
1183
1184 // Atomic stores need special handling.
1185 if (cast<StoreInst>(Val: I)->isAtomic())
1186 return false;
1187
1188 const Value *PtrV = I->getOperand(i: 1);
1189 if (TLI.supportSwiftError()) {
1190 // Swifterror values can come from either a function parameter with
1191 // swifterror attribute or an alloca with swifterror attribute.
1192 if (const Argument *Arg = dyn_cast<Argument>(Val: PtrV)) {
1193 if (Arg->hasSwiftErrorAttr())
1194 return false;
1195 }
1196
1197 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: PtrV)) {
1198 if (Alloca->isSwiftError())
1199 return false;
1200 }
1201 }
1202
1203 // Verify we have a legal type before going any further.
1204 MVT VT;
1205 if (!isLoadTypeLegal(Ty: I->getOperand(i: 0)->getType(), VT))
1206 return false;
1207
1208 // Get the value to be stored into a register.
1209 SrcReg = getRegForValue(V: Op0);
1210 if (!SrcReg)
1211 return false;
1212
1213 // See if we can handle this address.
1214 Address Addr;
1215 if (!ARMComputeAddress(Obj: I->getOperand(i: 1), Addr))
1216 return false;
1217
1218 if (!ARMEmitStore(VT, SrcReg, Addr, Alignment: cast<StoreInst>(Val: I)->getAlign()))
1219 return false;
1220 return true;
1221}
1222
1223static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
1224 switch (Pred) {
1225 // Needs two compares...
1226 case CmpInst::FCMP_ONE:
1227 case CmpInst::FCMP_UEQ:
1228 default:
1229 // AL is our "false" for now. The other two need more compares.
1230 return ARMCC::AL;
1231 case CmpInst::ICMP_EQ:
1232 case CmpInst::FCMP_OEQ:
1233 return ARMCC::EQ;
1234 case CmpInst::ICMP_SGT:
1235 case CmpInst::FCMP_OGT:
1236 return ARMCC::GT;
1237 case CmpInst::ICMP_SGE:
1238 case CmpInst::FCMP_OGE:
1239 return ARMCC::GE;
1240 case CmpInst::ICMP_UGT:
1241 case CmpInst::FCMP_UGT:
1242 return ARMCC::HI;
1243 case CmpInst::FCMP_OLT:
1244 return ARMCC::MI;
1245 case CmpInst::ICMP_ULE:
1246 case CmpInst::FCMP_OLE:
1247 return ARMCC::LS;
1248 case CmpInst::FCMP_ORD:
1249 return ARMCC::VC;
1250 case CmpInst::FCMP_UNO:
1251 return ARMCC::VS;
1252 case CmpInst::FCMP_UGE:
1253 return ARMCC::PL;
1254 case CmpInst::ICMP_SLT:
1255 case CmpInst::FCMP_ULT:
1256 return ARMCC::LT;
1257 case CmpInst::ICMP_SLE:
1258 case CmpInst::FCMP_ULE:
1259 return ARMCC::LE;
1260 case CmpInst::FCMP_UNE:
1261 case CmpInst::ICMP_NE:
1262 return ARMCC::NE;
1263 case CmpInst::ICMP_UGE:
1264 return ARMCC::HS;
1265 case CmpInst::ICMP_ULT:
1266 return ARMCC::LO;
1267 }
1268}
1269
1270bool ARMFastISel::SelectBranch(const Instruction *I) {
1271 const CondBrInst *BI = cast<CondBrInst>(Val: I);
1272 MachineBasicBlock *TBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: 0));
1273 MachineBasicBlock *FBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: 1));
1274
1275 // Simple branch support.
1276
1277 // If we can, avoid recomputing the compare - redoing it could lead to wonky
1278 // behavior.
1279 if (const CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition())) {
1280 if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
1281 // Get the compare predicate.
1282 // Try to take advantage of fallthrough opportunities.
1283 CmpInst::Predicate Predicate = CI->getPredicate();
1284 if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
1285 std::swap(a&: TBB, b&: FBB);
1286 Predicate = CmpInst::getInversePredicate(pred: Predicate);
1287 }
1288
1289 ARMCC::CondCodes ARMPred = getComparePred(Pred: Predicate);
1290
1291 // We may not handle every CC for now.
1292 if (ARMPred == ARMCC::AL) return false;
1293
1294 // Emit the compare.
1295 if (!ARMEmitCmp(Src1Value: CI->getOperand(i_nocapture: 0), Src2Value: CI->getOperand(i_nocapture: 1), isZExt: CI->isUnsigned()))
1296 return false;
1297
1298 unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1299 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: BrOpc))
1300 .addMBB(MBB: TBB).addImm(Val: ARMPred).addReg(RegNo: ARM::CPSR);
1301 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
1302 return true;
1303 }
1304 } else if (TruncInst *TI = dyn_cast<TruncInst>(Val: BI->getCondition())) {
1305 MVT SourceVT;
1306 if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1307 (isLoadTypeLegal(Ty: TI->getOperand(i_nocapture: 0)->getType(), VT&: SourceVT))) {
1308 unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
1309 Register OpReg = getRegForValue(V: TI->getOperand(i_nocapture: 0));
1310 OpReg = constrainOperandRegClass(II: TII.get(Opcode: TstOpc), Op: OpReg, OpNum: 0);
1311 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1312 MCID: TII.get(Opcode: TstOpc))
1313 .addReg(RegNo: OpReg).addImm(Val: 1));
1314
1315 unsigned CCMode = ARMCC::NE;
1316 if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
1317 std::swap(a&: TBB, b&: FBB);
1318 CCMode = ARMCC::EQ;
1319 }
1320
1321 unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1322 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: BrOpc))
1323 .addMBB(MBB: TBB).addImm(Val: CCMode).addReg(RegNo: ARM::CPSR);
1324
1325 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
1326 return true;
1327 }
1328 } else if (const ConstantInt *CI =
1329 dyn_cast<ConstantInt>(Val: BI->getCondition())) {
1330 uint64_t Imm = CI->getZExtValue();
1331 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
1332 fastEmitBranch(MSucc: Target, DbgLoc: MIMD.getDL());
1333 return true;
1334 }
1335
1336 Register CmpReg = getRegForValue(V: BI->getCondition());
1337 if (!CmpReg)
1338 return false;
1339
1340 // We've been divorced from our compare! Our block was split, and
1341 // now our compare lives in a predecessor block. We musn't
1342 // re-compare here, as the children of the compare aren't guaranteed
1343 // live across the block boundary (we *could* check for this).
1344 // Regardless, the compare has been done in the predecessor block,
1345 // and it left a value for us in a virtual register. Ergo, we test
1346 // the one-bit value left in the virtual register.
1347 unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
1348 CmpReg = constrainOperandRegClass(II: TII.get(Opcode: TstOpc), Op: CmpReg, OpNum: 0);
1349 AddOptionalDefs(
1350 MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: TstOpc))
1351 .addReg(RegNo: CmpReg)
1352 .addImm(Val: 1));
1353
1354 unsigned CCMode = ARMCC::NE;
1355 if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
1356 std::swap(a&: TBB, b&: FBB);
1357 CCMode = ARMCC::EQ;
1358 }
1359
1360 unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
1361 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: BrOpc))
1362 .addMBB(MBB: TBB).addImm(Val: CCMode).addReg(RegNo: ARM::CPSR);
1363 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
1364 return true;
1365}
1366
1367bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
1368 Register AddrReg = getRegForValue(V: I->getOperand(i: 0));
1369 if (!AddrReg)
1370 return false;
1371
1372 unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
1373 assert(isThumb2 || Subtarget->hasV4TOps());
1374
1375 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1376 MCID: TII.get(Opcode: Opc)).addReg(RegNo: AddrReg));
1377
1378 const IndirectBrInst *IB = cast<IndirectBrInst>(Val: I);
1379 for (const BasicBlock *SuccBB : IB->successors())
1380 FuncInfo.MBB->addSuccessor(Succ: FuncInfo.getMBB(BB: SuccBB));
1381
1382 return true;
1383}
1384
// Emit a compare of Src1Value against Src2Value that sets CPSR (integer
// CMP/CMN, or VCMP followed by FMSTAT for floating point). Sub-word integer
// operands are extended to i32 first, zero- vs. sign-extended per isZExt.
// Encodes Src2 as an immediate when possible (CMN for negatable negatives,
// VCMPZ against +0.0). Returns false if this compare cannot be handled here.
bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
                             bool isZExt) {
  Type *Ty = Src1Value->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, AllowUnknown: true);
  if (!SrcEVT.isSimple()) return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
    return false;

  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  // FIXME: At -O0 we don't have anything that canonicalizes operand order.
  // Thus, Src1Value may be a ConstantInt, but we're missing it.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: Src2Value)) {
    if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
        SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
      // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
      // then a cmn, because there is no way to represent 2147483648 as a
      // signed 32-bit int.
      if (Imm < 0 && Imm != (int)0x80000000) {
        // Negate so a CMN (compare-negative) can be used with the positive
        // magnitude (see the opcode selection below).
        isNegativeImm = true;
        Imm = -Imm;
      }
      UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Arg: Imm) != -1) :
        (ARM_AM::getSOImmVal(Arg: Imm) != -1);
    }
  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Val: Src2Value)) {
    // VCMPZ only compares against +0.0.
    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
      if (ConstFP->isZero() && !ConstFP->isNegative())
        UseImm = true;
  }

  unsigned CmpOpc;
  bool isICmp = true;
  bool needsExt = false;
  switch (SrcVT.SimpleTy) {
  default: return false;
  // TODO: Verify compares.
  case MVT::f32:
    isICmp = false;
    CmpOpc = UseImm ? ARM::VCMPZS : ARM::VCMPS;
    break;
  case MVT::f64:
    isICmp = false;
    CmpOpc = UseImm ? ARM::VCMPZD : ARM::VCMPD;
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    needsExt = true;
    [[fallthrough]];
  case MVT::i32:
    if (isThumb2) {
      if (!UseImm)
        CmpOpc = ARM::t2CMPrr;
      else
        CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
    } else {
      if (!UseImm)
        CmpOpc = ARM::CMPrr;
      else
        CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
    }
    break;
  }

  Register SrcReg1 = getRegForValue(V: Src1Value);
  if (!SrcReg1)
    return false;

  Register SrcReg2;
  if (!UseImm) {
    SrcReg2 = getRegForValue(V: Src2Value);
    if (!SrcReg2)
      return false;
  }

  // We have i1, i8, or i16, we need to either zero extend or sign extend.
  if (needsExt) {
    SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg: SrcReg1, DestVT: MVT::i32, isZExt);
    if (!SrcReg1)
      return false;
    if (!UseImm) {
      SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg: SrcReg2, DestVT: MVT::i32, isZExt);
      if (!SrcReg2)
        return false;
    }
  }

  const MCInstrDesc &II = TII.get(Opcode: CmpOpc);
  SrcReg1 = constrainOperandRegClass(II, Op: SrcReg1, OpNum: 0);
  if (!UseImm) {
    SrcReg2 = constrainOperandRegClass(II, Op: SrcReg2, OpNum: 1);
    AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
                    .addReg(RegNo: SrcReg1).addReg(RegNo: SrcReg2));
  } else {
    MachineInstrBuilder MIB;
    MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
      .addReg(RegNo: SrcReg1);

    // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
    if (isICmp)
      MIB.addImm(Val: Imm);
    AddOptionalDefs(MIB);
  }

  // For floating point we need to move the result to a comparison register
  // that we can then use for branches.
  if (Ty->isFloatTy() || Ty->isDoubleTy())
    AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                            MCID: TII.get(Opcode: ARM::FMSTAT)));
  return true;
}
1507
1508bool ARMFastISel::SelectCmp(const Instruction *I) {
1509 const CmpInst *CI = cast<CmpInst>(Val: I);
1510
1511 // Get the compare predicate.
1512 ARMCC::CondCodes ARMPred = getComparePred(Pred: CI->getPredicate());
1513
1514 // We may not handle every CC for now.
1515 if (ARMPred == ARMCC::AL) return false;
1516
1517 // Emit the compare.
1518 if (!ARMEmitCmp(Src1Value: CI->getOperand(i_nocapture: 0), Src2Value: CI->getOperand(i_nocapture: 1), isZExt: CI->isUnsigned()))
1519 return false;
1520
1521 // Now set a register based on the comparison. Explicitly set the predicates
1522 // here.
1523 unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
1524 const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
1525 : &ARM::GPRRegClass;
1526 Register DestReg = createResultReg(RC);
1527 Constant *Zero = ConstantInt::get(Ty: Type::getInt32Ty(C&: *Context), V: 0);
1528 Register ZeroReg = fastMaterializeConstant(C: Zero);
1529 // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
1530 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: MovCCOpc), DestReg)
1531 .addReg(RegNo: ZeroReg).addImm(Val: 1)
1532 .addImm(Val: ARMPred).addReg(RegNo: ARM::CPSR);
1533
1534 updateValueMap(I, Reg: DestReg);
1535 return true;
1536}
1537
1538bool ARMFastISel::SelectFPExt(const Instruction *I) {
1539 // Make sure we have VFP and that we're extending float to double.
1540 if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
1541
1542 Value *V = I->getOperand(i: 0);
1543 if (!I->getType()->isDoubleTy() ||
1544 !V->getType()->isFloatTy()) return false;
1545
1546 Register Op = getRegForValue(V);
1547 if (!Op)
1548 return false;
1549
1550 Register Result = createResultReg(RC: &ARM::DPRRegClass);
1551 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1552 MCID: TII.get(Opcode: ARM::VCVTDS), DestReg: Result)
1553 .addReg(RegNo: Op));
1554 updateValueMap(I, Reg: Result);
1555 return true;
1556}
1557
1558bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
1559 // Make sure we have VFP and that we're truncating double to float.
1560 if (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()) return false;
1561
1562 Value *V = I->getOperand(i: 0);
1563 if (!(I->getType()->isFloatTy() &&
1564 V->getType()->isDoubleTy())) return false;
1565
1566 Register Op = getRegForValue(V);
1567 if (!Op)
1568 return false;
1569
1570 Register Result = createResultReg(RC: &ARM::SPRRegClass);
1571 AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1572 MCID: TII.get(Opcode: ARM::VCVTSD), DestReg: Result)
1573 .addReg(RegNo: Op));
1574 updateValueMap(I, Reg: Result);
1575 return true;
1576}
1577
// Select sitofp/uitofp: extend the integer source to i32 if needed, move it
// into an FP register, then convert in place with VSITO*/VUITO*. isSigned
// distinguishes sitofp (true) from uitofp (false). Returns false when VFP is
// unavailable or the types are not supported here.
bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2Base()) return false;

  MVT DstVT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT&: DstVT))
    return false;

  Value *Src = I->getOperand(i: 0);
  EVT SrcEVT = TLI.getValueType(DL, Ty: Src->getType(), AllowUnknown: true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();
  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
    return false;

  Register SrcReg = getRegForValue(V: Src);
  if (!SrcReg)
    return false;

  // Handle sign-extension.
  // (Zero-extend for uitofp, sign-extend for sitofp.)
  if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
    SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT: MVT::i32,
                            /*isZExt*/!isSigned);
    if (!SrcReg)
      return false;
  }

  // The conversion routine works on fp-reg to fp-reg and the operand above
  // was an integer, move it to the fp registers if possible.
  Register FP = ARMMoveToFPReg(VT: MVT::f32, SrcReg);
  if (!FP)
    return false;

  unsigned Opc;
  if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
  else if (Ty->isDoubleTy() && Subtarget->hasFP64())
    Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
  else return false;

  Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT: DstVT));
  AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                          MCID: TII.get(Opcode: Opc), DestReg: ResultReg).addReg(RegNo: FP));
  updateValueMap(I, Reg: ResultReg);
  return true;
}
1625
// Select fptosi/fptoui: convert with VTOSIZ*/VTOUIZ* (round toward zero)
// into an FP register, then move the bits to an integer register. isSigned
// distinguishes fptosi (true) from fptoui (false). Returns false when VFP is
// unavailable or the types are not supported here.
bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2Base()) return false;

  MVT DstVT;
  Type *RetTy = I->getType();
  if (!isTypeLegal(Ty: RetTy, VT&: DstVT))
    return false;

  Register Op = getRegForValue(V: I->getOperand(i: 0));
  if (!Op)
    return false;

  unsigned Opc;
  Type *OpTy = I->getOperand(i: 0)->getType();
  if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
  else if (OpTy->isDoubleTy() && Subtarget->hasFP64())
    Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
  else return false;

  // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
  Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT: MVT::f32));
  AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                          MCID: TII.get(Opcode: Opc), DestReg: ResultReg).addReg(RegNo: Op));

  // This result needs to be in an integer register, but the conversion only
  // takes place in fp-regs.
  Register IntReg = ARMMoveToIntReg(VT: DstVT, SrcReg: ResultReg);
  if (!IntReg)
    return false;

  updateValueMap(I, Reg: IntReg);
  return true;
}
1660
// Select an i32 select: TST the i1 condition against 1, then emit a
// predicated move (MOVCC/MVNCC) choosing between the two values. The false
// value is encoded as an immediate when it fits an ARM/Thumb2 modified
// immediate (negatives via MVNCC with the bitwise complement).
bool ARMFastISel::SelectSelect(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(Ty: I->getType(), VT))
    return false;

  // Things need to be register sized for register moves.
  if (VT != MVT::i32) return false;

  Register CondReg = getRegForValue(V: I->getOperand(i: 0));
  if (!CondReg)
    return false;
  Register Op1Reg = getRegForValue(V: I->getOperand(i: 1));
  if (!Op1Reg)
    return false;

  // Check to see if we can use an immediate in the conditional move.
  int Imm = 0;
  bool UseImm = false;
  bool isNegativeImm = false;
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Val: I->getOperand(i: 2))) {
    assert(VT == MVT::i32 && "Expecting an i32.");
    Imm = (int)ConstInt->getValue().getZExtValue();
    if (Imm < 0) {
      // MVNCC moves the complement, so store ~Imm and remember to use it.
      isNegativeImm = true;
      Imm = ~Imm;
    }
    UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Arg: Imm) != -1) :
      (ARM_AM::getSOImmVal(Arg: Imm) != -1);
  }

  Register Op2Reg;
  if (!UseImm) {
    Op2Reg = getRegForValue(V: I->getOperand(i: 2));
    if (!Op2Reg)
      return false;
  }

  // Test bit 0 of the condition so the predicated move can key off CPSR.
  unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
  CondReg = constrainOperandRegClass(II: TII.get(Opcode: TstOpc), Op: CondReg, OpNum: 0);
  AddOptionalDefs(
      MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: TstOpc))
          .addReg(RegNo: CondReg)
          .addImm(Val: 1));

  unsigned MovCCOpc;
  const TargetRegisterClass *RC;
  if (!UseImm) {
    RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
    MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
  } else {
    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
    if (!isNegativeImm)
      MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
    else
      MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
  }
  Register ResultReg = createResultReg(RC);
  if (!UseImm) {
    // Register form: start from the false value (Op2) and conditionally
    // (NE: condition bit was set) move in the true value (Op1).
    Op2Reg = constrainOperandRegClass(II: TII.get(Opcode: MovCCOpc), Op: Op2Reg, OpNum: 1);
    Op1Reg = constrainOperandRegClass(II: TII.get(Opcode: MovCCOpc), Op: Op1Reg, OpNum: 2);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: MovCCOpc),
            DestReg: ResultReg)
        .addReg(RegNo: Op2Reg)
        .addReg(RegNo: Op1Reg)
        .addImm(Val: ARMCC::NE)
        .addReg(RegNo: ARM::CPSR);
  } else {
    // Immediate form: start from the true value (Op1) and conditionally
    // (EQ: condition bit was clear) move in the immediate false value.
    Op1Reg = constrainOperandRegClass(II: TII.get(Opcode: MovCCOpc), Op: Op1Reg, OpNum: 1);
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: MovCCOpc),
            DestReg: ResultReg)
        .addReg(RegNo: Op1Reg)
        .addImm(Val: Imm)
        .addImm(Val: ARMCC::EQ)
        .addReg(RegNo: ARM::CPSR);
  }
  updateValueMap(I, Reg: ResultReg);
  return true;
}
1739
1740bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
1741 MVT VT;
1742 Type *Ty = I->getType();
1743 if (!isTypeLegal(Ty, VT))
1744 return false;
1745
1746 // If we have integer div support we should have selected this automagically.
1747 // In case we have a real miss go ahead and return false and we'll pick
1748 // it up later.
1749 if (Subtarget->hasDivideInThumbMode())
1750 return false;
1751
1752 // Otherwise emit a libcall.
1753 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1754 if (VT == MVT::i8)
1755 LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
1756 else if (VT == MVT::i16)
1757 LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
1758 else if (VT == MVT::i32)
1759 LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
1760 else if (VT == MVT::i64)
1761 LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
1762 else if (VT == MVT::i128)
1763 LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
1764 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
1765
1766 return ARMEmitLibcall(I, Call: LC);
1767}
1768
1769bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
1770 MVT VT;
1771 Type *Ty = I->getType();
1772 if (!isTypeLegal(Ty, VT))
1773 return false;
1774
1775 // Many ABIs do not provide a libcall for standalone remainder, so we need to
1776 // use divrem (see the RTABI 4.3.1). Since FastISel can't handle non-double
1777 // multi-reg returns, we'll have to bail out.
1778 if (!TLI.hasStandaloneRem(VT)) {
1779 return false;
1780 }
1781
1782 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
1783 if (VT == MVT::i8)
1784 LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
1785 else if (VT == MVT::i16)
1786 LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
1787 else if (VT == MVT::i32)
1788 LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
1789 else if (VT == MVT::i64)
1790 LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
1791 else if (VT == MVT::i128)
1792 LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
1793 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
1794
1795 return ARMEmitLibcall(I, Call: LC);
1796}
1797
// Select an integer add/or/sub whose result type is narrower than i32
// (i1/i8/i16). These illegal types are rejected by the target-independent
// selector, so emit the full-width register-register form here; callers of
// the result only rely on the low bits.
bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
    return false;

  // Map the generic ISD opcode to the matching ARM or Thumb2 machine opcode.
  unsigned Opc;
  switch (ISDOpcode) {
  default: return false;
  case ISD::ADD:
    Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
    break;
  case ISD::OR:
    Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
    break;
  case ISD::SUB:
    Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
    break;
  }

  Register SrcReg1 = getRegForValue(V: I->getOperand(i: 0));
  if (!SrcReg1)
    return false;

  // TODO: Often the 2nd operand is an immediate, which can be encoded directly
  // in the instruction, rather then materializing the value in a register.
  Register SrcReg2 = getRegForValue(V: I->getOperand(i: 1));
  if (!SrcReg2)
    return false;

  // Constrain both sources to the register classes the chosen opcode
  // requires before building the instruction.
  Register ResultReg = createResultReg(RC: &ARM::GPRnopcRegClass);
  SrcReg1 = constrainOperandRegClass(II: TII.get(Opcode: Opc), Op: SrcReg1, OpNum: 1);
  SrcReg2 = constrainOperandRegClass(II: TII.get(Opcode: Opc), Op: SrcReg2, OpNum: 2);
  AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                          MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
                  .addReg(RegNo: SrcReg1).addReg(RegNo: SrcReg2));
  updateValueMap(I, Reg: ResultReg);
  return true;
}
1839
// Select a scalar floating-point add/sub/mul using the VFP instructions.
// Returns false (deferring to the normal selector) for vectors, unsupported
// FP configurations, or any other ISD opcode.
bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
  EVT FPVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
  if (!FPVT.isSimple()) return false;
  MVT VT = FPVT.getSimpleVT();

  // FIXME: Support vector types where possible.
  if (VT.isVector())
    return false;

  // We can get here in the case when we want to use NEON for our fp
  // operations, but can't figure out how to. Just use the vfp instructions
  // if we have them.
  // FIXME: It'd be nice to use NEON instructions.
  Type *Ty = I->getType();
  if (Ty->isFloatTy() && !Subtarget->hasVFP2Base())
    return false;
  if (Ty->isDoubleTy() && (!Subtarget->hasVFP2Base() || !Subtarget->hasFP64()))
    return false;

  // Choose between the double- and single-precision VFP opcode.
  // NOTE(review): the MVT::i64 clause looks unreachable for an FP binop —
  // presumably historical; confirm before relying on it.
  unsigned Opc;
  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
  switch (ISDOpcode) {
  default: return false;
  case ISD::FADD:
    Opc = is64bit ? ARM::VADDD : ARM::VADDS;
    break;
  case ISD::FSUB:
    Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
    break;
  case ISD::FMUL:
    Opc = is64bit ? ARM::VMULD : ARM::VMULS;
    break;
  }
  Register Op1 = getRegForValue(V: I->getOperand(i: 0));
  if (!Op1)
    return false;

  Register Op2 = getRegForValue(V: I->getOperand(i: 1));
  if (!Op2)
    return false;

  Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT: VT.SimpleTy));
  AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                          MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
                  .addReg(RegNo: Op1).addReg(RegNo: Op2));
  updateValueMap(I, Reg: ResultReg);
  return true;
}
1888
1889// Call Handling Code
1890
1891// This is largely taken directly from CCAssignFnForNode
1892// TODO: We may not support all of this.
// Map an IR calling convention to the CCAssignFn used for analyzing either
// arguments (Return == false) or return values (Return == true). Variadic
// calls never use the hard-float (VFP-register) variants.
CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
                                           bool Return,
                                           bool isVarArg) {
  switch (CC) {
  default:
    report_fatal_error(reason: "Unsupported calling convention");
  case CallingConv::Fast:
    // fastcc gets VFP registers when FP registers exist and the call is not
    // variadic; otherwise it behaves like the C convention (fallthrough).
    if (Subtarget->hasFPRegs() && !isVarArg) {
      if (!TM.isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use VFP variant of the calling convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    [[fallthrough]];
  case CallingConv::C:
  case CallingConv::CXX_FAST_TLS:
    // Use target triple & subtarget features to do actual dispatch.
    if (TM.isAAPCS_ABI()) {
      if (Subtarget->hasFPRegs() &&
          TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
    } else {
        return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
    }
  case CallingConv::ARM_AAPCS_VFP:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
    if (!isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
    // Fall through to soft float variant, variadic functions don't
    // use hard floating point ABI.
    [[fallthrough]];
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  case CallingConv::GHC:
    // GHC functions never return through the normal convention.
    if (Return)
      report_fatal_error(reason: "Can't return in GHC call convention");
    else
      return CC_ARM_APCS_GHC;
  case CallingConv::CFGuard_Check:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
  }
}
1940
// Assign call arguments to locations, then emit CALLSEQ_START and the
// copies/stores that place each argument. Runs a validation-only pass first
// so that no instructions are emitted if any argument is unsupported.
// On success, RegArgs holds the physical registers carrying arguments and
// NumBytes the stack space consumed.
bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<Register> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<Register> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes,
                                  bool isVarArg) {
  SmallVector<CCValAssign, 16> ArgLocs;
  SmallVector<Type *, 16> OrigTys;
  for (Value *Arg : Args)
    OrigTys.push_back(Elt: Arg->getType());
  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs, Flags&: ArgFlags, OrigTys,
                             Fn: CCAssignFnForCall(CC, Return: false, isVarArg));

  // Check that we can handle all of the arguments. If we can't, then bail out
  // now before we add code to the MBB.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // We don't handle NEON/vector parameters yet.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
      return false;

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      continue;
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      // Note: ++i here consumes the second half of the f64 register pair.
      if (VA.getLocVT() != MVT::f64 ||
          // TODO: Only handle register args for now.
          !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
        return false;
    } else {
      switch (ArgVT.SimpleTy) {
      default:
        return false;
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
        break;
      case MVT::f32:
        if (!Subtarget->hasVFP2Base())
          return false;
        break;
      case MVT::f64:
        if (!Subtarget->hasVFP2Base())
          return false;
        break;
      }
    }
  }

  // At the point, we are able to handle the call's arguments in fast isel.

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getStackSize();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                          MCID: TII.get(Opcode: AdjStackDown))
                  .addImm(Val: NumBytes).addImm(Val: 0));

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const Value *ArgVal = Args[VA.getValNo()];
    Register Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
           "We don't handle NEON/vector parameters yet.");

    // Handle arg promotion, etc.
    switch (VA.getLocInfo()) {
      case CCValAssign::Full: break;
      case CCValAssign::SExt: {
        MVT DestVT = VA.getLocVT();
        Arg = ARMEmitIntExt(SrcVT: ArgVT, SrcReg: Arg, DestVT, /*isZExt*/false);
        assert(Arg && "Failed to emit a sext");
        ArgVT = DestVT;
        break;
      }
      case CCValAssign::AExt:
      // Intentional fall-through.  Handle AExt and ZExt.
      case CCValAssign::ZExt: {
        MVT DestVT = VA.getLocVT();
        Arg = ARMEmitIntExt(SrcVT: ArgVT, SrcReg: Arg, DestVT, /*isZExt*/true);
        assert(Arg && "Failed to emit a zext");
        ArgVT = DestVT;
        break;
      }
      case CCValAssign::BCvt: {
        Register BC = fastEmit_r(VT: ArgVT, RetVT: VA.getLocVT(), Opcode: ISD::BITCAST, Op0: Arg);
        assert(BC && "Failed to emit a bitcast!");
        Arg = BC;
        ArgVT = VA.getLocVT();
        break;
      }
      default: llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      // Plain register argument: copy into its assigned physreg.
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
              MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: VA.getLocReg()).addReg(RegNo: Arg);
      RegArgs.push_back(Elt: VA.getLocReg());
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      assert(VA.getLocVT() == MVT::f64 &&
             "Custom lowering for v2f64 args not available");

      // FIXME: ArgLocs[++i] may extend beyond ArgLocs.size()
      CCValAssign &NextVA = ArgLocs[++i];

      assert(VA.isRegLoc() && NextVA.isRegLoc() &&
             "We only handle register args!");

      // Split the double into the GPR pair (VA, NextVA) with VMOVRRD.
      AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                              MCID: TII.get(Opcode: ARM::VMOVRRD), DestReg: VA.getLocReg())
                      .addReg(RegNo: NextVA.getLocReg(), Flags: RegState::Define)
                      .addReg(RegNo: Arg));
      RegArgs.push_back(Elt: VA.getLocReg());
      RegArgs.push_back(Elt: NextVA.getLocReg());
    } else {
      assert(VA.isMemLoc());
      // Need to store on the stack.

      // Don't emit stores for undef values.
      if (isa<UndefValue>(Val: ArgVal))
        continue;

      // Store relative to SP at the assigned stack-slot offset.
      Address Addr;
      Addr.setKind(Address::RegBase);
      Addr.setReg(ARM::SP);
      Addr.setOffset(VA.getLocMemOffset());

      bool EmitRet = ARMEmitStore(VT: ArgVT, SrcReg: Arg, Addr); (void)EmitRet;
      assert(EmitRet && "Could not emit a store for argument!");
    }
  }

  return true;
}
2089
// Finish a call: emit CALLSEQ_END and copy the return value (if any) out of
// its physical register(s) into a fresh virtual register, recording the
// consumed physregs in UsedRegs so the caller can mark the rest dead.
bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
                             const Instruction *I, CallingConv::ID CC,
                             unsigned &NumBytes, bool isVarArg) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                          MCID: TII.get(Opcode: AdjStackUp))
                  .addImm(Val: NumBytes).addImm(Val: -1ULL));

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(VT: RetVT, OrigTy: I->getType(),
                             Fn: CCAssignFnForCall(CC, Return: true, isVarArg));

    // Copy all of the result registers out of their specified physreg.
    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
      // For this move we copy into two registers and then move into the
      // double fp reg we want.
      MVT DestVT = RVLocs[0].getValVT();
      const TargetRegisterClass* DstRC = TLI.getRegClassFor(VT: DestVT);
      Register ResultReg = createResultReg(RC: DstRC);
      AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                              MCID: TII.get(Opcode: ARM::VMOVDRR), DestReg: ResultReg)
                      .addReg(RegNo: RVLocs[0].getLocReg())
                      .addReg(RegNo: RVLocs[1].getLocReg()));

      UsedRegs.push_back(Elt: RVLocs[0].getLocReg());
      UsedRegs.push_back(Elt: RVLocs[1].getLocReg());

      // Finally update the result.
      updateValueMap(I, Reg: ResultReg);
    } else {
      assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
      MVT CopyVT = RVLocs[0].getValVT();

      // Special handling for extended integers.
      // Sub-i32 results arrive widened to i32, so copy the full register.
      if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
        CopyVT = MVT::i32;

      const TargetRegisterClass* DstRC = TLI.getRegClassFor(VT: CopyVT);

      Register ResultReg = createResultReg(RC: DstRC);
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
              MCID: TII.get(Opcode: TargetOpcode::COPY),
              DestReg: ResultReg).addReg(RegNo: RVLocs[0].getLocReg());
      UsedRegs.push_back(Elt: RVLocs[0].getLocReg());

      // Finally update the result.
      updateValueMap(I, Reg: ResultReg);
    }
  }

  return true;
}
2146
// Lower a 'ret' instruction. Handles void returns and a single register
// return value (with sext/zext of sub-i32 integers); bails out for swifterror,
// split-CSR, multi-location or memory returns so the DAG selector handles them.
bool ARMFastISel::SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(Val: I);
  const Function &F = *I->getParent()->getParent();
  const bool IsCmseNSEntry = F.hasFnAttribute(Kind: "cmse_nonsecure_entry");

  if (!FuncInfo.CanLowerReturn)
    return false;

  // swifterror returns need special DAG handling.
  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError))
    return false;

  if (TLI.supportSplitCSR(MF: FuncInfo.MF))
    return false;

  // Build a list of return value registers.
  SmallVector<Register, 4> RetRegs;

  CallingConv::ID CC = F.getCallingConv();
  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, ReturnType: F.getReturnType(), attr: F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, Fn: CCAssignFnForCall(CC, Return: true /* is Ret */,
                                                 isVarArg: F.isVarArg()));

    const Value *RV = Ret->getOperand(i_nocapture: 0);
    Register Reg = getRegForValue(V: RV);
    if (!Reg)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    Register SrcReg = Reg + VA.getValNo();
    EVT RVEVT = TLI.getValueType(DL, Ty: RV->getType());
    if (!RVEVT.isSimple()) return false;
    MVT RVVT = RVEVT.getSimpleVT();
    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      assert(DestVT == MVT::i32 && "ARM should always ext to i32");

      // Perform extension if flagged as either zext or sext.  Otherwise, do
      // nothing.
      if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
        SrcReg = ARMEmitIntExt(SrcVT: RVVT, SrcReg, DestVT, isZExt: Outs[0].Flags.isZExt());
        if (!SrcReg)
          return false;
      }
    }

    // Make the copy.
    Register DstReg = VA.getLocReg();
    const TargetRegisterClass* SrcRC = MRI.getRegClass(Reg: SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(Reg: DstReg))
      return false;
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: DstReg).addReg(RegNo: SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(Elt: VA.getLocReg());
  }

  // CMSE non-secure entry functions must return with tBXNS_RET to clear
  // secure state; this is only defined for Thumb.
  unsigned RetOpc;
  if (IsCmseNSEntry)
    if (isThumb2)
      RetOpc = ARM::tBXNS_RET;
    else
      llvm_unreachable("CMSE not valid for non-Thumb targets");
  else
    RetOpc = Subtarget->getReturnOpcode();

  // Emit the return, listing the value registers as implicit uses so they
  // stay live up to the return.
  MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                                    MCID: TII.get(Opcode: RetOpc));
  AddOptionalDefs(MIB);
  for (Register R : RetRegs)
    MIB.addReg(RegNo: R, Flags: RegState::Implicit);
  return true;
}
2244
2245unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
2246 if (UseReg)
2247 return isThumb2 ? gettBLXrOpcode(MF: *MF) : getBLXOpcode(MF: *MF);
2248 else
2249 return isThumb2 ? ARM::tBL : ARM::BL;
2250}
2251
2252Register ARMFastISel::getLibcallReg(const Twine &Name) {
2253 // Manually compute the global's type to avoid building it when unnecessary.
2254 Type *GVTy = PointerType::get(C&: *Context, /*AS=*/AddressSpace: 0);
2255 EVT LCREVT = TLI.getValueType(DL, Ty: GVTy);
2256 if (!LCREVT.isSimple())
2257 return Register();
2258
2259 GlobalValue *GV = M.getNamedGlobal(Name: Name.str());
2260 if (!GV)
2261 GV = new GlobalVariable(M, Type::getInt32Ty(C&: *Context), false,
2262 GlobalValue::ExternalLinkage, nullptr, Name);
2263
2264 return ARMMaterializeGV(GV, VT: LCREVT.getSimpleVT());
2265}
2266
2267// A quick function that will emit a call for a named libcall in F with the
2268// vector of passed arguments for the Instruction in I. We can assume that we
2269// can emit a call for any libcall we can produce. This is an abridged version
2270// of the full call infrastructure since we won't need to worry about things
2271// like computed function pointers or strange arguments at call sites.
2272// TODO: Try to unify this and the normal call bits for ARM, then try to unify
2273// with X86.
// Emit a call to the runtime-library routine 'Call' whose arguments are the
// operands of instruction I. Assumes a simple call shape: legal scalar
// argument/return types, no varargs, no special attributes.
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  // Resolve the libcall to a concrete implementation for this target.
  RTLIB::LibcallImpl LCImpl = LibcallLowering->getLibcallImpl(Call);
  if (LCImpl == RTLIB::Unsupported)
    return false;

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(Ty: RetTy, VT&: RetVT))
    return false;

  CallingConv::ID CC = LibcallLowering->getLibcallImplCallingConv(Call: LCImpl);

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(VT: RetVT, OrigTy: RetTy, Fn: CCAssignFnForCall(CC, Return: true, isVarArg: false));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<Register, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(N: I->getNumOperands());
  ArgRegs.reserve(N: I->getNumOperands());
  ArgVTs.reserve(N: I->getNumOperands());
  ArgFlags.reserve(N: I->getNumOperands());
  for (Value *Op : I->operands()) {
    Register Arg = getRegForValue(V: Op);
    if (!Arg)
      return false;

    Type *ArgTy = Op->getType();
    MVT ArgVT;
    if (!isTypeLegal(Ty: ArgTy, VT&: ArgVT)) return false;

    ISD::ArgFlagsTy Flags;
    Flags.setOrigAlign(DL.getABITypeAlign(Ty: ArgTy));

    Args.push_back(Elt: Op);
    ArgRegs.push_back(Elt: Arg);
    ArgVTs.push_back(Elt: ArgVT);
    ArgFlags.push_back(Elt: Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<Register, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, isVarArg: false))
    return false;

  StringRef FuncName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(CallImpl: LCImpl);

  // Under -mlong-calls the callee address must be materialized in a
  // register rather than referenced symbolically.
  Register CalleeReg;
  if (Subtarget->genLongCalls()) {
    CalleeReg = getLibcallReg(Name: FuncName);
    if (!CalleeReg)
      return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(UseReg: Subtarget->genLongCalls());
  MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt,
                                    MIMD, MCID: TII.get(Opcode: CallOpc));
  // BL / BLX don't take a predicate, but tBL / tBLX do.
  if (isThumb2)
    MIB.add(MOs: predOps(Pred: ARMCC::AL));
  if (Subtarget->genLongCalls()) {
    CalleeReg =
        constrainOperandRegClass(II: TII.get(Opcode: CallOpc), Op: CalleeReg, OpNum: isThumb2 ? 2 : 0);
    MIB.addReg(RegNo: CalleeReg);
  } else
    MIB.addExternalSymbol(FnName: FuncName.data());

  // Add implicit physical register uses to the call.
  for (Register R : RegArgs)
    MIB.addReg(RegNo: R, Flags: RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC));

  // Finish off the call including any return values.
  SmallVector<Register, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg: false)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}
2372
// Lower a call instruction. When IntrMemName is non-null we are lowering a
// memory intrinsic (memcpy/memmove/memset) as a libcall: the intrinsic's
// trailing argument is dropped and the callee is the named runtime routine.
bool ARMFastISel::SelectCall(const Instruction *I,
                             const char *IntrMemName = nullptr) {
  const CallInst *CI = cast<CallInst>(Val: I);
  const Value *Callee = CI->getCalledOperand();

  // Can't handle inline asm.
  if (isa<InlineAsm>(Val: Callee)) return false;

  // Allow SelectionDAG isel to handle tail calls.
  if (CI->isTailCall()) return false;

  // Check the calling convention.
  CallingConv::ID CC = CI->getCallingConv();

  // TODO: Avoid some calling conventions?

  FunctionType *FTy = CI->getFunctionType();
  bool isVarArg = FTy->isVarArg();

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(Ty: RetTy, VT&: RetVT) && RetVT != MVT::i16 &&
           RetVT != MVT::i8  && RetVT != MVT::i1)
    return false;

  // Can't handle non-double multi-reg retvals.
  if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
      RetVT != MVT::i16 && RetVT != MVT::i32) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(VT: RetVT, OrigTy: RetTy,
                             Fn: CCAssignFnForCall(CC, Return: true, isVarArg));
    if (RVLocs.size() >= 2 && RetVT != MVT::f64)
      return false;
  }

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<Register, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  unsigned arg_size = CI->arg_size();
  Args.reserve(N: arg_size);
  ArgRegs.reserve(N: arg_size);
  ArgVTs.reserve(N: arg_size);
  ArgFlags.reserve(N: arg_size);
  for (auto ArgI = CI->arg_begin(), ArgE = CI->arg_end(); ArgI != ArgE; ++ArgI) {
    // If we're lowering a memory intrinsic instead of a regular call, skip the
    // last argument, which shouldn't be passed to the underlying function.
    if (IntrMemName && ArgE - ArgI <= 1)
      break;

    // Record sext/zext attributes so ProcessCallArgs can widen correctly.
    ISD::ArgFlagsTy Flags;
    unsigned ArgIdx = ArgI - CI->arg_begin();
    if (CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SExt))
      Flags.setSExt();
    if (CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::InReg) ||
        CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::StructRet) ||
        CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftSelf) ||
        CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::SwiftError) ||
        CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::Nest) ||
        CI->paramHasAttr(ArgNo: ArgIdx, Kind: Attribute::ByVal))
      return false;

    Type *ArgTy = (*ArgI)->getType();
    MVT ArgVT;
    if (!isTypeLegal(Ty: ArgTy, VT&: ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
        ArgVT != MVT::i1)
      return false;

    Register Arg = getRegForValue(V: *ArgI);
    if (!Arg.isValid())
      return false;

    Flags.setOrigAlign(DL.getABITypeAlign(Ty: ArgTy));

    Args.push_back(Elt: *ArgI);
    ArgRegs.push_back(Elt: Arg);
    ArgVTs.push_back(Elt: ArgVT);
    ArgFlags.push_back(Elt: Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<Register, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, isVarArg))
    return false;

  // Use a register-indirect call for non-global callees or -mlong-calls.
  bool UseReg = false;
  const GlobalValue *GV = dyn_cast<GlobalValue>(Val: Callee);
  if (!GV || Subtarget->genLongCalls()) UseReg = true;

  Register CalleeReg;
  if (UseReg) {
    if (IntrMemName)
      CalleeReg = getLibcallReg(Name: IntrMemName);
    else
      CalleeReg = getRegForValue(V: Callee);

    if (!CalleeReg)
      return false;
  }

  // Issue the call.
  unsigned CallOpc = ARMSelectCallOp(UseReg);
  MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt,
                                    MIMD, MCID: TII.get(Opcode: CallOpc));

  // ARM calls don't take a predicate, but tBL / tBLX do.
  if(isThumb2)
    MIB.add(MOs: predOps(Pred: ARMCC::AL));
  if (UseReg) {
    CalleeReg =
        constrainOperandRegClass(II: TII.get(Opcode: CallOpc), Op: CalleeReg, OpNum: isThumb2 ? 2 : 0);
    MIB.addReg(RegNo: CalleeReg);
  } else if (!IntrMemName)
    MIB.addGlobalAddress(GV, Offset: 0, TargetFlags: 0);
  else
    MIB.addExternalSymbol(FnName: IntrMemName, TargetFlags: 0);

  // Add implicit physical register uses to the call.
  for (Register R : RegArgs)
    MIB.addReg(RegNo: R, Flags: RegState::Implicit);

  // Add a register mask with the call-preserved registers.
  // Proper defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC));

  // Finish off the call including any return values.
  SmallVector<Register, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
    return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  // Warn if the callee carries a "dontcall" attribute.
  diagnoseDontCall(CI: *CI);
  return true;
}
2520
2521bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
2522 return Len <= 16;
2523}
2524
// Expand a small memcpy inline as a sequence of widest-possible load/store
// pairs, walking Dest and Src forward by the chunk size each iteration.
// Alignment (when known) caps the access width. Returns false only when Len
// is too large to inline.
bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                                        MaybeAlign Alignment) {
  // Make sure we don't bloat code by inlining very large memcpy's.
  if (!ARMIsMemCpySmall(Len))
    return false;

  while (Len) {
    // Pick the widest chunk allowed by the remaining length and alignment.
    MVT VT;
    if (!Alignment || *Alignment >= 4) {
      if (Len >= 4)
        VT = MVT::i32;
      else if (Len >= 2)
        VT = MVT::i16;
      else {
        assert(Len == 1 && "Expected a length of 1!");
        VT = MVT::i8;
      }
    } else {
      assert(Alignment && "Alignment is set in this branch");
      // Bound based on alignment.
      if (Len >= 2 && *Alignment == 2)
        VT = MVT::i16;
      else {
        VT = MVT::i8;
      }
    }

    // Emit the load from Src then the store to Dest; both are expected to
    // succeed since the chunk types chosen above are always supported.
    bool RV;
    Register ResultReg;
    RV = ARMEmitLoad(VT, ResultReg, Addr&: Src);
    assert(RV && "Should be able to handle this load.");
    RV = ARMEmitStore(VT, SrcReg: ResultReg, Addr&: Dest);
    assert(RV && "Should be able to handle this store.");
    (void)RV;

    // Advance both addresses past the chunk just copied.
    unsigned Size = VT.getSizeInBits()/8;
    Len -= Size;
    Dest.setOffset(Dest.getOffset() + Size);
    Src.setOffset(Src.getOffset() + Size);
  }

  return true;
}
2568
// Lower the handful of intrinsics FastISel supports directly:
// frameaddress, memcpy/memmove/memset (inline when small, libcall
// otherwise), and trap. Anything else falls back to the DAG selector.
bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::frameaddress: {
    MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
    MFI.setFrameAddressIsTaken(true);

    unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
    const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
                                             : &ARM::GPRRegClass;

    const ARMBaseRegisterInfo *RegInfo = Subtarget->getRegisterInfo();
    Register FramePtr = RegInfo->getFrameRegister(MF: *(FuncInfo.MF));
    Register SrcReg = FramePtr;

    // Recursively load frame address
    // ldr r0 [fp]
    // ldr r0 [r0]
    // ldr r0 [r0]
    // ...
    // Depth 0 returns the frame pointer itself (the loop body never runs).
    Register DestReg;
    unsigned Depth = cast<ConstantInt>(Val: I.getOperand(i_nocapture: 0))->getZExtValue();
    while (Depth--) {
      DestReg = createResultReg(RC);
      AddOptionalDefs(MIB: BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                              MCID: TII.get(Opcode: LdrOpc), DestReg)
                      .addReg(RegNo: SrcReg).addImm(Val: 0));
      SrcReg = DestReg;
    }
    updateValueMap(I: &I, Reg: SrcReg);
    return true;
  }
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    const MemTransferInst &MTI = cast<MemTransferInst>(Val: I);
    // Don't handle volatile.
    if (MTI.isVolatile())
      return false;

    // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
    // we would emit dead code because we don't currently handle memmoves.
    bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
    if (isa<ConstantInt>(Val: MTI.getLength()) && isMemCpy) {
      // Small memcpy's are common enough that we want to do them without a call
      // if possible.
      uint64_t Len = cast<ConstantInt>(Val: MTI.getLength())->getZExtValue();
      if (ARMIsMemCpySmall(Len)) {
        Address Dest, Src;
        if (!ARMComputeAddress(Obj: MTI.getRawDest(), Addr&: Dest) ||
            !ARMComputeAddress(Obj: MTI.getRawSource(), Addr&: Src))
          return false;
        // Use the stricter of the two alignments for the inline expansion.
        MaybeAlign Alignment;
        if (MTI.getDestAlign() || MTI.getSourceAlign())
          Alignment = std::min(a: MTI.getDestAlign().valueOrOne(),
                               b: MTI.getSourceAlign().valueOrOne());
        if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
          return true;
      }
    }

    // The libcall takes an i32 length; wider lengths can't be lowered here.
    if (!MTI.getLength()->getType()->isIntegerTy(Bitwidth: 32))
      return false;

    if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
      return false;

    // Fall back to a call to the runtime routine.
    const char *IntrMemName = isa<MemCpyInst>(Val: I) ? "memcpy" : "memmove";
    return SelectCall(I: &I, IntrMemName);
  }
  case Intrinsic::memset: {
    const MemSetInst &MSI = cast<MemSetInst>(Val: I);
    // Don't handle volatile.
    if (MSI.isVolatile())
      return false;

    // The libcall takes an i32 length; wider lengths can't be lowered here.
    if (!MSI.getLength()->getType()->isIntegerTy(Bitwidth: 32))
      return false;

    if (MSI.getDestAddressSpace() > 255)
      return false;

    return SelectCall(I: &I, IntrMemName: "memset");
  }
  case Intrinsic::trap: {
    BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
            MCID: TII.get(Opcode: Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
    return true;
  }
  }
}
2660
2661bool ARMFastISel::SelectTrunc(const Instruction *I) {
2662 // The high bits for a type smaller than the register size are assumed to be
2663 // undefined.
2664 Value *Op = I->getOperand(i: 0);
2665
2666 EVT SrcVT, DestVT;
2667 SrcVT = TLI.getValueType(DL, Ty: Op->getType(), AllowUnknown: true);
2668 DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
2669
2670 if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
2671 return false;
2672 if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
2673 return false;
2674
2675 Register SrcReg = getRegForValue(V: Op);
2676 if (!SrcReg) return false;
2677
2678 // Because the high bits are undefined, a truncate doesn't generate
2679 // any code.
2680 updateValueMap(I, Reg: SrcReg);
2681 return true;
2682}
2683
/// ARMEmitIntExt - Emit a sign- or zero-extension of SrcReg (whose type is
/// SrcVT) to DestVT, returning the register holding the extended value, or
/// an invalid Register if the size combination is unsupported. Depending on
/// the subtarget, Thumb-ness, and source width this emits either a single
/// native extend instruction or a shift-left / shift-right pair.
Register ARMFastISel::ARMEmitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
                                    bool isZExt) {
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
    return Register();
  if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
    return Register();

  // Table of which combinations can be emitted as a single instruction,
  // and which will require two.
  static const uint8_t isSingleInstrTbl[3][2][2][2] = {
    //            ARM                     Thumb
    //           !hasV6Ops   hasV6Ops     !hasV6Ops   hasV6Ops
    // ext:     s  z      s  z          s  z      s  z
    /*  1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
    /*  8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
    /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
  };

  // Target registers for:
  //  - For ARM can never be PC.
  //  - For 16-bit Thumb are restricted to lower 8 registers.
  //  - For 32-bit Thumb are restricted to non-SP and non-PC.
  static const TargetRegisterClass *RCTbl[2][2] = {
    // Instructions: Two                     Single
    /* ARM      */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
    /* Thumb    */ { &ARM::tGPRRegClass,    &ARM::rGPRRegClass    }
  };

  // Table governing the instruction(s) to be emitted.
  // Indexed by [isSingleInstr][isThumb2][Bitness][isZExt].
  static const struct InstructionTable {
    uint32_t Opc   : 16;
    uint32_t hasS  :  1; // Some instructions have an S bit, always set it to 0.
    uint32_t Shift :  7; // For shift operand addressing mode, used by MOVsi.
    uint32_t Imm   :  8; // All instructions have either a shift or a mask.
  } IT[2][2][3][2] = {
    { // Two instructions (first is left shift, second is in this table).
      { // ARM                Opc           S  Shift             Imm
        /*  1 bit sext */ { { .Opc: ARM::MOVsi  , .hasS: 1, .Shift: ARM_AM::asr     , .Imm: 31 },
        /*  1 bit zext */   { .Opc: ARM::MOVsi  , .hasS: 1, .Shift: ARM_AM::lsr     , .Imm: 31 } },
        /*  8 bit sext */ { { .Opc: ARM::MOVsi  , .hasS: 1, .Shift: ARM_AM::asr     , .Imm: 24 },
        /*  8 bit zext */   { .Opc: ARM::MOVsi  , .hasS: 1, .Shift: ARM_AM::lsr     , .Imm: 24 } },
        /* 16 bit sext */ { { .Opc: ARM::MOVsi  , .hasS: 1, .Shift: ARM_AM::asr     , .Imm: 16 },
        /* 16 bit zext */   { .Opc: ARM::MOVsi  , .hasS: 1, .Shift: ARM_AM::lsr     , .Imm: 16 } }
      },
      { // Thumb              Opc           S  Shift             Imm
        /*  1 bit sext */ { { .Opc: ARM::tASRri , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm: 31 },
        /*  1 bit zext */   { .Opc: ARM::tLSRri , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm: 31 } },
        /*  8 bit sext */ { { .Opc: ARM::tASRri , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm: 24 },
        /*  8 bit zext */   { .Opc: ARM::tLSRri , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm: 24 } },
        /* 16 bit sext */ { { .Opc: ARM::tASRri , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm: 16 },
        /* 16 bit zext */   { .Opc: ARM::tLSRri , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm: 16 } }
      }
    },
    { // Single instruction.
      { // ARM                Opc           S  Shift             Imm
        /*  1 bit sext */ { { .Opc: ARM::KILL   , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm:  0 },
        /*  1 bit zext */   { .Opc: ARM::ANDri  , .hasS: 1, .Shift: ARM_AM::no_shift, .Imm:  1 } },
        /*  8 bit sext */ { { .Opc: ARM::SXTB   , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm:  0 },
        /*  8 bit zext */   { .Opc: ARM::ANDri  , .hasS: 1, .Shift: ARM_AM::no_shift, .Imm: 255 } },
        /* 16 bit sext */ { { .Opc: ARM::SXTH   , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm:  0 },
        /* 16 bit zext */   { .Opc: ARM::UXTH   , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm:  0 } }
      },
      { // Thumb              Opc           S  Shift             Imm
        /*  1 bit sext */ { { .Opc: ARM::KILL   , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm:  0 },
        /*  1 bit zext */   { .Opc: ARM::t2ANDri, .hasS: 1, .Shift: ARM_AM::no_shift, .Imm:  1 } },
        /*  8 bit sext */ { { .Opc: ARM::t2SXTB , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm:  0 },
        /*  8 bit zext */   { .Opc: ARM::t2ANDri, .hasS: 1, .Shift: ARM_AM::no_shift, .Imm: 255 } },
        /* 16 bit sext */ { { .Opc: ARM::t2SXTH , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm:  0 },
        /* 16 bit zext */   { .Opc: ARM::t2UXTH , .hasS: 0, .Shift: ARM_AM::no_shift, .Imm:  0 } }
      }
    }
  };

  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DestVT.getSizeInBits();
  (void) DestBits;
  assert((SrcBits < DestBits) && "can only extend to larger types");
  assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
         "other sizes unimplemented");
  assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
         "other sizes unimplemented");

  bool hasV6Ops = Subtarget->hasV6Ops();
  unsigned Bitness = SrcBits / 8;  // {1,8,16}=>{0,1,2}
  assert((Bitness < 3) && "sanity-check table bounds");

  // Look up what to emit and which register class the result must live in.
  bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
  const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
  const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
  unsigned Opc = ITP->Opc;
  assert(ARM::KILL != Opc && "Invalid table entry");
  unsigned hasS = ITP->hasS;
  ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
  assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
         "only MOVsi has shift operand addressing mode");
  unsigned Imm = ITP->Imm;

  // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
  bool setsCPSR = &ARM::tGPRRegClass == RC;
  unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;
  Register ResultReg;
  // MOVsi encodes shift and immediate in shift operand addressing mode.
  // The following condition has the same value when emitting two
  // instruction sequences: both are shifts.
  bool ImmIsSO = (Shift != ARM_AM::no_shift);

  // Either one or two instructions are emitted.
  // They're always of the form:
  //   dst = in OP imm
  // CPSR is set only by 16-bit Thumb instructions.
  // Predicate, if any, is AL.
  // S bit, if available, is always 0.
  // When two are emitted the first's result will feed as the second's input,
  // that value is then dead.
  unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
  for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
    ResultReg = createResultReg(RC);
    // In the two-instruction case, the first instruction is the left shift.
    bool isLsl = (0 == Instr) && !isSingleInstr;
    unsigned Opcode = isLsl ? LSLOpc : Opc;
    ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;
    unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShOp: ShiftAM, Imm) : Imm;
    // The intermediate value of a two-instruction sequence dies here.
    bool isKill = 1 == Instr;
    MachineInstrBuilder MIB = BuildMI(
        BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode), DestReg: ResultReg);
    if (setsCPSR)
      MIB.addReg(RegNo: ARM::CPSR, Flags: RegState::Define);
    SrcReg = constrainOperandRegClass(II: TII.get(Opcode), Op: SrcReg, OpNum: 1 + setsCPSR);
    MIB.addReg(RegNo: SrcReg, Flags: getKillRegState(B: isKill))
        .addImm(Val: ImmEnc)
        .add(MOs: predOps(Pred: ARMCC::AL));
    if (hasS)
      MIB.add(MO: condCodeOp());
    // Second instruction consumes the first's result.
    SrcReg = ResultReg;
  }

  // ResultReg holds the output of the final emitted instruction.
  return ResultReg;
}
2822
2823bool ARMFastISel::SelectIntExt(const Instruction *I) {
2824 // On ARM, in general, integer casts don't involve legal types; this code
2825 // handles promotable integers.
2826 Type *DestTy = I->getType();
2827 Value *Src = I->getOperand(i: 0);
2828 Type *SrcTy = Src->getType();
2829
2830 bool isZExt = isa<ZExtInst>(Val: I);
2831 Register SrcReg = getRegForValue(V: Src);
2832 if (!SrcReg) return false;
2833
2834 EVT SrcEVT, DestEVT;
2835 SrcEVT = TLI.getValueType(DL, Ty: SrcTy, AllowUnknown: true);
2836 DestEVT = TLI.getValueType(DL, Ty: DestTy, AllowUnknown: true);
2837 if (!SrcEVT.isSimple()) return false;
2838 if (!DestEVT.isSimple()) return false;
2839
2840 MVT SrcVT = SrcEVT.getSimpleVT();
2841 MVT DestVT = DestEVT.getSimpleVT();
2842 Register ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
2843 if (!ResultReg)
2844 return false;
2845 updateValueMap(I, Reg: ResultReg);
2846 return true;
2847}
2848
2849bool ARMFastISel::SelectShift(const Instruction *I,
2850 ARM_AM::ShiftOpc ShiftTy) {
2851 // We handle thumb2 mode by target independent selector
2852 // or SelectionDAG ISel.
2853 if (isThumb2)
2854 return false;
2855
2856 // Only handle i32 now.
2857 EVT DestVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
2858 if (DestVT != MVT::i32)
2859 return false;
2860
2861 unsigned Opc = ARM::MOVsr;
2862 unsigned ShiftImm;
2863 Value *Src2Value = I->getOperand(i: 1);
2864 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Src2Value)) {
2865 ShiftImm = CI->getZExtValue();
2866
2867 // Fall back to selection DAG isel if the shift amount
2868 // is zero or greater than the width of the value type.
2869 if (ShiftImm == 0 || ShiftImm >=32)
2870 return false;
2871
2872 Opc = ARM::MOVsi;
2873 }
2874
2875 Value *Src1Value = I->getOperand(i: 0);
2876 Register Reg1 = getRegForValue(V: Src1Value);
2877 if (!Reg1)
2878 return false;
2879
2880 Register Reg2;
2881 if (Opc == ARM::MOVsr) {
2882 Reg2 = getRegForValue(V: Src2Value);
2883 if (!Reg2)
2884 return false;
2885 }
2886
2887 Register ResultReg = createResultReg(RC: &ARM::GPRnopcRegClass);
2888 if (!ResultReg)
2889 return false;
2890
2891 MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2892 MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
2893 .addReg(RegNo: Reg1);
2894
2895 if (Opc == ARM::MOVsi)
2896 MIB.addImm(Val: ARM_AM::getSORegOpc(ShOp: ShiftTy, Imm: ShiftImm));
2897 else if (Opc == ARM::MOVsr) {
2898 MIB.addReg(RegNo: Reg2);
2899 MIB.addImm(Val: ARM_AM::getSORegOpc(ShOp: ShiftTy, Imm: 0));
2900 }
2901
2902 AddOptionalDefs(MIB);
2903 updateValueMap(I, Reg: ResultReg);
2904 return true;
2905}
2906
2907// TODO: SoftFP support.
2908bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
2909 switch (I->getOpcode()) {
2910 case Instruction::Load:
2911 return SelectLoad(I);
2912 case Instruction::Store:
2913 return SelectStore(I);
2914 case Instruction::CondBr:
2915 return SelectBranch(I);
2916 case Instruction::IndirectBr:
2917 return SelectIndirectBr(I);
2918 case Instruction::ICmp:
2919 case Instruction::FCmp:
2920 return SelectCmp(I);
2921 case Instruction::FPExt:
2922 return SelectFPExt(I);
2923 case Instruction::FPTrunc:
2924 return SelectFPTrunc(I);
2925 case Instruction::SIToFP:
2926 return SelectIToFP(I, /*isSigned*/ true);
2927 case Instruction::UIToFP:
2928 return SelectIToFP(I, /*isSigned*/ false);
2929 case Instruction::FPToSI:
2930 return SelectFPToI(I, /*isSigned*/ true);
2931 case Instruction::FPToUI:
2932 return SelectFPToI(I, /*isSigned*/ false);
2933 case Instruction::Add:
2934 return SelectBinaryIntOp(I, ISDOpcode: ISD::ADD);
2935 case Instruction::Or:
2936 return SelectBinaryIntOp(I, ISDOpcode: ISD::OR);
2937 case Instruction::Sub:
2938 return SelectBinaryIntOp(I, ISDOpcode: ISD::SUB);
2939 case Instruction::FAdd:
2940 return SelectBinaryFPOp(I, ISDOpcode: ISD::FADD);
2941 case Instruction::FSub:
2942 return SelectBinaryFPOp(I, ISDOpcode: ISD::FSUB);
2943 case Instruction::FMul:
2944 return SelectBinaryFPOp(I, ISDOpcode: ISD::FMUL);
2945 case Instruction::SDiv:
2946 return SelectDiv(I, /*isSigned*/ true);
2947 case Instruction::UDiv:
2948 return SelectDiv(I, /*isSigned*/ false);
2949 case Instruction::SRem:
2950 return SelectRem(I, /*isSigned*/ true);
2951 case Instruction::URem:
2952 return SelectRem(I, /*isSigned*/ false);
2953 case Instruction::Call:
2954 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I))
2955 return SelectIntrinsicCall(I: *II);
2956 return SelectCall(I);
2957 case Instruction::Select:
2958 return SelectSelect(I);
2959 case Instruction::Ret:
2960 return SelectRet(I);
2961 case Instruction::Trunc:
2962 return SelectTrunc(I);
2963 case Instruction::ZExt:
2964 case Instruction::SExt:
2965 return SelectIntExt(I);
2966 case Instruction::Shl:
2967 return SelectShift(I, ShiftTy: ARM_AM::lsl);
2968 case Instruction::LShr:
2969 return SelectShift(I, ShiftTy: ARM_AM::lsr);
2970 case Instruction::AShr:
2971 return SelectShift(I, ShiftTy: ARM_AM::asr);
2972 default: break;
2973 }
2974 return false;
2975}
2976
// This table describes sign- and zero-extend instructions which can be
// folded into a preceding load. All of these extends have an immediate
// (sometimes a mask and sometimes a shift) that's applied after
// extension.
static const struct FoldableLoadExtendsStruct {
  uint16_t Opc[2];  // Extend opcode, indexed by isThumb2: [ARM, Thumb].
  uint8_t ExpectedImm;     // Immediate the extend must carry (mask, or 0).
  uint8_t isZExt : 1;      // Whether the folded load should zero-extend.
  uint8_t ExpectedVT : 7;  // MVT::SimpleValueType the load must produce.
} FoldableLoadExtends[] = {
  { .Opc: { ARM::SXTH,  ARM::t2SXTH  }, .ExpectedImm: 0, .isZExt: 0, .ExpectedVT: MVT::i16 },
  { .Opc: { ARM::UXTH,  ARM::t2UXTH  }, .ExpectedImm: 0, .isZExt: 1, .ExpectedVT: MVT::i16 },
  { .Opc: { ARM::ANDri, ARM::t2ANDri }, .ExpectedImm: 255, .isZExt: 1, .ExpectedVT: MVT::i8 },
  { .Opc: { ARM::SXTB,  ARM::t2SXTB  }, .ExpectedImm: 0, .isZExt: 0, .ExpectedVT: MVT::i8 },
  { .Opc: { ARM::UXTB,  ARM::t2UXTB  }, .ExpectedImm: 0, .isZExt: 1, .ExpectedVT: MVT::i8 }
};
2993
2994/// The specified machine instr operand is a vreg, and that
2995/// vreg is being provided by the specified load instruction. If possible,
2996/// try to fold the load as an operand to the instruction, returning true if
2997/// successful.
2998bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
2999 const LoadInst *LI) {
3000 // Verify we have a legal type before going any further.
3001 MVT VT;
3002 if (!isLoadTypeLegal(Ty: LI->getType(), VT))
3003 return false;
3004
3005 // Combine load followed by zero- or sign-extend.
3006 // ldrb r1, [r0] ldrb r1, [r0]
3007 // uxtb r2, r1 =>
3008 // mov r3, r2 mov r3, r1
3009 if (MI->getNumOperands() < 3 || !MI->getOperand(i: 2).isImm())
3010 return false;
3011 const uint64_t Imm = MI->getOperand(i: 2).getImm();
3012
3013 bool Found = false;
3014 bool isZExt;
3015 for (const FoldableLoadExtendsStruct &FLE : FoldableLoadExtends) {
3016 if (FLE.Opc[isThumb2] == MI->getOpcode() &&
3017 (uint64_t)FLE.ExpectedImm == Imm &&
3018 MVT((MVT::SimpleValueType)FLE.ExpectedVT) == VT) {
3019 Found = true;
3020 isZExt = FLE.isZExt;
3021 }
3022 }
3023 if (!Found) return false;
3024
3025 // See if we can handle this address.
3026 Address Addr;
3027 if (!ARMComputeAddress(Obj: LI->getOperand(i_nocapture: 0), Addr)) return false;
3028
3029 Register ResultReg = MI->getOperand(i: 0).getReg();
3030 if (!ARMEmitLoad(VT, ResultReg, Addr, Alignment: LI->getAlign(), isZExt, allocReg: false))
3031 return false;
3032 MachineBasicBlock::iterator I(MI);
3033 removeDeadCode(I, E: std::next(x: I));
3034 return true;
3035}
3036
/// ARMLowerPICELF - Materialize the address of GlobalValue GV for PIC/ELF:
/// load a PC-relative constant-pool entry and add the PC to it. When GV is
/// not DSO-local, the pool entry is GOT_PREL and an extra load through the
/// GOT slot is emitted (folded into PICLDR for ARM mode). Returns the
/// register holding the final address.
Register ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, MVT VT) {
  bool UseGOT_PREL = !GV->isDSOLocal();
  LLVMContext *Context = &MF->getFunction().getContext();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  // PC bias of the pc-relative fixup: 4 in Thumb mode, 8 in ARM mode.
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
      C: GV, ID: ARMPCLabelIndex, Kind: ARMCP::CPValue, PCAdj,
      Modifier: UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
      /*AddCurrentAddress=*/UseGOT_PREL);

  // Create the constant-pool entry with pointer-preferred alignment.
  Align ConstAlign =
      MF->getDataLayout().getPrefTypeAlign(Ty: PointerType::get(C&: *Context, AddressSpace: 0));
  unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(V: CPV, Alignment: ConstAlign);
  MachineMemOperand *CPMMO =
      MF->getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF&: *MF),
                               F: MachineMemOperand::MOLoad, Size: 4, BaseAlignment: Align(4));

  // Load the constant-pool entry into a temporary register.
  Register TempReg = MF->getRegInfo().createVirtualRegister(RegClass: &ARM::rGPRRegClass);
  unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
  MachineInstrBuilder MIB =
      BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: TempReg)
          .addConstantPoolIndex(Idx)
          .addMemOperand(MMO: CPMMO);
  if (Opc == ARM::LDRcp)
    MIB.addImm(Val: 0); // LDRcp takes an extra zero offset operand.
  MIB.add(MOs: predOps(Pred: ARMCC::AL));

  // Fix the address by adding pc.
  Register DestReg = createResultReg(RC: TLI.getRegClassFor(VT));
  // In ARM mode with GOT_PREL, PICLDR folds the pc-add and the GOT load.
  Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ? ARM::PICLDR
                                                          : ARM::PICADD;
  DestReg = constrainOperandRegClass(II: TII.get(Opcode: Opc), Op: DestReg, OpNum: 0);
  MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg)
            .addReg(RegNo: TempReg)
            .addImm(Val: ARMPCLabelIndex);

  if (!Subtarget->isThumb())
    MIB.add(MOs: predOps(Pred: ARMCC::AL));

  // Thumb mode has no PICLDR equivalent, so the GOT indirection needs a
  // separate load.
  if (UseGOT_PREL && Subtarget->isThumb()) {
    Register NewDestReg = createResultReg(RC: TLI.getRegClassFor(VT));
    MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
                  MCID: TII.get(Opcode: ARM::t2LDRi12), DestReg: NewDestReg)
              .addReg(RegNo: DestReg)
              .addImm(Val: 0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }
  return DestReg;
}
3087
3088bool ARMFastISel::fastLowerArguments() {
3089 if (!FuncInfo.CanLowerReturn)
3090 return false;
3091
3092 const Function *F = FuncInfo.Fn;
3093 if (F->isVarArg())
3094 return false;
3095
3096 CallingConv::ID CC = F->getCallingConv();
3097 switch (CC) {
3098 default:
3099 return false;
3100 case CallingConv::Fast:
3101 case CallingConv::C:
3102 case CallingConv::ARM_AAPCS_VFP:
3103 case CallingConv::ARM_AAPCS:
3104 case CallingConv::ARM_APCS:
3105 case CallingConv::Swift:
3106 case CallingConv::SwiftTail:
3107 break;
3108 }
3109
3110 // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments
3111 // which are passed in r0 - r3.
3112 for (const Argument &Arg : F->args()) {
3113 if (Arg.getArgNo() >= 4)
3114 return false;
3115
3116 if (Arg.hasAttribute(Kind: Attribute::InReg) ||
3117 Arg.hasAttribute(Kind: Attribute::StructRet) ||
3118 Arg.hasAttribute(Kind: Attribute::SwiftSelf) ||
3119 Arg.hasAttribute(Kind: Attribute::SwiftError) ||
3120 Arg.hasAttribute(Kind: Attribute::ByVal))
3121 return false;
3122
3123 Type *ArgTy = Arg.getType();
3124 if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
3125 return false;
3126
3127 EVT ArgVT = TLI.getValueType(DL, Ty: ArgTy);
3128 if (!ArgVT.isSimple()) return false;
3129 switch (ArgVT.getSimpleVT().SimpleTy) {
3130 case MVT::i8:
3131 case MVT::i16:
3132 case MVT::i32:
3133 break;
3134 default:
3135 return false;
3136 }
3137 }
3138
3139 static const MCPhysReg GPRArgRegs[] = {
3140 ARM::R0, ARM::R1, ARM::R2, ARM::R3
3141 };
3142
3143 const TargetRegisterClass *RC = &ARM::rGPRRegClass;
3144 for (const Argument &Arg : F->args()) {
3145 unsigned ArgNo = Arg.getArgNo();
3146 MCRegister SrcReg = GPRArgRegs[ArgNo];
3147 Register DstReg = FuncInfo.MF->addLiveIn(PReg: SrcReg, RC);
3148 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3149 // Without this, EmitLiveInCopies may eliminate the livein if its only
3150 // use is a bitcast (which isn't turned into an instruction).
3151 Register ResultReg = createResultReg(RC);
3152 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3153 MCID: TII.get(Opcode: TargetOpcode::COPY),
3154 DestReg: ResultReg).addReg(RegNo: DstReg, Flags: getKillRegState(B: true));
3155 updateValueMap(I: &Arg, Reg: ResultReg);
3156 }
3157
3158 return true;
3159}
3160
3161namespace llvm {
3162
3163FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
3164 const TargetLibraryInfo *libInfo,
3165 const LibcallLoweringInfo *libcallLowering) {
3166 if (funcInfo.MF->getSubtarget<ARMSubtarget>().useFastISel())
3167 return new ARMFastISel(funcInfo, libInfo, libcallLowering);
3168
3169 return nullptr;
3170}
3171
3172} // end namespace llvm
3173