//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
14
15#include "AArch64.h"
16#include "AArch64CallingConvention.h"
17#include "AArch64MachineFunctionInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64SMEAttributes.h"
20#include "AArch64Subtarget.h"
21#include "MCTargetDesc/AArch64AddressingModes.h"
22#include "Utils/AArch64BaseInfo.h"
23#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/APInt.h"
25#include "llvm/ADT/DenseMap.h"
26#include "llvm/ADT/SmallVector.h"
27#include "llvm/Analysis/BranchProbabilityInfo.h"
28#include "llvm/CodeGen/CallingConvLower.h"
29#include "llvm/CodeGen/FastISel.h"
30#include "llvm/CodeGen/FunctionLoweringInfo.h"
31#include "llvm/CodeGen/ISDOpcodes.h"
32#include "llvm/CodeGen/MachineBasicBlock.h"
33#include "llvm/CodeGen/MachineConstantPool.h"
34#include "llvm/CodeGen/MachineFrameInfo.h"
35#include "llvm/CodeGen/MachineInstr.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineMemOperand.h"
38#include "llvm/CodeGen/MachineRegisterInfo.h"
39#include "llvm/CodeGen/ValueTypes.h"
40#include "llvm/CodeGenTypes/MachineValueType.h"
41#include "llvm/IR/Argument.h"
42#include "llvm/IR/Attributes.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constant.h"
46#include "llvm/IR/Constants.h"
47#include "llvm/IR/DataLayout.h"
48#include "llvm/IR/DerivedTypes.h"
49#include "llvm/IR/Function.h"
50#include "llvm/IR/GetElementPtrTypeIterator.h"
51#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
54#include "llvm/IR/Instructions.h"
55#include "llvm/IR/IntrinsicInst.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/Module.h"
59#include "llvm/IR/Operator.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/User.h"
62#include "llvm/IR/Value.h"
63#include "llvm/MC/MCInstrDesc.h"
64#include "llvm/MC/MCSymbol.h"
65#include "llvm/Support/AtomicOrdering.h"
66#include "llvm/Support/Casting.h"
67#include "llvm/Support/CodeGen.h"
68#include "llvm/Support/Compiler.h"
69#include "llvm/Support/ErrorHandling.h"
70#include "llvm/Support/MathExtras.h"
71#include <algorithm>
72#include <cassert>
73#include <cstdint>
74#include <iterator>
75#include <utility>
76
77using namespace llvm;
78
79namespace {
80
81class AArch64FastISel final : public FastISel {
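  // Describes an address computed by computeAddress(): a register or
  // frame-index base, an optional shifted/extended offset register, a
  // constant byte offset, and an optional GlobalValue.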
82 class Address {
83 public:
84 enum BaseKind { RegBase, FrameIndexBase };
85
86 private:
87 BaseKind Kind = RegBase;
88 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
89 union {
90 unsigned Reg;
91 int FI;
92 } Base;
93 Register OffsetReg;
94 unsigned Shift = 0;
95 int64_t Offset = 0;
96 const GlobalValue *GV = nullptr;
97
98 public:
99 Address() { Base.Reg = 0; }
100
101 void setKind(BaseKind K) { Kind = K; }
102 BaseKind getKind() const { return Kind; }
103 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
104 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
105 bool isRegBase() const { return Kind == RegBase; }
106 bool isFIBase() const { return Kind == FrameIndexBase; }
107
108 void setReg(Register Reg) {
109 assert(isRegBase() && "Invalid base register access!");
110 Base.Reg = Reg.id();
111 }
112
113 Register getReg() const {
114 assert(isRegBase() && "Invalid base register access!");
115 return Base.Reg;
116 }
117
118 void setOffsetReg(Register Reg) { OffsetReg = Reg; }
119
120 Register getOffsetReg() const { return OffsetReg; }
121
122 void setFI(unsigned FI) {
123 assert(isFIBase() && "Invalid base frame index access!");
124 Base.FI = FI;
125 }
126
127 unsigned getFI() const {
128 assert(isFIBase() && "Invalid base frame index access!");
129 return Base.FI;
130 }
131
132 void setOffset(int64_t O) { Offset = O; }
133 int64_t getOffset() { return Offset; }
134 void setShift(unsigned S) { Shift = S; }
135 unsigned getShift() { return Shift; }
136
137 void setGlobalValue(const GlobalValue *G) { GV = G; }
138 const GlobalValue *getGlobalValue() { return GV; }
139 };
140
141 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
142 /// make the right decision when generating code for different targets.
143 const AArch64Subtarget *Subtarget;
144 LLVMContext *Context;
145
146 bool fastLowerArguments() override;
147 bool fastLowerCall(CallLoweringInfo &CLI) override;
148 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
149
150private:
151 // Selection routines.
152 bool selectAddSub(const Instruction *I);
153 bool selectLogicalOp(const Instruction *I);
154 bool selectLoad(const Instruction *I);
155 bool selectStore(const Instruction *I);
156 bool selectBranch(const Instruction *I);
157 bool selectIndirectBr(const Instruction *I);
158 bool selectCmp(const Instruction *I);
159 bool selectSelect(const Instruction *I);
160 bool selectFPExt(const Instruction *I);
161 bool selectFPTrunc(const Instruction *I);
162 bool selectFPToInt(const Instruction *I, bool Signed);
163 bool selectIntToFP(const Instruction *I, bool Signed);
164 bool selectRem(const Instruction *I, unsigned ISDOpcode);
165 bool selectRet(const Instruction *I);
166 bool selectTrunc(const Instruction *I);
167 bool selectIntExt(const Instruction *I);
168 bool selectMul(const Instruction *I);
169 bool selectShift(const Instruction *I);
170 bool selectBitCast(const Instruction *I);
171 bool selectFRem(const Instruction *I);
172 bool selectSDiv(const Instruction *I);
173 bool selectGetElementPtr(const Instruction *I);
174 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
175
176 // Utility helper routines.
177 bool isTypeLegal(Type *Ty, MVT &VT);
178 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
179 bool isValueAvailable(const Value *V) const;
180 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
181 bool computeCallAddress(const Value *V, Address &Addr);
182 bool simplifyAddress(Address &Addr, MVT VT);
183 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
184 MachineMemOperand::Flags Flags,
185 unsigned ScaleFactor, MachineMemOperand *MMO);
186 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
187 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
188 MaybeAlign Alignment);
189 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
190 const Value *Cond);
191 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
192 bool optimizeSelect(const SelectInst *SI);
193 Register getRegForGEPIndex(const Value *Idx);
194
195 // Emit helper routines.
196 Register emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
197 const Value *RHS, bool SetFlags = false,
198 bool WantResult = true, bool IsZExt = false);
199 Register emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
200 Register RHSReg, bool SetFlags = false,
201 bool WantResult = true);
202 Register emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg, uint64_t Imm,
203 bool SetFlags = false, bool WantResult = true);
204 Register emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
205 Register RHSReg, AArch64_AM::ShiftExtendType ShiftType,
206 uint64_t ShiftImm, bool SetFlags = false,
207 bool WantResult = true);
208 Register emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
209 Register RHSReg, AArch64_AM::ShiftExtendType ExtType,
210 uint64_t ShiftImm, bool SetFlags = false,
211 bool WantResult = true);
212
213 // Emit functions.
214 bool emitCompareAndBranch(const BranchInst *BI);
215 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
216 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
217 bool emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
218 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
219 Register emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
220 MachineMemOperand *MMO = nullptr);
221 bool emitStore(MVT VT, Register SrcReg, Address Addr,
222 MachineMemOperand *MMO = nullptr);
223 bool emitStoreRelease(MVT VT, Register SrcReg, Register AddrReg,
224 MachineMemOperand *MMO = nullptr);
225 Register emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, bool isZExt);
226 Register emiti1Ext(Register SrcReg, MVT DestVT, bool isZExt);
227 Register emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
228 bool SetFlags = false, bool WantResult = true,
229 bool IsZExt = false);
230 Register emitAdd_ri_(MVT VT, Register Op0, int64_t Imm);
231 Register emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
232 bool SetFlags = false, bool WantResult = true,
233 bool IsZExt = false);
234 Register emitSubs_rr(MVT RetVT, Register LHSReg, Register RHSReg,
235 bool WantResult = true);
236 Register emitSubs_rs(MVT RetVT, Register LHSReg, Register RHSReg,
237 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
238 bool WantResult = true);
239 Register emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
240 const Value *RHS);
241 Register emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, Register LHSReg,
242 uint64_t Imm);
243 Register emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, Register LHSReg,
244 Register RHSReg, uint64_t ShiftImm);
245 Register emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
246 Register emitMul_rr(MVT RetVT, Register Op0, Register Op1);
247 Register emitSMULL_rr(MVT RetVT, Register Op0, Register Op1);
248 Register emitUMULL_rr(MVT RetVT, Register Op0, Register Op1);
249 Register emitLSL_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
250 Register emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
251 bool IsZExt = true);
252 Register emitLSR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
253 Register emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
254 bool IsZExt = true);
255 Register emitASR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
256 Register emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
257 bool IsZExt = false);
258
259 Register materializeInt(const ConstantInt *CI, MVT VT);
260 Register materializeFP(const ConstantFP *CFP, MVT VT);
261 Register materializeGV(const GlobalValue *GV);
262
263 // Call handling routines.
264private:
265 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
266 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
267 SmallVectorImpl<Type *> &OrigTys, unsigned &NumBytes);
268 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
269
270public:
271 // Backend specific FastISel code.
272 Register fastMaterializeAlloca(const AllocaInst *AI) override;
273 Register fastMaterializeConstant(const Constant *C) override;
274 Register fastMaterializeFloatZero(const ConstantFP *CF) override;
275
276 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
277 const TargetLibraryInfo *LibInfo,
278 const LibcallLoweringInfo *libcallLowering)
279 : FastISel(FuncInfo, LibInfo, libcallLowering,
280 /*SkipTargetIndependentISel=*/true) {
281 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
282 Context = &FuncInfo.Fn->getContext();
283 }
284
285 bool fastSelectInstruction(const Instruction *I) override;
286
287#include "AArch64GenFastISel.inc"
288};
289
290} // end anonymous namespace
291
292/// Check if the sign-/zero-extend will be a noop.
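/// The extend is free when its source is a single-use load (which can be
/// folded into an extending load) or an argument that already carries a
/// matching zeroext/signext attribute.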
293static bool isIntExtFree(const Instruction *I) {
294 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
295 "Unexpected integer extend instruction.");
296 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
297 "Unexpected value type.");
298 bool IsZExt = isa<ZExtInst>(Val: I);
299
300 if (const auto *LI = dyn_cast<LoadInst>(Val: I->getOperand(i: 0)))
301 if (LI->hasOneUse())
302 return true;
303
304 if (const auto *Arg = dyn_cast<Argument>(Val: I->getOperand(i: 0)))
305 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
306 return true;
307
308 return false;
309}
310
311/// Determine the implicit scale factor that is applied by a memory
312/// operation for a given value type.
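/// The scale is the access size in bytes; the unsigned 12-bit immediate
/// offset of a load/store is implicitly multiplied by this factor. A return
/// value of 0 means the type is not supported.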
313static unsigned getImplicitScaleFactor(MVT VT) {
314 switch (VT.SimpleTy) {
315 default:
316 return 0; // invalid
317 case MVT::i1: // fall-through
318 case MVT::i8:
319 return 1;
320 case MVT::i16:
321 return 2;
322 case MVT::i32: // fall-through
323 case MVT::f32:
324 return 4;
325 case MVT::i64: // fall-through
326 case MVT::f64:
327 return 8;
328 }
329}
330
331CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
332 if (CC == CallingConv::GHC)
333 return CC_AArch64_GHC;
334 if (CC == CallingConv::CFGuard_Check)
335 return CC_AArch64_Win64_CFGuard_Check;
336 if (Subtarget->isTargetDarwin())
337 return CC_AArch64_DarwinPCS;
338 if (Subtarget->isTargetWindows())
339 return CC_AArch64_Win64PCS;
340 return CC_AArch64_AAPCS;
341}
342
343Register AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
344 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
345 "Alloca should always return a pointer.");
346
  // Don't handle dynamic allocas.
  auto SI = FuncInfo.StaticAllocaMap.find(AI);
  if (SI == FuncInfo.StaticAllocaMap.end())
    return Register();

  Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
          ResultReg)
      .addFrameIndex(SI->second)
      .addImm(0)
      .addImm(0);
  return ResultReg;
363}
364
365Register AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
366 if (VT > MVT::i64)
367 return Register();
368
369 if (!CI->isZero())
370 return fastEmit_i(VT, RetVT: VT, Opcode: ISD::Constant, imm0: CI->getZExtValue());
371
372 // Create a copy from the zero register to materialize a "0" value.
373 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
374 : &AArch64::GPR32RegClass;
375 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
376 Register ResultReg = createResultReg(RC);
377 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: TargetOpcode::COPY),
378 DestReg: ResultReg).addReg(RegNo: ZeroReg, Flags: getKillRegState(B: true));
379 return ResultReg;
380}
381
382Register AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
383 // Positive zero (+0.0) has to be materialized with a fmov from the zero
384 // register, because the immediate version of fmov cannot encode zero.
385 if (CFP->isNullValue())
386 return fastMaterializeFloatZero(CF: CFP);
387
388 if (VT != MVT::f32 && VT != MVT::f64)
389 return Register();
390
391 const APFloat Val = CFP->getValueAPF();
392 bool Is64Bit = (VT == MVT::f64);
  // Check whether we can use an FMOV instruction to materialize the constant;
  // otherwise we have to materialize it via the constant pool.
395 int Imm =
396 Is64Bit ? AArch64_AM::getFP64Imm(FPImm: Val) : AArch64_AM::getFP32Imm(FPImm: Val);
397 if (Imm != -1) {
398 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
399 return fastEmitInst_i(MachineInstOpcode: Opc, RC: TLI.getRegClassFor(VT), Imm);
400 }
401
402 // For the large code model materialize the FP constant in code.
403 if (TM.getCodeModel() == CodeModel::Large) {
404 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
405 const TargetRegisterClass *RC = Is64Bit ?
406 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
407
408 Register TmpReg = createResultReg(RC);
409 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc1), DestReg: TmpReg)
410 .addImm(Val: CFP->getValueAPF().bitcastToAPInt().getZExtValue());
411
412 Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT));
413 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
414 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
415 .addReg(RegNo: TmpReg, Flags: getKillRegState(B: true));
416
417 return ResultReg;
418 }
419
420 // Materialize via constant pool. MachineConstantPool wants an explicit
421 // alignment.
422 Align Alignment = DL.getPrefTypeAlign(Ty: CFP->getType());
423
424 unsigned CPI = MCP.getConstantPoolIndex(C: cast<Constant>(Val: CFP), Alignment);
425 Register ADRPReg = createResultReg(RC: &AArch64::GPR64commonRegClass);
426 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADRP),
427 DestReg: ADRPReg).addConstantPoolIndex(Idx: CPI, Offset: 0, TargetFlags: AArch64II::MO_PAGE);
428
429 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
430 Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT));
431 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
432 .addReg(RegNo: ADRPReg)
433 .addConstantPoolIndex(Idx: CPI, Offset: 0, TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
434 return ResultReg;
435}
436
437Register AArch64FastISel::materializeGV(const GlobalValue *GV) {
438 // We can't handle thread-local variables quickly yet.
439 if (GV->isThreadLocal())
440 return Register();
441
442 // MachO still uses GOT for large code-model accesses, but ELF requires
443 // movz/movk sequences, which FastISel doesn't handle yet.
444 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
445 return Register();
446
447 if (FuncInfo.MF->getInfo<AArch64FunctionInfo>()->hasELFSignedGOT())
448 return Register();
449
450 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
451
452 EVT DestEVT = TLI.getValueType(DL, Ty: GV->getType(), AllowUnknown: true);
453 if (!DestEVT.isSimple())
454 return Register();
455
456 Register ADRPReg = createResultReg(RC: &AArch64::GPR64commonRegClass);
457 Register ResultReg;
458
459 if (OpFlags & AArch64II::MO_GOT) {
460 // ADRP + LDRX
461 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADRP),
462 DestReg: ADRPReg)
463 .addGlobalAddress(GV, Offset: 0, TargetFlags: AArch64II::MO_PAGE | OpFlags);
464
465 unsigned LdrOpc;
466 if (Subtarget->isTargetILP32()) {
467 ResultReg = createResultReg(RC: &AArch64::GPR32RegClass);
468 LdrOpc = AArch64::LDRWui;
469 } else {
470 ResultReg = createResultReg(RC: &AArch64::GPR64RegClass);
471 LdrOpc = AArch64::LDRXui;
472 }
473 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: LdrOpc),
474 DestReg: ResultReg)
475 .addReg(RegNo: ADRPReg)
476 .addGlobalAddress(GV, Offset: 0, TargetFlags: AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
477 AArch64II::MO_NC | OpFlags);
478 if (!Subtarget->isTargetILP32())
479 return ResultReg;
480
    // LDRWui produces a 32-bit register, but pointers in-register are 64 bits,
    // so we must extend the result on ILP32.
483 Register Result64 = createResultReg(RC: &AArch64::GPR64RegClass);
484 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
485 MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG))
486 .addDef(RegNo: Result64)
487 .addImm(Val: 0)
488 .addReg(RegNo: ResultReg, Flags: RegState::Kill)
489 .addImm(Val: AArch64::sub_32);
490 return Result64;
491 } else {
492 // ADRP + ADDX
493 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADRP),
494 DestReg: ADRPReg)
495 .addGlobalAddress(GV, Offset: 0, TargetFlags: AArch64II::MO_PAGE | OpFlags);
496
497 if (OpFlags & AArch64II::MO_TAGGED) {
498 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
499 // We do so by creating a MOVK that sets bits 48-63 of the register to
500 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
501 // the small code model so we can assume a binary size of <= 4GB, which
502 // makes the untagged PC relative offset positive. The binary must also be
503 // loaded into address range [0, 2^48). Both of these properties need to
504 // be ensured at runtime when using tagged addresses.
505 //
506 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
507 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
508 // are not exactly 1:1 with FastISel so we cannot easily abstract this
509 // out. At some point, it would be nice to find a way to not have this
510 // duplicate code.
511 Register DstReg = createResultReg(RC: &AArch64::GPR64commonRegClass);
512 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::MOVKXi),
513 DestReg: DstReg)
514 .addReg(RegNo: ADRPReg)
515 .addGlobalAddress(GV, /*Offset=*/0x100000000,
516 TargetFlags: AArch64II::MO_PREL | AArch64II::MO_G3)
517 .addImm(Val: 48);
518 ADRPReg = DstReg;
519 }
520
521 ResultReg = createResultReg(RC: &AArch64::GPR64spRegClass);
522 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADDXri),
523 DestReg: ResultReg)
524 .addReg(RegNo: ADRPReg)
525 .addGlobalAddress(GV, Offset: 0,
526 TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
527 .addImm(Val: 0);
528 }
529 return ResultReg;
530}
531
532Register AArch64FastISel::fastMaterializeConstant(const Constant *C) {
533 EVT CEVT = TLI.getValueType(DL, Ty: C->getType(), AllowUnknown: true);
534
535 // Only handle simple types.
536 if (!CEVT.isSimple())
537 return Register();
538 MVT VT = CEVT.getSimpleVT();
  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
  // 'null' pointers need somewhat special treatment.
541 if (isa<ConstantPointerNull>(Val: C)) {
542 assert(VT == MVT::i64 && "Expected 64-bit pointers");
543 return materializeInt(CI: ConstantInt::get(Ty: Type::getInt64Ty(C&: *Context), V: 0), VT);
544 }
545
546 if (const auto *CI = dyn_cast<ConstantInt>(Val: C))
547 return materializeInt(CI, VT);
548 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C))
549 return materializeFP(CFP, VT);
550 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C))
551 return materializeGV(GV);
552
553 return Register();
554}
555
556Register AArch64FastISel::fastMaterializeFloatZero(const ConstantFP *CFP) {
557 assert(CFP->isNullValue() &&
558 "Floating-point constant is not a positive zero.");
559 MVT VT;
560 if (!isTypeLegal(Ty: CFP->getType(), VT))
561 return Register();
562
563 if (VT != MVT::f32 && VT != MVT::f64)
564 return Register();
565
566 bool Is64Bit = (VT == MVT::f64);
567 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
568 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
569 return fastEmitInst_r(MachineInstOpcode: Opc, RC: TLI.getRegClassFor(VT), Op0: ZReg);
570}
571
572/// Check if the multiply is by a power-of-2 constant.
573static bool isMulPowOf2(const Value *I) {
574 if (const auto *MI = dyn_cast<MulOperator>(Val: I)) {
575 if (const auto *C = dyn_cast<ConstantInt>(Val: MI->getOperand(i_nocapture: 0)))
576 if (C->getValue().isPowerOf2())
577 return true;
578 if (const auto *C = dyn_cast<ConstantInt>(Val: MI->getOperand(i_nocapture: 1)))
579 if (C->getValue().isPowerOf2())
580 return true;
581 }
582 return false;
583}
584
585// Computes the address to get to an object.
586bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
587{
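  // Walk the expression that defines Obj and fold bitcasts, GEP and add/sub
  // constant offsets, and shl/mul-by-power-of-2 index scaling into Addr, so
  // that the access can use a base register or frame index plus either an
  // immediate or an (extended) offset register.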
588 const User *U = nullptr;
589 unsigned Opcode = Instruction::UserOp1;
590 if (const Instruction *I = dyn_cast<Instruction>(Val: Obj)) {
591 // Don't walk into other basic blocks unless the object is an alloca from
592 // another block, otherwise it may not have a virtual register assigned.
593 if (FuncInfo.StaticAllocaMap.count(Val: static_cast<const AllocaInst *>(Obj)) ||
594 FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB) {
595 Opcode = I->getOpcode();
596 U = I;
597 }
598 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Val: Obj)) {
599 Opcode = C->getOpcode();
600 U = C;
601 }
602
603 if (auto *Ty = dyn_cast<PointerType>(Val: Obj->getType()))
604 if (Ty->getAddressSpace() > 255)
605 // Fast instruction selection doesn't support the special
606 // address spaces.
607 return false;
608
609 switch (Opcode) {
610 default:
611 break;
612 case Instruction::BitCast:
613 // Look through bitcasts.
614 return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty);
615
616 case Instruction::IntToPtr:
617 // Look past no-op inttoptrs.
618 if (TLI.getValueType(DL, Ty: U->getOperand(i: 0)->getType()) ==
619 TLI.getPointerTy(DL))
620 return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty);
621 break;
622
623 case Instruction::PtrToInt:
624 // Look past no-op ptrtoints.
625 if (TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL))
626 return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty);
627 break;
628
629 case Instruction::GetElementPtr: {
630 Address SavedAddr = Addr;
631 uint64_t TmpOffset = Addr.getOffset();
632
633 // Iterate through the GEP folding the constants into offsets where
634 // we can.
635 for (gep_type_iterator GTI = gep_type_begin(GEP: U), E = gep_type_end(GEP: U);
636 GTI != E; ++GTI) {
637 const Value *Op = GTI.getOperand();
638 if (StructType *STy = GTI.getStructTypeOrNull()) {
639 const StructLayout *SL = DL.getStructLayout(Ty: STy);
640 unsigned Idx = cast<ConstantInt>(Val: Op)->getZExtValue();
641 TmpOffset += SL->getElementOffset(Idx);
642 } else {
643 uint64_t S = GTI.getSequentialElementStride(DL);
644 while (true) {
645 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Op)) {
646 // Constant-offset addressing.
647 TmpOffset += CI->getSExtValue() * S;
648 break;
649 }
650 if (canFoldAddIntoGEP(GEP: U, Add: Op)) {
651 // A compatible add with a constant operand. Fold the constant.
652 ConstantInt *CI =
653 cast<ConstantInt>(Val: cast<AddOperator>(Val: Op)->getOperand(i_nocapture: 1));
654 TmpOffset += CI->getSExtValue() * S;
655 // Iterate on the other operand.
656 Op = cast<AddOperator>(Val: Op)->getOperand(i_nocapture: 0);
657 continue;
658 }
659 // Unsupported
660 goto unsupported_gep;
661 }
662 }
663 }
664
665 // Try to grab the base operand now.
666 Addr.setOffset(TmpOffset);
667 if (computeAddress(Obj: U->getOperand(i: 0), Addr, Ty))
668 return true;
669
670 // We failed, restore everything and try the other options.
671 Addr = SavedAddr;
672
673 unsupported_gep:
674 break;
675 }
676 case Instruction::Alloca: {
677 const AllocaInst *AI = cast<AllocaInst>(Val: Obj);
678 DenseMap<const AllocaInst *, int>::iterator SI =
679 FuncInfo.StaticAllocaMap.find(Val: AI);
680 if (SI != FuncInfo.StaticAllocaMap.end()) {
681 Addr.setKind(Address::FrameIndexBase);
682 Addr.setFI(SI->second);
683 return true;
684 }
685 break;
686 }
687 case Instruction::Add: {
688 // Adds of constants are common and easy enough.
689 const Value *LHS = U->getOperand(i: 0);
690 const Value *RHS = U->getOperand(i: 1);
691
692 if (isa<ConstantInt>(Val: LHS))
693 std::swap(a&: LHS, b&: RHS);
694
695 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: RHS)) {
696 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
697 return computeAddress(Obj: LHS, Addr, Ty);
698 }
699
700 Address Backup = Addr;
701 if (computeAddress(Obj: LHS, Addr, Ty) && computeAddress(Obj: RHS, Addr, Ty))
702 return true;
703 Addr = Backup;
704
705 break;
706 }
707 case Instruction::Sub: {
708 // Subs of constants are common and easy enough.
709 const Value *LHS = U->getOperand(i: 0);
710 const Value *RHS = U->getOperand(i: 1);
711
712 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: RHS)) {
713 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
714 return computeAddress(Obj: LHS, Addr, Ty);
715 }
716 break;
717 }
718 case Instruction::Shl: {
719 if (Addr.getOffsetReg())
720 break;
721
722 const auto *CI = dyn_cast<ConstantInt>(Val: U->getOperand(i: 1));
723 if (!CI)
724 break;
725
726 unsigned Val = CI->getZExtValue();
727 if (Val < 1 || Val > 3)
728 break;
729
730 uint64_t NumBytes = 0;
731 if (Ty && Ty->isSized()) {
732 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
733 NumBytes = NumBits / 8;
734 if (!isPowerOf2_64(Value: NumBits))
735 NumBytes = 0;
736 }
737
738 if (NumBytes != (1ULL << Val))
739 break;
740
741 Addr.setShift(Val);
742 Addr.setExtendType(AArch64_AM::LSL);
743
744 const Value *Src = U->getOperand(i: 0);
745 if (const auto *I = dyn_cast<Instruction>(Val: Src)) {
746 if (FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB) {
747 // Fold the zext or sext when it won't become a noop.
748 if (const auto *ZE = dyn_cast<ZExtInst>(Val: I)) {
749 if (!isIntExtFree(I: ZE) &&
750 ZE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) {
751 Addr.setExtendType(AArch64_AM::UXTW);
752 Src = ZE->getOperand(i_nocapture: 0);
753 }
754 } else if (const auto *SE = dyn_cast<SExtInst>(Val: I)) {
755 if (!isIntExtFree(I: SE) &&
756 SE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) {
757 Addr.setExtendType(AArch64_AM::SXTW);
758 Src = SE->getOperand(i_nocapture: 0);
759 }
760 }
761 }
762 }
763
764 if (const auto *AI = dyn_cast<BinaryOperator>(Val: Src))
765 if (AI->getOpcode() == Instruction::And) {
766 const Value *LHS = AI->getOperand(i_nocapture: 0);
767 const Value *RHS = AI->getOperand(i_nocapture: 1);
768
769 if (const auto *C = dyn_cast<ConstantInt>(Val: LHS))
770 if (C->getValue() == 0xffffffff)
771 std::swap(a&: LHS, b&: RHS);
772
773 if (const auto *C = dyn_cast<ConstantInt>(Val: RHS))
774 if (C->getValue() == 0xffffffff) {
775 Addr.setExtendType(AArch64_AM::UXTW);
776 Register Reg = getRegForValue(V: LHS);
777 if (!Reg)
778 return false;
779 Reg = fastEmitInst_extractsubreg(RetVT: MVT::i32, Op0: Reg, Idx: AArch64::sub_32);
780 Addr.setOffsetReg(Reg);
781 return true;
782 }
783 }
784
785 Register Reg = getRegForValue(V: Src);
786 if (!Reg)
787 return false;
788 Addr.setOffsetReg(Reg);
789 return true;
790 }
791 case Instruction::Mul: {
792 if (Addr.getOffsetReg())
793 break;
794
795 if (!isMulPowOf2(I: U))
796 break;
797
798 const Value *LHS = U->getOperand(i: 0);
799 const Value *RHS = U->getOperand(i: 1);
800
801 // Canonicalize power-of-2 value to the RHS.
802 if (const auto *C = dyn_cast<ConstantInt>(Val: LHS))
803 if (C->getValue().isPowerOf2())
804 std::swap(a&: LHS, b&: RHS);
805
    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
807 const auto *C = cast<ConstantInt>(Val: RHS);
808 unsigned Val = C->getValue().logBase2();
809 if (Val < 1 || Val > 3)
810 break;
811
812 uint64_t NumBytes = 0;
813 if (Ty && Ty->isSized()) {
814 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
815 NumBytes = NumBits / 8;
816 if (!isPowerOf2_64(Value: NumBits))
817 NumBytes = 0;
818 }
819
820 if (NumBytes != (1ULL << Val))
821 break;
822
823 Addr.setShift(Val);
824 Addr.setExtendType(AArch64_AM::LSL);
825
826 const Value *Src = LHS;
827 if (const auto *I = dyn_cast<Instruction>(Val: Src)) {
828 if (FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB) {
829 // Fold the zext or sext when it won't become a noop.
830 if (const auto *ZE = dyn_cast<ZExtInst>(Val: I)) {
831 if (!isIntExtFree(I: ZE) &&
832 ZE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) {
833 Addr.setExtendType(AArch64_AM::UXTW);
834 Src = ZE->getOperand(i_nocapture: 0);
835 }
836 } else if (const auto *SE = dyn_cast<SExtInst>(Val: I)) {
837 if (!isIntExtFree(I: SE) &&
838 SE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) {
839 Addr.setExtendType(AArch64_AM::SXTW);
840 Src = SE->getOperand(i_nocapture: 0);
841 }
842 }
843 }
844 }
845
846 Register Reg = getRegForValue(V: Src);
847 if (!Reg)
848 return false;
849 Addr.setOffsetReg(Reg);
850 return true;
851 }
852 case Instruction::And: {
853 if (Addr.getOffsetReg())
854 break;
855
856 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
857 break;
858
859 const Value *LHS = U->getOperand(i: 0);
860 const Value *RHS = U->getOperand(i: 1);
861
862 if (const auto *C = dyn_cast<ConstantInt>(Val: LHS))
863 if (C->getValue() == 0xffffffff)
864 std::swap(a&: LHS, b&: RHS);
865
866 if (const auto *C = dyn_cast<ConstantInt>(Val: RHS))
867 if (C->getValue() == 0xffffffff) {
        Addr.setShift(0);
        Addr.setExtendType(AArch64_AM::UXTW);
871
872 Register Reg = getRegForValue(V: LHS);
873 if (!Reg)
874 return false;
875 Reg = fastEmitInst_extractsubreg(RetVT: MVT::i32, Op0: Reg, Idx: AArch64::sub_32);
876 Addr.setOffsetReg(Reg);
877 return true;
878 }
879 break;
880 }
881 case Instruction::SExt:
882 case Instruction::ZExt: {
883 if (!Addr.getReg() || Addr.getOffsetReg())
884 break;
885
886 const Value *Src = nullptr;
887 // Fold the zext or sext when it won't become a noop.
888 if (const auto *ZE = dyn_cast<ZExtInst>(Val: U)) {
889 if (!isIntExtFree(I: ZE) && ZE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) {
890 Addr.setExtendType(AArch64_AM::UXTW);
891 Src = ZE->getOperand(i_nocapture: 0);
892 }
893 } else if (const auto *SE = dyn_cast<SExtInst>(Val: U)) {
894 if (!isIntExtFree(I: SE) && SE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) {
895 Addr.setExtendType(AArch64_AM::SXTW);
896 Src = SE->getOperand(i_nocapture: 0);
897 }
898 }
899
900 if (!Src)
901 break;
902
903 Addr.setShift(0);
904 Register Reg = getRegForValue(V: Src);
905 if (!Reg)
906 return false;
907 Addr.setOffsetReg(Reg);
908 return true;
909 }
910 } // end switch
911
912 if (Addr.isRegBase() && !Addr.getReg()) {
913 Register Reg = getRegForValue(V: Obj);
914 if (!Reg)
915 return false;
916 Addr.setReg(Reg);
917 return true;
918 }
919
920 if (!Addr.getOffsetReg()) {
921 Register Reg = getRegForValue(V: Obj);
922 if (!Reg)
923 return false;
924 Addr.setOffsetReg(Reg);
925 return true;
926 }
927
928 return false;
929}
930
931bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
932 const User *U = nullptr;
933 unsigned Opcode = Instruction::UserOp1;
934 bool InMBB = true;
935
936 if (const auto *I = dyn_cast<Instruction>(Val: V)) {
937 Opcode = I->getOpcode();
938 U = I;
939 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
940 } else if (const auto *C = dyn_cast<ConstantExpr>(Val: V)) {
941 Opcode = C->getOpcode();
942 U = C;
943 }
944
945 switch (Opcode) {
946 default: break;
947 case Instruction::BitCast:
948 // Look past bitcasts if its operand is in the same BB.
949 if (InMBB)
950 return computeCallAddress(V: U->getOperand(i: 0), Addr);
951 break;
952 case Instruction::IntToPtr:
953 // Look past no-op inttoptrs if its operand is in the same BB.
954 if (InMBB &&
955 TLI.getValueType(DL, Ty: U->getOperand(i: 0)->getType()) ==
956 TLI.getPointerTy(DL))
957 return computeCallAddress(V: U->getOperand(i: 0), Addr);
958 break;
959 case Instruction::PtrToInt:
960 // Look past no-op ptrtoints if its operand is in the same BB.
961 if (InMBB && TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL))
962 return computeCallAddress(V: U->getOperand(i: 0), Addr);
963 break;
964 }
965
966 if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: V)) {
967 Addr.setGlobalValue(GV);
968 return true;
969 }
970
971 // If all else fails, try to materialize the value in a register.
972 if (!Addr.getGlobalValue()) {
973 Addr.setReg(getRegForValue(V));
974 return Addr.getReg().isValid();
975 }
976
977 return false;
978}
979
980bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
981 EVT evt = TLI.getValueType(DL, Ty, AllowUnknown: true);
982
983 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
984 return false;
985
986 // Only handle simple types.
987 if (evt == MVT::Other || !evt.isSimple())
988 return false;
989 VT = evt.getSimpleVT();
990
991 // This is a legal type, but it's not something we handle in fast-isel.
992 if (VT == MVT::f128)
993 return false;
994
995 // Handle all other legal types, i.e. a register that will directly hold this
996 // value.
997 return TLI.isTypeLegal(VT);
998}
999
1000/// Determine if the value type is supported by FastISel.
1001///
1002/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
1004bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1005 if (Ty->isVectorTy() && !IsVectorAllowed)
1006 return false;
1007
1008 if (isTypeLegal(Ty, VT))
1009 return true;
1010
  // If this is a type that can be sign- or zero-extended to a basic operation,
  // go ahead and accept it now.
1013 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1014 return true;
1015
1016 return false;
1017}
1018
1019bool AArch64FastISel::isValueAvailable(const Value *V) const {
1020 if (!isa<Instruction>(Val: V))
1021 return true;
1022
1023 const auto *I = cast<Instruction>(Val: V);
1024 return FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB;
1025}
1026
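/// Rewrite an address that a load/store of type \p VT cannot encode directly:
/// materialize the frame index into a register when needed, emit an explicit
/// add/shift for an offset register that cannot be folded, and move an
/// out-of-range immediate offset into the base register.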
1027bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1028 if (Subtarget->isTargetILP32())
1029 return false;
1030
1031 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1032 if (!ScaleFactor)
1033 return false;
1034
1035 bool ImmediateOffsetNeedsLowering = false;
1036 bool RegisterOffsetNeedsLowering = false;
1037 int64_t Offset = Addr.getOffset();
1038 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(x: Offset))
1039 ImmediateOffsetNeedsLowering = true;
1040 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1041 !isUInt<12>(x: Offset / ScaleFactor))
1042 ImmediateOffsetNeedsLowering = true;
1043
1044 // Cannot encode an offset register and an immediate offset in the same
1045 // instruction. Fold the immediate offset into the load/store instruction and
1046 // emit an additional add to take care of the offset register.
1047 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1048 RegisterOffsetNeedsLowering = true;
1049
1050 // Cannot encode zero register as base.
1051 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1052 RegisterOffsetNeedsLowering = true;
1053
1054 // If this is a stack pointer and the offset needs to be simplified then put
1055 // the alloca address into a register, set the base type back to register and
1056 // continue. This should almost never happen.
1057 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1058 {
1059 Register ResultReg = createResultReg(RC: &AArch64::GPR64spRegClass);
1060 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADDXri),
1061 DestReg: ResultReg)
1062 .addFrameIndex(Idx: Addr.getFI())
1063 .addImm(Val: 0)
1064 .addImm(Val: 0);
1065 Addr.setKind(Address::RegBase);
1066 Addr.setReg(ResultReg);
1067 }
1068
1069 if (RegisterOffsetNeedsLowering) {
1070 Register ResultReg;
1071 if (Addr.getReg()) {
1072 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1073 Addr.getExtendType() == AArch64_AM::UXTW )
1074 ResultReg = emitAddSub_rx(/*UseAdd=*/true, RetVT: MVT::i64, LHSReg: Addr.getReg(),
1075 RHSReg: Addr.getOffsetReg(), ExtType: Addr.getExtendType(),
1076 ShiftImm: Addr.getShift());
1077 else
1078 ResultReg = emitAddSub_rs(/*UseAdd=*/true, RetVT: MVT::i64, LHSReg: Addr.getReg(),
1079 RHSReg: Addr.getOffsetReg(), ShiftType: AArch64_AM::LSL,
1080 ShiftImm: Addr.getShift());
1081 } else {
1082 if (Addr.getExtendType() == AArch64_AM::UXTW)
1083 ResultReg = emitLSL_ri(RetVT: MVT::i64, SrcVT: MVT::i32, Op0Reg: Addr.getOffsetReg(),
1084 Imm: Addr.getShift(), /*IsZExt=*/true);
1085 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1086 ResultReg = emitLSL_ri(RetVT: MVT::i64, SrcVT: MVT::i32, Op0Reg: Addr.getOffsetReg(),
1087 Imm: Addr.getShift(), /*IsZExt=*/false);
1088 else
1089 ResultReg = emitLSL_ri(RetVT: MVT::i64, SrcVT: MVT::i64, Op0Reg: Addr.getOffsetReg(),
1090 Imm: Addr.getShift());
1091 }
1092 if (!ResultReg)
1093 return false;
1094
1095 Addr.setReg(ResultReg);
1096 Addr.setOffsetReg(0);
1097 Addr.setShift(0);
1098 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1099 }
1100
  // Since the offset is too large for the load/store instruction, get the
  // reg+offset into a register.
1103 if (ImmediateOffsetNeedsLowering) {
1104 Register ResultReg;
1105 if (Addr.getReg())
1106 // Try to fold the immediate into the add instruction.
1107 ResultReg = emitAdd_ri_(VT: MVT::i64, Op0: Addr.getReg(), Imm: Offset);
1108 else
1109 ResultReg = fastEmit_i(VT: MVT::i64, RetVT: MVT::i64, Opcode: ISD::Constant, imm0: Offset);
1110
1111 if (!ResultReg)
1112 return false;
1113 Addr.setReg(ResultReg);
1114 Addr.setOffset(0);
1115 }
1116 return true;
1117}
1118
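/// Add the address operands (frame index + immediate, base register + scaled
/// immediate, or base register + extended/shifted offset register) and the
/// memory operand to the load/store instruction being built in \p MIB.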
1119void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1120 const MachineInstrBuilder &MIB,
1121 MachineMemOperand::Flags Flags,
1122 unsigned ScaleFactor,
1123 MachineMemOperand *MMO) {
1124 int64_t Offset = Addr.getOffset() / ScaleFactor;
1125 // Frame base works a bit differently. Handle it separately.
1126 if (Addr.isFIBase()) {
1127 int FI = Addr.getFI();
1128 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1129 // and alignment should be based on the VT.
1130 MMO = FuncInfo.MF->getMachineMemOperand(
1131 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI, Offset), F: Flags,
1132 Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
1133 // Now add the rest of the operands.
1134 MIB.addFrameIndex(Idx: FI).addImm(Val: Offset);
1135 } else {
1136 assert(Addr.isRegBase() && "Unexpected address kind.");
1137 const MCInstrDesc &II = MIB->getDesc();
1138 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1139 Addr.setReg(
1140 constrainOperandRegClass(II, Op: Addr.getReg(), OpNum: II.getNumDefs()+Idx));
1141 Addr.setOffsetReg(
1142 constrainOperandRegClass(II, Op: Addr.getOffsetReg(), OpNum: II.getNumDefs()+Idx+1));
1143 if (Addr.getOffsetReg()) {
1144 assert(Addr.getOffset() == 0 && "Unexpected offset");
1145 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1146 Addr.getExtendType() == AArch64_AM::SXTX;
1147 MIB.addReg(RegNo: Addr.getReg());
1148 MIB.addReg(RegNo: Addr.getOffsetReg());
1149 MIB.addImm(Val: IsSigned);
1150 MIB.addImm(Val: Addr.getShift() != 0);
1151 } else
1152 MIB.addReg(RegNo: Addr.getReg()).addImm(Val: Offset);
1153 }
1154
1155 if (MMO)
1156 MIB.addMemOperand(MMO);
1157}
1158
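/// Emit an add or subtract of \p LHS and \p RHS. For adds, commutable operands
/// are canonicalized so that constants, mul-by-power-of-2, and shifts end up
/// on the RHS, where they can be folded into the immediate, extended-register,
/// or shifted-register instruction forms.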
1159Register AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1160 const Value *RHS, bool SetFlags,
1161 bool WantResult, bool IsZExt) {
1162 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1163 bool NeedExtend = false;
1164 switch (RetVT.SimpleTy) {
1165 default:
1166 return Register();
1167 case MVT::i1:
1168 NeedExtend = true;
1169 break;
1170 case MVT::i8:
1171 NeedExtend = true;
1172 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1173 break;
1174 case MVT::i16:
1175 NeedExtend = true;
1176 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1177 break;
1178 case MVT::i32: // fall-through
1179 case MVT::i64:
1180 break;
1181 }
1182 MVT SrcVT = RetVT;
1183 RetVT.SimpleTy = std::max(a: RetVT.SimpleTy, b: MVT::i32);
1184
1185 // Canonicalize immediates to the RHS first.
1186 if (UseAdd && isa<Constant>(Val: LHS) && !isa<Constant>(Val: RHS))
1187 std::swap(a&: LHS, b&: RHS);
1188
1189 // Canonicalize mul by power of 2 to the RHS.
1190 if (UseAdd && LHS->hasOneUse() && isValueAvailable(V: LHS))
1191 if (isMulPowOf2(I: LHS))
1192 std::swap(a&: LHS, b&: RHS);
1193
1194 // Canonicalize shift immediate to the RHS.
1195 if (UseAdd && LHS->hasOneUse() && isValueAvailable(V: LHS))
1196 if (const auto *SI = dyn_cast<BinaryOperator>(Val: LHS))
1197 if (isa<ConstantInt>(Val: SI->getOperand(i_nocapture: 1)))
1198 if (SI->getOpcode() == Instruction::Shl ||
1199 SI->getOpcode() == Instruction::LShr ||
1200 SI->getOpcode() == Instruction::AShr )
1201 std::swap(a&: LHS, b&: RHS);
1202
1203 Register LHSReg = getRegForValue(V: LHS);
1204 if (!LHSReg)
1205 return Register();
1206
1207 if (NeedExtend)
1208 LHSReg = emitIntExt(SrcVT, SrcReg: LHSReg, DestVT: RetVT, isZExt: IsZExt);
1209
1210 Register ResultReg;
1211 if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) {
1212 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1213 if (C->isNegative())
1214 ResultReg = emitAddSub_ri(UseAdd: !UseAdd, RetVT, LHSReg, Imm: -Imm, SetFlags,
1215 WantResult);
1216 else
1217 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1218 WantResult);
1219 } else if (const auto *C = dyn_cast<Constant>(Val: RHS))
1220 if (C->isNullValue())
1221 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm: 0, SetFlags, WantResult);
1222
1223 if (ResultReg)
1224 return ResultReg;
1225
1226 // Only extend the RHS within the instruction if there is a valid extend type.
1227 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1228 isValueAvailable(V: RHS)) {
1229 Register RHSReg = getRegForValue(V: RHS);
1230 if (!RHSReg)
1231 return Register();
1232 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtType: ExtendType, ShiftImm: 0,
1233 SetFlags, WantResult);
1234 }
1235
1236 // Check if the mul can be folded into the instruction.
1237 if (RHS->hasOneUse() && isValueAvailable(V: RHS)) {
1238 if (isMulPowOf2(I: RHS)) {
1239 const Value *MulLHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 0);
1240 const Value *MulRHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 1);
1241
1242 if (const auto *C = dyn_cast<ConstantInt>(Val: MulLHS))
1243 if (C->getValue().isPowerOf2())
1244 std::swap(a&: MulLHS, b&: MulRHS);
1245
1246 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1247 uint64_t ShiftVal = cast<ConstantInt>(Val: MulRHS)->getValue().logBase2();
1248 Register RHSReg = getRegForValue(V: MulLHS);
1249 if (!RHSReg)
1250 return Register();
1251 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType: AArch64_AM::LSL,
1252 ShiftImm: ShiftVal, SetFlags, WantResult);
1253 if (ResultReg)
1254 return ResultReg;
1255 }
1256 }
1257
1258 // Check if the shift can be folded into the instruction.
1259 if (RHS->hasOneUse() && isValueAvailable(V: RHS)) {
1260 if (const auto *SI = dyn_cast<BinaryOperator>(Val: RHS)) {
1261 if (const auto *C = dyn_cast<ConstantInt>(Val: SI->getOperand(i_nocapture: 1))) {
1262 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1263 switch (SI->getOpcode()) {
1264 default: break;
1265 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1266 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1267 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1268 }
1269 uint64_t ShiftVal = C->getZExtValue();
1270 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1271 Register RHSReg = getRegForValue(V: SI->getOperand(i_nocapture: 0));
1272 if (!RHSReg)
1273 return Register();
1274 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1275 ShiftImm: ShiftVal, SetFlags, WantResult);
1276 if (ResultReg)
1277 return ResultReg;
1278 }
1279 }
1280 }
1281 }
1282
1283 Register RHSReg = getRegForValue(V: RHS);
1284 if (!RHSReg)
1285 return Register();
1286
1287 if (NeedExtend)
1288 RHSReg = emitIntExt(SrcVT, SrcReg: RHSReg, DestVT: RetVT, isZExt: IsZExt);
1289
1290 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1291}
1292
1293Register AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
1294 Register RHSReg, bool SetFlags,
1295 bool WantResult) {
1296 assert(LHSReg && RHSReg && "Invalid register number.");
1297
1298 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1299 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1300 return Register();
1301
1302 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1303 return Register();
1304
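  // Opcode table indexed by [SetFlags][UseAdd][Is64Bit].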
1305 static const unsigned OpcTable[2][2][2] = {
1306 { { AArch64::SUBWrr, AArch64::SUBXrr },
1307 { AArch64::ADDWrr, AArch64::ADDXrr } },
1308 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1309 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1310 };
1311 bool Is64Bit = RetVT == MVT::i64;
1312 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1313 const TargetRegisterClass *RC =
1314 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1315 Register ResultReg;
1316 if (WantResult)
1317 ResultReg = createResultReg(RC);
1318 else
1319 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1320
1321 const MCInstrDesc &II = TII.get(Opcode: Opc);
1322 LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs());
1323 RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1);
1324 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
1325 .addReg(RegNo: LHSReg)
1326 .addReg(RegNo: RHSReg);
1327 return ResultReg;
1328}
1329
1330Register AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg,
1331 uint64_t Imm, bool SetFlags,
1332 bool WantResult) {
1333 assert(LHSReg && "Invalid register number.");
1334
1335 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1336 return Register();
1337
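  // ADD/SUB (immediate) encodes a 12-bit unsigned immediate, optionally
  // shifted left by 12 bits.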
1338 unsigned ShiftImm;
1339 if (isUInt<12>(x: Imm))
1340 ShiftImm = 0;
1341 else if ((Imm & 0xfff000) == Imm) {
1342 ShiftImm = 12;
1343 Imm >>= 12;
1344 } else
1345 return Register();
1346
1347 static const unsigned OpcTable[2][2][2] = {
1348 { { AArch64::SUBWri, AArch64::SUBXri },
1349 { AArch64::ADDWri, AArch64::ADDXri } },
1350 { { AArch64::SUBSWri, AArch64::SUBSXri },
1351 { AArch64::ADDSWri, AArch64::ADDSXri } }
1352 };
1353 bool Is64Bit = RetVT == MVT::i64;
1354 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1355 const TargetRegisterClass *RC;
1356 if (SetFlags)
1357 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1358 else
1359 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1360 Register ResultReg;
1361 if (WantResult)
1362 ResultReg = createResultReg(RC);
1363 else
1364 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1365
1366 const MCInstrDesc &II = TII.get(Opcode: Opc);
1367 LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs());
1368 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
1369 .addReg(RegNo: LHSReg)
1370 .addImm(Val: Imm)
1371 .addImm(Val: getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftImm));
1372 return ResultReg;
1373}
1374
1375Register AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
1376 Register RHSReg,
1377 AArch64_AM::ShiftExtendType ShiftType,
1378 uint64_t ShiftImm, bool SetFlags,
1379 bool WantResult) {
1380 assert(LHSReg && RHSReg && "Invalid register number.");
1381 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1382 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1383
1384 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1385 return Register();
1386
1387 // Don't deal with undefined shifts.
1388 if (ShiftImm >= RetVT.getSizeInBits())
1389 return Register();
1390
1391 static const unsigned OpcTable[2][2][2] = {
1392 { { AArch64::SUBWrs, AArch64::SUBXrs },
1393 { AArch64::ADDWrs, AArch64::ADDXrs } },
1394 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1395 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1396 };
1397 bool Is64Bit = RetVT == MVT::i64;
1398 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1399 const TargetRegisterClass *RC =
1400 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1401 Register ResultReg;
1402 if (WantResult)
1403 ResultReg = createResultReg(RC);
1404 else
1405 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1406
1407 const MCInstrDesc &II = TII.get(Opcode: Opc);
1408 LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs());
1409 RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1);
1410 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
1411 .addReg(RegNo: LHSReg)
1412 .addReg(RegNo: RHSReg)
1413 .addImm(Val: getShifterImm(ST: ShiftType, Imm: ShiftImm));
1414 return ResultReg;
1415}
1416
1417Register AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
1418 Register RHSReg,
1419 AArch64_AM::ShiftExtendType ExtType,
1420 uint64_t ShiftImm, bool SetFlags,
1421 bool WantResult) {
1422 assert(LHSReg && RHSReg && "Invalid register number.");
1423 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1424 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1425
1426 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1427 return Register();
1428
1429 if (ShiftImm >= 4)
1430 return Register();
1431
1432 static const unsigned OpcTable[2][2][2] = {
1433 { { AArch64::SUBWrx, AArch64::SUBXrx },
1434 { AArch64::ADDWrx, AArch64::ADDXrx } },
1435 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1436 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1437 };
1438 bool Is64Bit = RetVT == MVT::i64;
1439 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1440 const TargetRegisterClass *RC = nullptr;
1441 if (SetFlags)
1442 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1443 else
1444 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1445 Register ResultReg;
1446 if (WantResult)
1447 ResultReg = createResultReg(RC);
1448 else
1449 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1450
1451 const MCInstrDesc &II = TII.get(Opcode: Opc);
1452 LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs());
1453 RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1);
1454 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg)
1455 .addReg(RegNo: LHSReg)
1456 .addReg(RegNo: RHSReg)
1457 .addImm(Val: getArithExtendImm(ET: ExtType, Imm: ShiftImm));
1458 return ResultReg;
1459}
1460
1461bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1462 Type *Ty = LHS->getType();
1463 EVT EVT = TLI.getValueType(DL, Ty, AllowUnknown: true);
1464 if (!EVT.isSimple())
1465 return false;
1466 MVT VT = EVT.getSimpleVT();
1467
1468 switch (VT.SimpleTy) {
1469 default:
1470 return false;
1471 case MVT::i1:
1472 case MVT::i8:
1473 case MVT::i16:
1474 case MVT::i32:
1475 case MVT::i64:
1476 return emitICmp(RetVT: VT, LHS, RHS, IsZExt);
1477 case MVT::f32:
1478 case MVT::f64:
1479 return emitFCmp(RetVT: VT, LHS, RHS);
1480 }
1481}
1482
1483bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1484 bool IsZExt) {
1485 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1486 IsZExt)
1487 .isValid();
1488}
1489
1490bool AArch64FastISel::emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
1491 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1492 /*SetFlags=*/true, /*WantResult=*/false)
1493 .isValid();
1494}
1495
1496bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1497 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1498 return false;
1499
1500 // Check to see if the 2nd operand is a constant that we can encode directly
1501 // in the compare.
1502 bool UseImm = false;
1503 if (const auto *CFP = dyn_cast<ConstantFP>(Val: RHS))
1504 if (CFP->isZero() && !CFP->isNegative())
1505 UseImm = true;
1506
1507 Register LHSReg = getRegForValue(V: LHS);
1508 if (!LHSReg)
1509 return false;
1510
1511 if (UseImm) {
1512 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1513 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
1514 .addReg(RegNo: LHSReg);
1515 return true;
1516 }
1517
1518 Register RHSReg = getRegForValue(V: RHS);
1519 if (!RHSReg)
1520 return false;
1521
1522 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1523 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
1524 .addReg(RegNo: LHSReg)
1525 .addReg(RegNo: RHSReg);
1526 return true;
1527}
1528
1529Register AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1530 bool SetFlags, bool WantResult, bool IsZExt) {
1531 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1532 IsZExt);
1533}
1534
1535/// This method is a wrapper to simplify add emission.
1536///
1537/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1538/// that fails, then try to materialize the immediate into a register and use
1539/// emitAddSub_rr instead.
1540Register AArch64FastISel::emitAdd_ri_(MVT VT, Register Op0, int64_t Imm) {
1541 Register ResultReg;
1542 if (Imm < 0)
1543 ResultReg = emitAddSub_ri(UseAdd: false, RetVT: VT, LHSReg: Op0, Imm: -Imm);
1544 else
1545 ResultReg = emitAddSub_ri(UseAdd: true, RetVT: VT, LHSReg: Op0, Imm);
1546
1547 if (ResultReg)
1548 return ResultReg;
1549
1550 Register CReg = fastEmit_i(VT, RetVT: VT, Opcode: ISD::Constant, imm0: Imm);
1551 if (!CReg)
1552 return Register();
1553
1554 ResultReg = emitAddSub_rr(UseAdd: true, RetVT: VT, LHSReg: Op0, RHSReg: CReg);
1555 return ResultReg;
1556}
1557
1558Register AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1559 bool SetFlags, bool WantResult, bool IsZExt) {
1560 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1561 IsZExt);
1562}
1563
1564Register AArch64FastISel::emitSubs_rr(MVT RetVT, Register LHSReg,
1565 Register RHSReg, bool WantResult) {
1566 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1567 /*SetFlags=*/true, WantResult);
1568}
1569
1570Register AArch64FastISel::emitSubs_rs(MVT RetVT, Register LHSReg,
1571 Register RHSReg,
1572 AArch64_AM::ShiftExtendType ShiftType,
1573 uint64_t ShiftImm, bool WantResult) {
1574 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1575 ShiftImm, /*SetFlags=*/true, WantResult);
1576}
1577
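/// Emit a logical operation (AND/OR/XOR) on two values. Constant operands,
/// multiplies by a power of two, and shifts by an immediate are folded into
/// the instruction where possible.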
1578Register AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1579 const Value *LHS, const Value *RHS) {
1580 // Canonicalize immediates to the RHS first.
1581 if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS))
1582 std::swap(a&: LHS, b&: RHS);
1583
1584 // Canonicalize mul by power-of-2 to the RHS.
1585 if (LHS->hasOneUse() && isValueAvailable(V: LHS))
1586 if (isMulPowOf2(I: LHS))
1587 std::swap(a&: LHS, b&: RHS);
1588
1589 // Canonicalize shift immediate to the RHS.
1590 if (LHS->hasOneUse() && isValueAvailable(V: LHS))
1591 if (const auto *SI = dyn_cast<ShlOperator>(Val: LHS))
1592 if (isa<ConstantInt>(Val: SI->getOperand(i_nocapture: 1)))
1593 std::swap(a&: LHS, b&: RHS);
1594
1595 Register LHSReg = getRegForValue(V: LHS);
1596 if (!LHSReg)
1597 return Register();
1598
1599 Register ResultReg;
1600 if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) {
1601 uint64_t Imm = C->getZExtValue();
1602 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1603 }
1604 if (ResultReg)
1605 return ResultReg;
1606
1607 // Check if the mul can be folded into the instruction.
1608 if (RHS->hasOneUse() && isValueAvailable(V: RHS)) {
1609 if (isMulPowOf2(I: RHS)) {
1610 const Value *MulLHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 0);
1611 const Value *MulRHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 1);
1612
1613 if (const auto *C = dyn_cast<ConstantInt>(Val: MulLHS))
1614 if (C->getValue().isPowerOf2())
1615 std::swap(a&: MulLHS, b&: MulRHS);
1616
1617 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1618 uint64_t ShiftVal = cast<ConstantInt>(Val: MulRHS)->getValue().logBase2();
1619
1620 Register RHSReg = getRegForValue(V: MulLHS);
1621 if (!RHSReg)
1622 return Register();
1623 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftImm: ShiftVal);
1624 if (ResultReg)
1625 return ResultReg;
1626 }
1627 }
1628
1629 // Check if the shift can be folded into the instruction.
1630 if (RHS->hasOneUse() && isValueAvailable(V: RHS)) {
1631 if (const auto *SI = dyn_cast<ShlOperator>(Val: RHS))
1632 if (const auto *C = dyn_cast<ConstantInt>(Val: SI->getOperand(i_nocapture: 1))) {
1633 uint64_t ShiftVal = C->getZExtValue();
1634 Register RHSReg = getRegForValue(V: SI->getOperand(i_nocapture: 0));
1635 if (!RHSReg)
1636 return Register();
1637 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftImm: ShiftVal);
1638 if (ResultReg)
1639 return ResultReg;
1640 }
1641 }
1642
1643 Register RHSReg = getRegForValue(V: RHS);
1644 if (!RHSReg)
1645 return Register();
1646
1647 MVT VT = std::max(a: MVT::i32, b: RetVT.SimpleTy);
1648 ResultReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISDOpc, Op0: LHSReg, Op1: RHSReg);
1649 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1650 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1651 ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask);
1652 }
1653 return ResultReg;
1654}
1655
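/// Emit a logical operation with an immediate operand. Fails if the immediate
/// cannot be encoded as a logical immediate for the given register size.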
1656Register AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1657 Register LHSReg, uint64_t Imm) {
1658 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1659 "ISD nodes are not consecutive!");
1660 static const unsigned OpcTable[3][2] = {
1661 { AArch64::ANDWri, AArch64::ANDXri },
1662 { AArch64::ORRWri, AArch64::ORRXri },
1663 { AArch64::EORWri, AArch64::EORXri }
1664 };
1665 const TargetRegisterClass *RC;
1666 unsigned Opc;
1667 unsigned RegSize;
1668 switch (RetVT.SimpleTy) {
1669 default:
1670 return Register();
1671 case MVT::i1:
1672 case MVT::i8:
1673 case MVT::i16:
1674 case MVT::i32: {
1675 unsigned Idx = ISDOpc - ISD::AND;
1676 Opc = OpcTable[Idx][0];
1677 RC = &AArch64::GPR32spRegClass;
1678 RegSize = 32;
1679 break;
1680 }
1681 case MVT::i64:
1682 Opc = OpcTable[ISDOpc - ISD::AND][1];
1683 RC = &AArch64::GPR64spRegClass;
1684 RegSize = 64;
1685 break;
1686 }
1687
1688 if (!AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize))
1689 return Register();
1690
1691 Register ResultReg =
1692 fastEmitInst_ri(MachineInstOpcode: Opc, RC, Op0: LHSReg,
1693 Imm: AArch64_AM::encodeLogicalImmediate(imm: Imm, regSize: RegSize));
1694 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1695 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1696 ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask);
1697 }
1698 return ResultReg;
1699}
1700
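/// Emit a logical operation whose second operand is shifted left by ShiftImm.
/// Shift amounts equal to or larger than the type width are rejected.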
1701Register AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1702 Register LHSReg, Register RHSReg,
1703 uint64_t ShiftImm) {
1704 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1705 "ISD nodes are not consecutive!");
1706 static const unsigned OpcTable[3][2] = {
1707 { AArch64::ANDWrs, AArch64::ANDXrs },
1708 { AArch64::ORRWrs, AArch64::ORRXrs },
1709 { AArch64::EORWrs, AArch64::EORXrs }
1710 };
1711
1712 // Don't deal with undefined shifts.
1713 if (ShiftImm >= RetVT.getSizeInBits())
1714 return Register();
1715
1716 const TargetRegisterClass *RC;
1717 unsigned Opc;
1718 switch (RetVT.SimpleTy) {
1719 default:
1720 return Register();
1721 case MVT::i1:
1722 case MVT::i8:
1723 case MVT::i16:
1724 case MVT::i32:
1725 Opc = OpcTable[ISDOpc - ISD::AND][0];
1726 RC = &AArch64::GPR32RegClass;
1727 break;
1728 case MVT::i64:
1729 Opc = OpcTable[ISDOpc - ISD::AND][1];
1730 RC = &AArch64::GPR64RegClass;
1731 break;
1732 }
1733 Register ResultReg =
1734 fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: LHSReg, Op1: RHSReg,
1735 Imm: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftImm));
1736 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1737 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1738 ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask);
1739 }
1740 return ResultReg;
1741}
1742
1743Register AArch64FastISel::emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
1744 return emitLogicalOp_ri(ISDOpc: ISD::AND, RetVT, LHSReg, Imm);
1745}
1746
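/// Emit a load of type VT from the given address, extending the result to
/// RetVT. An i1 result is masked down to a single bit, and zero-extending
/// loads to i64 are widened with a SUBREG_TO_REG.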
1747Register AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1748 bool WantZExt, MachineMemOperand *MMO) {
1749 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1750 return Register();
1751
1752 // Simplify this down to something we can handle.
1753 if (!simplifyAddress(Addr, VT))
1754 return Register();
1755
1756 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1757 if (!ScaleFactor)
1758 llvm_unreachable("Unexpected value type.");
1759
1760 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1761 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1762 bool UseScaled = true;
1763 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1764 UseScaled = false;
1765 ScaleFactor = 1;
1766 }
1767
1768 static const unsigned GPOpcTable[2][8][4] = {
1769 // Sign-extend.
1770 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1771 AArch64::LDURXi },
1772 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1773 AArch64::LDURXi },
1774 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1775 AArch64::LDRXui },
1776 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1777 AArch64::LDRXui },
1778 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1779 AArch64::LDRXroX },
1780 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1781 AArch64::LDRXroX },
1782 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1783 AArch64::LDRXroW },
1784 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1785 AArch64::LDRXroW }
1786 },
1787 // Zero-extend.
1788 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1789 AArch64::LDURXi },
1790 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1791 AArch64::LDURXi },
1792 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1793 AArch64::LDRXui },
1794 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1795 AArch64::LDRXui },
1796 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1797 AArch64::LDRXroX },
1798 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1799 AArch64::LDRXroX },
1800 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1801 AArch64::LDRXroW },
1802 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1803 AArch64::LDRXroW }
1804 }
1805 };
1806
1807 static const unsigned FPOpcTable[4][2] = {
1808 { AArch64::LDURSi, AArch64::LDURDi },
1809 { AArch64::LDRSui, AArch64::LDRDui },
1810 { AArch64::LDRSroX, AArch64::LDRDroX },
1811 { AArch64::LDRSroW, AArch64::LDRDroW }
1812 };
1813
1814 unsigned Opc;
1815 const TargetRegisterClass *RC;
1816 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1817 Addr.getOffsetReg();
1818 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1819 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1820 Addr.getExtendType() == AArch64_AM::SXTW)
1821 Idx++;
1822
1823 bool IsRet64Bit = RetVT == MVT::i64;
1824 switch (VT.SimpleTy) {
1825 default:
1826 llvm_unreachable("Unexpected value type.");
1827 case MVT::i1: // Intentional fall-through.
1828 case MVT::i8:
1829 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1830 RC = (IsRet64Bit && !WantZExt) ?
1831 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1832 break;
1833 case MVT::i16:
1834 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1835 RC = (IsRet64Bit && !WantZExt) ?
1836 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1837 break;
1838 case MVT::i32:
1839 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1840 RC = (IsRet64Bit && !WantZExt) ?
1841 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1842 break;
1843 case MVT::i64:
1844 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1845 RC = &AArch64::GPR64RegClass;
1846 break;
1847 case MVT::f32:
1848 Opc = FPOpcTable[Idx][0];
1849 RC = &AArch64::FPR32RegClass;
1850 break;
1851 case MVT::f64:
1852 Opc = FPOpcTable[Idx][1];
1853 RC = &AArch64::FPR64RegClass;
1854 break;
1855 }
1856
1857 // Create the base instruction, then add the operands.
1858 Register ResultReg = createResultReg(RC);
1859 MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1860 MCID: TII.get(Opcode: Opc), DestReg: ResultReg);
1861 addLoadStoreOperands(Addr, MIB, Flags: MachineMemOperand::MOLoad, ScaleFactor, MMO);
1862
1863 // Loading an i1 requires special handling.
1864 if (VT == MVT::i1) {
1865 Register ANDReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: 1);
1866 assert(ANDReg && "Unexpected AND instruction emission failure.");
1867 ResultReg = ANDReg;
1868 }
1869
1870 // For zero-extending loads to 64 bits we emit a 32-bit load and then convert
1871 // the 32-bit register to a 64-bit register.
1872 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1873 Register Reg64 = createResultReg(RC: &AArch64::GPR64RegClass);
1874 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
1875 MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: Reg64)
1876 .addImm(Val: 0)
1877 .addReg(RegNo: ResultReg, Flags: getKillRegState(B: true))
1878 .addImm(Val: AArch64::sub_32);
1879 ResultReg = Reg64;
1880 }
1881 return ResultReg;
1882}
1883
1884bool AArch64FastISel::selectAddSub(const Instruction *I) {
1885 MVT VT;
1886 if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true))
1887 return false;
1888
1889 if (VT.isVector())
1890 return selectOperator(I, Opcode: I->getOpcode());
1891
1892 Register ResultReg;
1893 switch (I->getOpcode()) {
1894 default:
1895 llvm_unreachable("Unexpected instruction.");
1896 case Instruction::Add:
1897 ResultReg = emitAdd(RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1));
1898 break;
1899 case Instruction::Sub:
1900 ResultReg = emitSub(RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1));
1901 break;
1902 }
1903 if (!ResultReg)
1904 return false;
1905
1906 updateValueMap(I, Reg: ResultReg);
1907 return true;
1908}
1909
1910bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1911 MVT VT;
1912 if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true))
1913 return false;
1914
1915 if (VT.isVector())
1916 return selectOperator(I, Opcode: I->getOpcode());
1917
1918 Register ResultReg;
1919 switch (I->getOpcode()) {
1920 default:
1921 llvm_unreachable("Unexpected instruction.");
1922 case Instruction::And:
1923 ResultReg = emitLogicalOp(ISDOpc: ISD::AND, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1));
1924 break;
1925 case Instruction::Or:
1926 ResultReg = emitLogicalOp(ISDOpc: ISD::OR, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1));
1927 break;
1928 case Instruction::Xor:
1929 ResultReg = emitLogicalOp(ISDOpc: ISD::XOR, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1));
1930 break;
1931 }
1932 if (!ResultReg)
1933 return false;
1934
1935 updateValueMap(I, Reg: ResultReg);
1936 return true;
1937}
1938
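/// Select a load instruction. A single sign- or zero-extending use of the
/// loaded value is folded into the load when possible.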
1939bool AArch64FastISel::selectLoad(const Instruction *I) {
1940 MVT VT;
1941 // Verify we have a legal type before going any further. Currently, we handle
1942 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1943 // those that can be sign- or zero-extended to a basic operation (i1/i8/i16).
1944 if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true) ||
1945 cast<LoadInst>(Val: I)->isAtomic())
1946 return false;
1947
1948 const Value *SV = I->getOperand(i: 0);
1949 if (TLI.supportSwiftError()) {
1950 // Swifterror values can come from either a function parameter with
1951 // swifterror attribute or an alloca with swifterror attribute.
1952 if (const Argument *Arg = dyn_cast<Argument>(Val: SV)) {
1953 if (Arg->hasSwiftErrorAttr())
1954 return false;
1955 }
1956
1957 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: SV)) {
1958 if (Alloca->isSwiftError())
1959 return false;
1960 }
1961 }
1962
1963 // See if we can handle this address.
1964 Address Addr;
1965 if (!computeAddress(Obj: I->getOperand(i: 0), Addr, Ty: I->getType()))
1966 return false;
1967
1968 // Fold the following sign-/zero-extend into the load instruction.
1969 bool WantZExt = true;
1970 MVT RetVT = VT;
1971 const Value *IntExtVal = nullptr;
1972 if (I->hasOneUse()) {
1973 if (const auto *ZE = dyn_cast<ZExtInst>(Val: I->use_begin()->getUser())) {
1974 if (isTypeSupported(Ty: ZE->getType(), VT&: RetVT))
1975 IntExtVal = ZE;
1976 else
1977 RetVT = VT;
1978 } else if (const auto *SE = dyn_cast<SExtInst>(Val: I->use_begin()->getUser())) {
1979 if (isTypeSupported(Ty: SE->getType(), VT&: RetVT))
1980 IntExtVal = SE;
1981 else
1982 RetVT = VT;
1983 WantZExt = false;
1984 }
1985 }
1986
1987 Register ResultReg =
1988 emitLoad(VT, RetVT, Addr, WantZExt, MMO: createMachineMemOperandFor(I));
1989 if (!ResultReg)
1990 return false;
1991
1992 // There are a few different cases we have to handle, because the load or the
1993 // sign-/zero-extend might not be selected by FastISel if we fall back to
1994 // SelectionDAG. There is also an ordering issue when both instructions are in
1995 // different basic blocks.
1996 // 1.) The load instruction is selected by FastISel, but the integer extend
1997 // not. This usually happens when the integer extend is in a different
1998 // basic block and SelectionDAG took over for that basic block.
1999 // 2.) The load instruction is selected before the integer extend. This only
2000 // happens when the integer extend is in a different basic block.
2001 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2002 // by FastISel. This happens if there are instructions between the load
2003 // and the integer extend that couldn't be selected by FastISel.
2004 if (IntExtVal) {
2005 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2006 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2007 // it when it selects the integer extend.
2008 Register Reg = lookUpRegForValue(V: IntExtVal);
2009 auto *MI = MRI.getUniqueVRegDef(Reg);
2010 if (!MI) {
2011 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2012 if (WantZExt) {
2013 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2014 MachineBasicBlock::iterator I(std::prev(x: FuncInfo.InsertPt));
2015 ResultReg = std::prev(x: I)->getOperand(i: 0).getReg();
2016 removeDeadCode(I, E: std::next(x: I));
2017 } else
2018 ResultReg = fastEmitInst_extractsubreg(RetVT: MVT::i32, Op0: ResultReg,
2019 Idx: AArch64::sub_32);
2020 }
2021 updateValueMap(I, Reg: ResultReg);
2022 return true;
2023 }
2024
2025 // The integer extend has already been emitted; delete all the instructions
2026 // that have been emitted by the integer extend lowering code and use the
2027 // result from the load instruction directly.
2028 while (MI) {
2029 Reg = 0;
2030 for (auto &Opnd : MI->uses()) {
2031 if (Opnd.isReg()) {
2032 Reg = Opnd.getReg();
2033 break;
2034 }
2035 }
2036 MachineBasicBlock::iterator I(MI);
2037 removeDeadCode(I, E: std::next(x: I));
2038 MI = nullptr;
2039 if (Reg)
2040 MI = MRI.getUniqueVRegDef(Reg);
2041 }
2042 updateValueMap(I: IntExtVal, Reg: ResultReg);
2043 return true;
2044 }
2045
2046 updateValueMap(I, Reg: ResultReg);
2047 return true;
2048}
2049
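/// Emit a store-release (STLR) of SrcReg to the address in AddrReg.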
2050bool AArch64FastISel::emitStoreRelease(MVT VT, Register SrcReg,
2051 Register AddrReg,
2052 MachineMemOperand *MMO) {
2053 unsigned Opc;
2054 switch (VT.SimpleTy) {
2055 default: return false;
2056 case MVT::i8: Opc = AArch64::STLRB; break;
2057 case MVT::i16: Opc = AArch64::STLRH; break;
2058 case MVT::i32: Opc = AArch64::STLRW; break;
2059 case MVT::i64: Opc = AArch64::STLRX; break;
2060 }
2061
2062 const MCInstrDesc &II = TII.get(Opcode: Opc);
2063 SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: 0);
2064 AddrReg = constrainOperandRegClass(II, Op: AddrReg, OpNum: 1);
2065 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
2066 .addReg(RegNo: SrcReg)
2067 .addReg(RegNo: AddrReg)
2068 .addMemOperand(MMO);
2069 return true;
2070}
2071
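/// Emit a store of SrcReg to the given address, choosing between unscaled,
/// scaled-immediate, and register-offset addressing. An i1 value is masked
/// down to a single bit before it is stored.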
2072bool AArch64FastISel::emitStore(MVT VT, Register SrcReg, Address Addr,
2073 MachineMemOperand *MMO) {
2074 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2075 return false;
2076
2077 // Simplify this down to something we can handle.
2078 if (!simplifyAddress(Addr, VT))
2079 return false;
2080
2081 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2082 if (!ScaleFactor)
2083 llvm_unreachable("Unexpected value type.");
2084
2085 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2086 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2087 bool UseScaled = true;
2088 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2089 UseScaled = false;
2090 ScaleFactor = 1;
2091 }
2092
2093 static const unsigned OpcTable[4][6] = {
2094 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2095 AArch64::STURSi, AArch64::STURDi },
2096 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2097 AArch64::STRSui, AArch64::STRDui },
2098 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2099 AArch64::STRSroX, AArch64::STRDroX },
2100 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2101 AArch64::STRSroW, AArch64::STRDroW }
2102 };
2103
2104 unsigned Opc;
2105 bool VTIsi1 = false;
2106 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2107 Addr.getOffsetReg();
2108 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2109 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2110 Addr.getExtendType() == AArch64_AM::SXTW)
2111 Idx++;
2112
2113 switch (VT.SimpleTy) {
2114 default: llvm_unreachable("Unexpected value type.");
2115 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2116 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2117 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2118 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2119 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2120 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2121 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2122 }
2123
2124 // Storing an i1 requires special handling.
2125 if (VTIsi1 && SrcReg != AArch64::WZR) {
2126 Register ANDReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: SrcReg, Imm: 1);
2127 assert(ANDReg && "Unexpected AND instruction emission failure.");
2128 SrcReg = ANDReg;
2129 }
2130 // Create the base instruction, then add the operands.
2131 const MCInstrDesc &II = TII.get(Opcode: Opc);
2132 SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: II.getNumDefs());
2133 MachineInstrBuilder MIB =
2134 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: SrcReg);
2135 addLoadStoreOperands(Addr, MIB, Flags: MachineMemOperand::MOStore, ScaleFactor, MMO);
2136
2137 return true;
2138}
2139
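/// Select a store instruction. Zero values are stored directly from WZR/XZR,
/// release or stronger atomic stores are lowered to STLR, and everything else
/// uses a regular store.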
2140bool AArch64FastISel::selectStore(const Instruction *I) {
2141 MVT VT;
2142 const Value *Op0 = I->getOperand(i: 0);
2143 // Verify we have a legal type before going any further. Currently, we handle
2144 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2145 // those that can be sign- or zero-extended to a basic operation (i1/i8/i16).
2146 if (!isTypeSupported(Ty: Op0->getType(), VT, /*IsVectorAllowed=*/true))
2147 return false;
2148
2149 const Value *PtrV = I->getOperand(i: 1);
2150 if (TLI.supportSwiftError()) {
2151 // Swifterror values can come from either a function parameter with
2152 // swifterror attribute or an alloca with swifterror attribute.
2153 if (const Argument *Arg = dyn_cast<Argument>(Val: PtrV)) {
2154 if (Arg->hasSwiftErrorAttr())
2155 return false;
2156 }
2157
2158 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: PtrV)) {
2159 if (Alloca->isSwiftError())
2160 return false;
2161 }
2162 }
2163
2164 // Get the value to be stored into a register. Use the zero register directly
2165 // when possible to avoid an unnecessary copy and a wasted register.
2166 Register SrcReg;
2167 if (const auto *CI = dyn_cast<ConstantInt>(Val: Op0)) {
2168 if (CI->isZero())
2169 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2170 } else if (const auto *CF = dyn_cast<ConstantFP>(Val: Op0)) {
2171 if (CF->isZero() && !CF->isNegative()) {
2172 VT = MVT::getIntegerVT(BitWidth: VT.getSizeInBits());
2173 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2174 }
2175 }
2176
2177 if (!SrcReg)
2178 SrcReg = getRegForValue(V: Op0);
2179
2180 if (!SrcReg)
2181 return false;
2182
2183 auto *SI = cast<StoreInst>(Val: I);
2184
2185 // Try to emit a STLR for seq_cst/release.
2186 if (SI->isAtomic()) {
2187 AtomicOrdering Ord = SI->getOrdering();
2188 // The non-atomic instructions are sufficient for relaxed stores.
2189 if (isReleaseOrStronger(AO: Ord)) {
2190 // The STLR addressing mode only supports a base reg; pass that directly.
2191 Register AddrReg = getRegForValue(V: PtrV);
2192 if (!AddrReg)
2193 return false;
2194 return emitStoreRelease(VT, SrcReg, AddrReg,
2195 MMO: createMachineMemOperandFor(I));
2196 }
2197 }
2198
2199 // See if we can handle this address.
2200 Address Addr;
2201 if (!computeAddress(Obj: PtrV, Addr, Ty: Op0->getType()))
2202 return false;
2203
2204 if (!emitStore(VT, SrcReg, Addr, MMO: createMachineMemOperandFor(I)))
2205 return false;
2206 return true;
2207}
2208
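/// Map an IR comparison predicate onto the AArch64 condition code that tests
/// it. AL is returned for predicates that need more than one compare
/// (FCMP_ONE and FCMP_UEQ).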
2209static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2210 switch (Pred) {
2211 case CmpInst::FCMP_ONE:
2212 case CmpInst::FCMP_UEQ:
2213 default:
2214 // AL is our "false" for now; FCMP_ONE and FCMP_UEQ need extra compares.
2215 return AArch64CC::AL;
2216 case CmpInst::ICMP_EQ:
2217 case CmpInst::FCMP_OEQ:
2218 return AArch64CC::EQ;
2219 case CmpInst::ICMP_SGT:
2220 case CmpInst::FCMP_OGT:
2221 return AArch64CC::GT;
2222 case CmpInst::ICMP_SGE:
2223 case CmpInst::FCMP_OGE:
2224 return AArch64CC::GE;
2225 case CmpInst::ICMP_UGT:
2226 case CmpInst::FCMP_UGT:
2227 return AArch64CC::HI;
2228 case CmpInst::FCMP_OLT:
2229 return AArch64CC::MI;
2230 case CmpInst::ICMP_ULE:
2231 case CmpInst::FCMP_OLE:
2232 return AArch64CC::LS;
2233 case CmpInst::FCMP_ORD:
2234 return AArch64CC::VC;
2235 case CmpInst::FCMP_UNO:
2236 return AArch64CC::VS;
2237 case CmpInst::FCMP_UGE:
2238 return AArch64CC::PL;
2239 case CmpInst::ICMP_SLT:
2240 case CmpInst::FCMP_ULT:
2241 return AArch64CC::LT;
2242 case CmpInst::ICMP_SLE:
2243 case CmpInst::FCMP_ULE:
2244 return AArch64CC::LE;
2245 case CmpInst::FCMP_UNE:
2246 case CmpInst::ICMP_NE:
2247 return AArch64CC::NE;
2248 case CmpInst::ICMP_UGE:
2249 return AArch64CC::HS;
2250 case CmpInst::ICMP_ULT:
2251 return AArch64CC::LO;
2252 }
2253}
2254
2255/// Try to emit a combined compare-and-branch instruction.
2256bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2257 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2258 // will not be produced, as they are conditional branch instructions that do
2259 // not set flags.
2260 if (FuncInfo.MF->getFunction().hasFnAttribute(
2261 Kind: Attribute::SpeculativeLoadHardening))
2262 return false;
2263
2264 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2265 const CmpInst *CI = cast<CmpInst>(Val: BI->getCondition());
2266 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2267
2268 const Value *LHS = CI->getOperand(i_nocapture: 0);
2269 const Value *RHS = CI->getOperand(i_nocapture: 1);
2270
2271 MVT VT;
2272 if (!isTypeSupported(Ty: LHS->getType(), VT))
2273 return false;
2274
2275 unsigned BW = VT.getSizeInBits();
2276 if (BW > 64)
2277 return false;
2278
2279 MachineBasicBlock *TBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: 0));
2280 MachineBasicBlock *FBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: 1));
2281
2282 // Try to take advantage of fallthrough opportunities.
2283 if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
2284 std::swap(a&: TBB, b&: FBB);
2285 Predicate = CmpInst::getInversePredicate(pred: Predicate);
2286 }
2287
2288 int TestBit = -1;
2289 bool IsCmpNE;
2290 switch (Predicate) {
2291 default:
2292 return false;
2293 case CmpInst::ICMP_EQ:
2294 case CmpInst::ICMP_NE:
2295 if (isa<Constant>(Val: LHS) && cast<Constant>(Val: LHS)->isNullValue())
2296 std::swap(a&: LHS, b&: RHS);
2297
2298 if (!isa<Constant>(Val: RHS) || !cast<Constant>(Val: RHS)->isNullValue())
2299 return false;
2300
2301 if (const auto *AI = dyn_cast<BinaryOperator>(Val: LHS))
2302 if (AI->getOpcode() == Instruction::And && isValueAvailable(V: AI)) {
2303 const Value *AndLHS = AI->getOperand(i_nocapture: 0);
2304 const Value *AndRHS = AI->getOperand(i_nocapture: 1);
2305
2306 if (const auto *C = dyn_cast<ConstantInt>(Val: AndLHS))
2307 if (C->getValue().isPowerOf2())
2308 std::swap(a&: AndLHS, b&: AndRHS);
2309
2310 if (const auto *C = dyn_cast<ConstantInt>(Val: AndRHS))
2311 if (C->getValue().isPowerOf2()) {
2312 TestBit = C->getValue().logBase2();
2313 LHS = AndLHS;
2314 }
2315 }
2316
2317 if (VT == MVT::i1)
2318 TestBit = 0;
2319
2320 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2321 break;
2322 case CmpInst::ICMP_SLT:
2323 case CmpInst::ICMP_SGE:
2324 if (!isa<Constant>(Val: RHS) || !cast<Constant>(Val: RHS)->isNullValue())
2325 return false;
2326
2327 TestBit = BW - 1;
2328 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2329 break;
2330 case CmpInst::ICMP_SGT:
2331 case CmpInst::ICMP_SLE:
2332 if (!isa<ConstantInt>(Val: RHS))
2333 return false;
2334
2335 if (cast<ConstantInt>(Val: RHS)->getValue() != APInt(BW, -1, true))
2336 return false;
2337
2338 TestBit = BW - 1;
2339 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2340 break;
2341 } // end switch
2342
2343 static const unsigned OpcTable[2][2][2] = {
2344 { {AArch64::CBZW, AArch64::CBZX },
2345 {AArch64::CBNZW, AArch64::CBNZX} },
2346 { {AArch64::TBZW, AArch64::TBZX },
2347 {AArch64::TBNZW, AArch64::TBNZX} }
2348 };
2349
2350 bool IsBitTest = TestBit != -1;
2351 bool Is64Bit = BW == 64;
2352 if (TestBit < 32 && TestBit >= 0)
2353 Is64Bit = false;
2354
2355 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2356 const MCInstrDesc &II = TII.get(Opcode: Opc);
2357
2358 Register SrcReg = getRegForValue(V: LHS);
2359 if (!SrcReg)
2360 return false;
2361
2362 if (BW == 64 && !Is64Bit)
2363 SrcReg = fastEmitInst_extractsubreg(RetVT: MVT::i32, Op0: SrcReg, Idx: AArch64::sub_32);
2364
2365 if ((BW < 32) && !IsBitTest)
2366 SrcReg = emitIntExt(SrcVT: VT, SrcReg, DestVT: MVT::i32, /*isZExt=*/true);
2367
2368 // Emit the combined compare and branch instruction.
2369 SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: II.getNumDefs());
2370 MachineInstrBuilder MIB =
2371 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc))
2372 .addReg(RegNo: SrcReg);
2373 if (IsBitTest)
2374 MIB.addImm(Val: TestBit);
2375 MIB.addMBB(MBB: TBB);
2376
2377 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
2378 return true;
2379}
2380
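/// Select a conditional or unconditional branch. Compares and XALU intrinsic
/// overflow flags feeding the branch are folded where possible; constant
/// conditions become a direct branch, and anything else tests the lowest bit
/// of the condition with TB(N)Z.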
2381bool AArch64FastISel::selectBranch(const Instruction *I) {
2382 const BranchInst *BI = cast<BranchInst>(Val: I);
2383 if (BI->isUnconditional()) {
2384 MachineBasicBlock *MSucc = FuncInfo.getMBB(BB: BI->getSuccessor(i: 0));
2385 fastEmitBranch(MSucc, DbgLoc: BI->getDebugLoc());
2386 return true;
2387 }
2388
2389 MachineBasicBlock *TBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: 0));
2390 MachineBasicBlock *FBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: 1));
2391
2392 if (const CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition())) {
2393 if (CI->hasOneUse() && isValueAvailable(V: CI)) {
2394 // Try to optimize or fold the cmp.
2395 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2396 switch (Predicate) {
2397 default:
2398 break;
2399 case CmpInst::FCMP_FALSE:
2400 fastEmitBranch(MSucc: FBB, DbgLoc: MIMD.getDL());
2401 return true;
2402 case CmpInst::FCMP_TRUE:
2403 fastEmitBranch(MSucc: TBB, DbgLoc: MIMD.getDL());
2404 return true;
2405 }
2406
2407 // Try to emit a combined compare-and-branch first.
2408 if (emitCompareAndBranch(BI))
2409 return true;
2410
2411 // Try to take advantage of fallthrough opportunities.
2412 if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
2413 std::swap(a&: TBB, b&: FBB);
2414 Predicate = CmpInst::getInversePredicate(pred: Predicate);
2415 }
2416
2417 // Emit the cmp.
2418 if (!emitCmp(LHS: CI->getOperand(i_nocapture: 0), RHS: CI->getOperand(i_nocapture: 1), IsZExt: CI->isUnsigned()))
2419 return false;
2420
2421 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2422 // instruction.
2423 AArch64CC::CondCode CC = getCompareCC(Pred: Predicate);
2424 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2425 switch (Predicate) {
2426 default:
2427 break;
2428 case CmpInst::FCMP_UEQ:
2429 ExtraCC = AArch64CC::EQ;
2430 CC = AArch64CC::VS;
2431 break;
2432 case CmpInst::FCMP_ONE:
2433 ExtraCC = AArch64CC::MI;
2434 CC = AArch64CC::GT;
2435 break;
2436 }
2437 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2438
2439 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2440 if (ExtraCC != AArch64CC::AL) {
2441 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::Bcc))
2442 .addImm(Val: ExtraCC)
2443 .addMBB(MBB: TBB);
2444 }
2445
2446 // Emit the branch.
2447 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::Bcc))
2448 .addImm(Val: CC)
2449 .addMBB(MBB: TBB);
2450
2451 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
2452 return true;
2453 }
2454 } else if (const auto *CI = dyn_cast<ConstantInt>(Val: BI->getCondition())) {
2455 uint64_t Imm = CI->getZExtValue();
2456 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2457 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::B))
2458 .addMBB(MBB: Target);
2459
2460 // Obtain the branch probability and add the target to the successor list.
2461 if (FuncInfo.BPI) {
2462 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2463 Src: BI->getParent(), Dst: Target->getBasicBlock());
2464 FuncInfo.MBB->addSuccessor(Succ: Target, Prob: BranchProbability);
2465 } else
2466 FuncInfo.MBB->addSuccessorWithoutProb(Succ: Target);
2467 return true;
2468 } else {
2469 AArch64CC::CondCode CC = AArch64CC::NE;
2470 if (foldXALUIntrinsic(CC, I, Cond: BI->getCondition())) {
2471 // Fake-request the condition; otherwise the intrinsic might be completely
2472 // optimized away.
2473 Register CondReg = getRegForValue(V: BI->getCondition());
2474 if (!CondReg)
2475 return false;
2476
2477 // Emit the branch.
2478 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::Bcc))
2479 .addImm(Val: CC)
2480 .addMBB(MBB: TBB);
2481
2482 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
2483 return true;
2484 }
2485 }
2486
2487 Register CondReg = getRegForValue(V: BI->getCondition());
2488 if (!CondReg)
2489 return false;
2490
2491 // i1 conditions come in as i32 values; test the lowest bit with TB(N)Z.
2492 unsigned Opcode = AArch64::TBNZW;
2493 if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) {
2494 std::swap(a&: TBB, b&: FBB);
2495 Opcode = AArch64::TBZW;
2496 }
2497
2498 const MCInstrDesc &II = TII.get(Opcode);
2499 Register ConstrainedCondReg
2500 = constrainOperandRegClass(II, Op: CondReg, OpNum: II.getNumDefs());
2501 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II)
2502 .addReg(RegNo: ConstrainedCondReg)
2503 .addImm(Val: 0)
2504 .addMBB(MBB: TBB);
2505
2506 finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB);
2507 return true;
2508}
2509
2510bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2511 const IndirectBrInst *BI = cast<IndirectBrInst>(Val: I);
2512 Register AddrReg = getRegForValue(V: BI->getOperand(i_nocapture: 0));
2513 if (!AddrReg)
2514 return false;
2515
2516 // Authenticated indirectbr is not implemented yet.
2517 if (FuncInfo.MF->getFunction().hasFnAttribute(Kind: "ptrauth-indirect-gotos"))
2518 return false;
2519
2520 // Emit the indirect branch.
2521 const MCInstrDesc &II = TII.get(Opcode: AArch64::BR);
2522 AddrReg = constrainOperandRegClass(II, Op: AddrReg, OpNum: II.getNumDefs());
2523 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: AddrReg);
2524
2525 // Make sure the CFG is up-to-date.
2526 for (const auto *Succ : BI->successors())
2527 FuncInfo.MBB->addSuccessor(Succ: FuncInfo.getMBB(BB: Succ));
2528
2529 return true;
2530}
2531
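/// Select a compare instruction and materialize its boolean result into a
/// register, normally with a CSINC; FCMP_UEQ and FCMP_ONE need an extra CSINC.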
2532bool AArch64FastISel::selectCmp(const Instruction *I) {
2533 const CmpInst *CI = cast<CmpInst>(Val: I);
2534
2535 // Vectors of i1 are weird: bail out.
2536 if (CI->getType()->isVectorTy())
2537 return false;
2538
2539 // Try to optimize or fold the cmp.
2540 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2541 Register ResultReg;
2542 switch (Predicate) {
2543 default:
2544 break;
2545 case CmpInst::FCMP_FALSE:
2546 ResultReg = createResultReg(RC: &AArch64::GPR32RegClass);
2547 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
2548 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
2549 .addReg(RegNo: AArch64::WZR, Flags: getKillRegState(B: true));
2550 break;
2551 case CmpInst::FCMP_TRUE:
2552 ResultReg = fastEmit_i(VT: MVT::i32, RetVT: MVT::i32, Opcode: ISD::Constant, imm0: 1);
2553 break;
2554 }
2555
2556 if (ResultReg) {
2557 updateValueMap(I, Reg: ResultReg);
2558 return true;
2559 }
2560
2561 // Emit the cmp.
2562 if (!emitCmp(LHS: CI->getOperand(i_nocapture: 0), RHS: CI->getOperand(i_nocapture: 1), IsZExt: CI->isUnsigned()))
2563 return false;
2564
2565 ResultReg = createResultReg(RC: &AArch64::GPR32RegClass);
2566
2567 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. The
2568 // condition codes in the table are inverted because they are used by CSINC.
2569 static unsigned CondCodeTable[2][2] = {
2570 { AArch64CC::NE, AArch64CC::VC },
2571 { AArch64CC::PL, AArch64CC::LE }
2572 };
2573 unsigned *CondCodes = nullptr;
2574 switch (Predicate) {
2575 default:
2576 break;
2577 case CmpInst::FCMP_UEQ:
2578 CondCodes = &CondCodeTable[0][0];
2579 break;
2580 case CmpInst::FCMP_ONE:
2581 CondCodes = &CondCodeTable[1][0];
2582 break;
2583 }
2584
2585 if (CondCodes) {
2586 Register TmpReg1 = createResultReg(RC: &AArch64::GPR32RegClass);
2587 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::CSINCWr),
2588 DestReg: TmpReg1)
2589 .addReg(RegNo: AArch64::WZR, Flags: getKillRegState(B: true))
2590 .addReg(RegNo: AArch64::WZR, Flags: getKillRegState(B: true))
2591 .addImm(Val: CondCodes[0]);
2592 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::CSINCWr),
2593 DestReg: ResultReg)
2594 .addReg(RegNo: TmpReg1, Flags: getKillRegState(B: true))
2595 .addReg(RegNo: AArch64::WZR, Flags: getKillRegState(B: true))
2596 .addImm(Val: CondCodes[1]);
2597
2598 updateValueMap(I, Reg: ResultReg);
2599 return true;
2600 }
2601
2602 // Now set a register based on the comparison.
2603 AArch64CC::CondCode CC = getCompareCC(Pred: Predicate);
2604 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2605 AArch64CC::CondCode invertedCC = getInvertedCondCode(Code: CC);
2606 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::CSINCWr),
2607 DestReg: ResultReg)
2608 .addReg(RegNo: AArch64::WZR, Flags: getKillRegState(B: true))
2609 .addReg(RegNo: AArch64::WZR, Flags: getKillRegState(B: true))
2610 .addImm(Val: invertedCC);
2611
2612 updateValueMap(I, Reg: ResultReg);
2613 return true;
2614}
2615
2616/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2617/// value.
2618bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2619 if (!SI->getType()->isIntegerTy(Bitwidth: 1))
2620 return false;
2621
2622 const Value *Src1Val, *Src2Val;
2623 unsigned Opc = 0;
2624 bool NeedExtraOp = false;
2625 if (auto *CI = dyn_cast<ConstantInt>(Val: SI->getTrueValue())) {
2626 if (CI->isOne()) {
2627 Src1Val = SI->getCondition();
2628 Src2Val = SI->getFalseValue();
2629 Opc = AArch64::ORRWrr;
2630 } else {
2631 assert(CI->isZero());
2632 Src1Val = SI->getFalseValue();
2633 Src2Val = SI->getCondition();
2634 Opc = AArch64::BICWrr;
2635 }
2636 } else if (auto *CI = dyn_cast<ConstantInt>(Val: SI->getFalseValue())) {
2637 if (CI->isOne()) {
2638 Src1Val = SI->getCondition();
2639 Src2Val = SI->getTrueValue();
2640 Opc = AArch64::ORRWrr;
2641 NeedExtraOp = true;
2642 } else {
2643 assert(CI->isZero());
2644 Src1Val = SI->getCondition();
2645 Src2Val = SI->getTrueValue();
2646 Opc = AArch64::ANDWrr;
2647 }
2648 }
2649
2650 if (!Opc)
2651 return false;
2652
2653 Register Src1Reg = getRegForValue(V: Src1Val);
2654 if (!Src1Reg)
2655 return false;
2656
2657 Register Src2Reg = getRegForValue(V: Src2Val);
2658 if (!Src2Reg)
2659 return false;
2660
2661 if (NeedExtraOp)
2662 Src1Reg = emitLogicalOp_ri(ISDOpc: ISD::XOR, RetVT: MVT::i32, LHSReg: Src1Reg, Imm: 1);
2663
2664 Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC: &AArch64::GPR32RegClass, Op0: Src1Reg,
2665 Op1: Src2Reg);
2666 updateValueMap(I: SI, Reg: ResultReg);
2667 return true;
2668}
2669
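/// Select a select instruction and lower it to CSEL/FCSEL. The condition is
/// taken from a foldable compare or XALU intrinsic when possible; otherwise
/// its lowest bit is tested with a TST.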
2670bool AArch64FastISel::selectSelect(const Instruction *I) {
2671 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2672 MVT VT;
2673 if (!isTypeSupported(Ty: I->getType(), VT))
2674 return false;
2675
2676 unsigned Opc;
2677 const TargetRegisterClass *RC;
2678 switch (VT.SimpleTy) {
2679 default:
2680 return false;
2681 case MVT::i1:
2682 case MVT::i8:
2683 case MVT::i16:
2684 case MVT::i32:
2685 Opc = AArch64::CSELWr;
2686 RC = &AArch64::GPR32RegClass;
2687 break;
2688 case MVT::i64:
2689 Opc = AArch64::CSELXr;
2690 RC = &AArch64::GPR64RegClass;
2691 break;
2692 case MVT::f32:
2693 Opc = AArch64::FCSELSrrr;
2694 RC = &AArch64::FPR32RegClass;
2695 break;
2696 case MVT::f64:
2697 Opc = AArch64::FCSELDrrr;
2698 RC = &AArch64::FPR64RegClass;
2699 break;
2700 }
2701
2702 const SelectInst *SI = cast<SelectInst>(Val: I);
2703 const Value *Cond = SI->getCondition();
2704 AArch64CC::CondCode CC = AArch64CC::NE;
2705 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2706
2707 if (optimizeSelect(SI))
2708 return true;
2709
2710 // Try to pick up the flags, so we don't have to emit another compare.
2711 if (foldXALUIntrinsic(CC, I, Cond)) {
2712 // Fake-request the condition to force emission of the XALU intrinsic.
2713 Register CondReg = getRegForValue(V: Cond);
2714 if (!CondReg)
2715 return false;
2716 } else if (isa<CmpInst>(Val: Cond) && cast<CmpInst>(Val: Cond)->hasOneUse() &&
2717 isValueAvailable(V: Cond)) {
2718 const auto *Cmp = cast<CmpInst>(Val: Cond);
2719 // Try to optimize or fold the cmp.
2720 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI: Cmp);
2721 const Value *FoldSelect = nullptr;
2722 switch (Predicate) {
2723 default:
2724 break;
2725 case CmpInst::FCMP_FALSE:
2726 FoldSelect = SI->getFalseValue();
2727 break;
2728 case CmpInst::FCMP_TRUE:
2729 FoldSelect = SI->getTrueValue();
2730 break;
2731 }
2732
2733 if (FoldSelect) {
2734 Register SrcReg = getRegForValue(V: FoldSelect);
2735 if (!SrcReg)
2736 return false;
2737
2738 updateValueMap(I, Reg: SrcReg);
2739 return true;
2740 }
2741
2742 // Emit the cmp.
2743 if (!emitCmp(LHS: Cmp->getOperand(i_nocapture: 0), RHS: Cmp->getOperand(i_nocapture: 1), IsZExt: Cmp->isUnsigned()))
2744 return false;
2745
2746 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2747 CC = getCompareCC(Pred: Predicate);
2748 switch (Predicate) {
2749 default:
2750 break;
2751 case CmpInst::FCMP_UEQ:
2752 ExtraCC = AArch64CC::EQ;
2753 CC = AArch64CC::VS;
2754 break;
2755 case CmpInst::FCMP_ONE:
2756 ExtraCC = AArch64CC::MI;
2757 CC = AArch64CC::GT;
2758 break;
2759 }
2760 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2761 } else {
2762 Register CondReg = getRegForValue(V: Cond);
2763 if (!CondReg)
2764 return false;
2765
2766 const MCInstrDesc &II = TII.get(Opcode: AArch64::ANDSWri);
2767 CondReg = constrainOperandRegClass(II, Op: CondReg, OpNum: 1);
2768
2769 // Emit a TST instruction (ANDS wzr, reg, #imm).
2770 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II,
2771 DestReg: AArch64::WZR)
2772 .addReg(RegNo: CondReg)
2773 .addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: 1, regSize: 32));
2774 }
2775
2776 Register Src1Reg = getRegForValue(V: SI->getTrueValue());
2777 Register Src2Reg = getRegForValue(V: SI->getFalseValue());
2778
2779 if (!Src1Reg || !Src2Reg)
2780 return false;
2781
2782 if (ExtraCC != AArch64CC::AL)
2783 Src2Reg = fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: Src1Reg, Op1: Src2Reg, Imm: ExtraCC);
2784
2785 Register ResultReg = fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: Src1Reg, Op1: Src2Reg, Imm: CC);
2786 updateValueMap(I, Reg: ResultReg);
2787 return true;
2788}
2789
2790bool AArch64FastISel::selectFPExt(const Instruction *I) {
2791 Value *V = I->getOperand(i: 0);
2792 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2793 return false;
2794
2795 Register Op = getRegForValue(V);
2796 if (Op == 0)
2797 return false;
2798
2799 Register ResultReg = createResultReg(RC: &AArch64::FPR64RegClass);
2800 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::FCVTDSr),
2801 DestReg: ResultReg).addReg(RegNo: Op);
2802 updateValueMap(I, Reg: ResultReg);
2803 return true;
2804}
2805
2806bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2807 Value *V = I->getOperand(i: 0);
2808 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2809 return false;
2810
2811 Register Op = getRegForValue(V);
2812 if (Op == 0)
2813 return false;
2814
2815 Register ResultReg = createResultReg(RC: &AArch64::FPR32RegClass);
2816 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::FCVTSDr),
2817 DestReg: ResultReg).addReg(RegNo: Op);
2818 updateValueMap(I, Reg: ResultReg);
2819 return true;
2820}
2821
2822// FPToUI and FPToSI
2823bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2824 MVT DestVT;
2825 if (!isTypeLegal(Ty: I->getType(), VT&: DestVT) || DestVT.isVector())
2826 return false;
2827
2828 Register SrcReg = getRegForValue(V: I->getOperand(i: 0));
2829 if (!SrcReg)
2830 return false;
2831
2832 EVT SrcVT = TLI.getValueType(DL, Ty: I->getOperand(i: 0)->getType(), AllowUnknown: true);
2833 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2834 return false;
2835
2836 unsigned Opc;
2837 if (SrcVT == MVT::f64) {
2838 if (Signed)
2839 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2840 else
2841 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2842 } else {
2843 if (Signed)
2844 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2845 else
2846 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2847 }
2848 Register ResultReg = createResultReg(
2849 RC: DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2850 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
2851 .addReg(RegNo: SrcReg);
2852 updateValueMap(I, Reg: ResultReg);
2853 return true;
2854}
2855
2856bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2857 MVT DestVT;
2858 if (!isTypeLegal(Ty: I->getType(), VT&: DestVT) || DestVT.isVector())
2859 return false;
2860 // Let regular ISel handle FP16 and BF16.
2861 if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2862 return false;
2863
2864 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2865 "Unexpected value type.");
2866
2867 Register SrcReg = getRegForValue(V: I->getOperand(i: 0));
2868 if (!SrcReg)
2869 return false;
2870
2871 EVT SrcVT = TLI.getValueType(DL, Ty: I->getOperand(i: 0)->getType(), AllowUnknown: true);
2872
2873 // Extend i1/i8/i16 sources to i32, sign- or zero-extending based on Signed.
2874 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2875 SrcReg =
2876 emitIntExt(SrcVT: SrcVT.getSimpleVT(), SrcReg, DestVT: MVT::i32, /*isZExt*/ !Signed);
2877 if (!SrcReg)
2878 return false;
2879 }
2880
2881 unsigned Opc;
2882 if (SrcVT == MVT::i64) {
2883 if (Signed)
2884 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2885 else
2886 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2887 } else {
2888 if (Signed)
2889 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2890 else
2891 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2892 }
2893
2894 Register ResultReg = fastEmitInst_r(MachineInstOpcode: Opc, RC: TLI.getRegClassFor(VT: DestVT), Op0: SrcReg);
2895 updateValueMap(I, Reg: ResultReg);
2896 return true;
2897}
2898
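/// Lower the incoming arguments in the entry block. Only simple cases are
/// handled: the C and Swift calling conventions, no varargs, and at most
/// eight GPR and eight FPR arguments.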
2899bool AArch64FastISel::fastLowerArguments() {
2900 if (!FuncInfo.CanLowerReturn)
2901 return false;
2902
2903 const Function *F = FuncInfo.Fn;
2904 if (F->isVarArg())
2905 return false;
2906
2907 CallingConv::ID CC = F->getCallingConv();
2908 if (CC != CallingConv::C && CC != CallingConv::Swift)
2909 return false;
2910
2911 if (Subtarget->hasCustomCallingConv())
2912 return false;
2913
2914 // Only handle simple cases of up to 8 GPR and 8 FPR arguments each.
2915 unsigned GPRCnt = 0;
2916 unsigned FPRCnt = 0;
2917 for (auto const &Arg : F->args()) {
2918 if (Arg.hasAttribute(Kind: Attribute::ByVal) ||
2919 Arg.hasAttribute(Kind: Attribute::InReg) ||
2920 Arg.hasAttribute(Kind: Attribute::StructRet) ||
2921 Arg.hasAttribute(Kind: Attribute::SwiftSelf) ||
2922 Arg.hasAttribute(Kind: Attribute::SwiftAsync) ||
2923 Arg.hasAttribute(Kind: Attribute::SwiftError) ||
2924 Arg.hasAttribute(Kind: Attribute::Nest))
2925 return false;
2926
2927 Type *ArgTy = Arg.getType();
2928 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2929 return false;
2930
2931 EVT ArgVT = TLI.getValueType(DL, Ty: ArgTy);
2932 if (!ArgVT.isSimple())
2933 return false;
2934
2935 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2936 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2937 return false;
2938
2939 if (VT.isVector() &&
2940 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2941 return false;
2942
2943 if (VT >= MVT::i1 && VT <= MVT::i64)
2944 ++GPRCnt;
2945 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2946 VT.is128BitVector())
2947 ++FPRCnt;
2948 else
2949 return false;
2950
2951 if (GPRCnt > 8 || FPRCnt > 8)
2952 return false;
2953 }
2954
2955 static const MCPhysReg Registers[6][8] = {
2956 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2957 AArch64::W5, AArch64::W6, AArch64::W7 },
2958 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2959 AArch64::X5, AArch64::X6, AArch64::X7 },
2960 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2961 AArch64::H5, AArch64::H6, AArch64::H7 },
2962 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2963 AArch64::S5, AArch64::S6, AArch64::S7 },
2964 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2965 AArch64::D5, AArch64::D6, AArch64::D7 },
2966 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2967 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2968 };
2969
2970 unsigned GPRIdx = 0;
2971 unsigned FPRIdx = 0;
2972 for (auto const &Arg : F->args()) {
2973 MVT VT = TLI.getSimpleValueType(DL, Ty: Arg.getType());
2974 unsigned SrcReg;
2975 const TargetRegisterClass *RC;
2976 if (VT >= MVT::i1 && VT <= MVT::i32) {
2977 SrcReg = Registers[0][GPRIdx++];
2978 RC = &AArch64::GPR32RegClass;
2979 VT = MVT::i32;
2980 } else if (VT == MVT::i64) {
2981 SrcReg = Registers[1][GPRIdx++];
2982 RC = &AArch64::GPR64RegClass;
2983 } else if (VT == MVT::f16 || VT == MVT::bf16) {
2984 SrcReg = Registers[2][FPRIdx++];
2985 RC = &AArch64::FPR16RegClass;
2986 } else if (VT == MVT::f32) {
2987 SrcReg = Registers[3][FPRIdx++];
2988 RC = &AArch64::FPR32RegClass;
2989 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2990 SrcReg = Registers[4][FPRIdx++];
2991 RC = &AArch64::FPR64RegClass;
2992 } else if (VT.is128BitVector()) {
2993 SrcReg = Registers[5][FPRIdx++];
2994 RC = &AArch64::FPR128RegClass;
2995 } else
2996 llvm_unreachable("Unexpected value type.");
2997
2998 Register DstReg = FuncInfo.MF->addLiveIn(PReg: SrcReg, RC);
2999 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3000 // Without this, EmitLiveInCopies may eliminate the livein if its only
3001 // use is a bitcast (which isn't turned into an instruction).
3002 Register ResultReg = createResultReg(RC);
3003 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3004 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
3005 .addReg(RegNo: DstReg, Flags: getKillRegState(B: true));
3006 updateValueMap(I: &Arg, Reg: ResultReg);
3007 }
3008 return true;
3009}
3010
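/// Lower the outgoing call arguments: emit CALLSEQ_START, promote arguments
/// as required by their assigned location, and copy them into registers or
/// store them to the stack.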
3011bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3012 SmallVectorImpl<MVT> &OutVTs,
3013 SmallVectorImpl<Type *> &OrigTys,
3014 unsigned &NumBytes) {
3015 CallingConv::ID CC = CLI.CallConv;
3016 SmallVector<CCValAssign, 16> ArgLocs;
3017 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3018 CCInfo.AnalyzeCallOperands(ArgVTs&: OutVTs, Flags&: CLI.OutFlags, OrigTys,
3019 Fn: CCAssignFnForCall(CC));
3020
3021 // Get a count of how many bytes are to be pushed on the stack.
3022 NumBytes = CCInfo.getStackSize();
3023
3024 // Issue CALLSEQ_START
3025 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3026 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AdjStackDown))
3027 .addImm(Val: NumBytes).addImm(Val: 0);
3028
3029 // Process the args.
3030 for (CCValAssign &VA : ArgLocs) {
3031 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3032 MVT ArgVT = OutVTs[VA.getValNo()];
3033
3034 Register ArgReg = getRegForValue(V: ArgVal);
3035 if (!ArgReg)
3036 return false;
3037
3038 // Handle arg promotion: SExt, ZExt, AExt.
3039 switch (VA.getLocInfo()) {
3040 case CCValAssign::Full:
3041 break;
3042 case CCValAssign::SExt: {
3043 MVT DestVT = VA.getLocVT();
3044 MVT SrcVT = ArgVT;
3045 ArgReg = emitIntExt(SrcVT, SrcReg: ArgReg, DestVT, /*isZExt=*/false);
3046 if (!ArgReg)
3047 return false;
3048 break;
3049 }
3050 case CCValAssign::AExt:
3051 // Intentional fall-through.
3052 case CCValAssign::ZExt: {
3053 MVT DestVT = VA.getLocVT();
3054 MVT SrcVT = ArgVT;
3055 ArgReg = emitIntExt(SrcVT, SrcReg: ArgReg, DestVT, /*isZExt=*/true);
3056 if (!ArgReg)
3057 return false;
3058 break;
3059 }
3060 default:
3061 llvm_unreachable("Unknown arg promotion!");
3062 }
3063
3064 // Now copy/store the argument to its correct location.
3065 if (VA.isRegLoc() && !VA.needsCustom()) {
3066 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3067 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: VA.getLocReg()).addReg(RegNo: ArgReg);
3068 CLI.OutRegs.push_back(Elt: VA.getLocReg());
3069 } else if (VA.needsCustom()) {
3070 // FIXME: Handle custom args.
3071 return false;
3072 } else {
3073 assert(VA.isMemLoc() && "Assuming store on stack.");
3074
3075 // Don't emit stores for undef values.
3076 if (isa<UndefValue>(Val: ArgVal))
3077 continue;
3078
3079 // Need to store on the stack.
3080 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3081
3082 unsigned BEAlign = 0;
3083 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3084 BEAlign = 8 - ArgSize;
3085
3086 Address Addr;
3087 Addr.setKind(Address::RegBase);
3088 Addr.setReg(AArch64::SP);
3089 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3090
3091 Align Alignment = DL.getABITypeAlign(Ty: ArgVal->getType());
3092 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3093 PtrInfo: MachinePointerInfo::getStack(MF&: *FuncInfo.MF, Offset: Addr.getOffset()),
3094 F: MachineMemOperand::MOStore, Size: ArgVT.getStoreSize(), BaseAlignment: Alignment);
3095
3096 if (!emitStore(VT: ArgVT, SrcReg: ArgReg, Addr, MMO))
3097 return false;
3098 }
3099 }
3100 return true;
3101}
3102
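/// Finish lowering a call: emit CALLSEQ_END and copy the return values out of
/// their physical registers.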
3103bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3104 CallingConv::ID CC = CLI.CallConv;
3105
3106 // Issue CALLSEQ_END
3107 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3108 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AdjStackUp))
3109 .addImm(Val: NumBytes).addImm(Val: 0);
3110
3111 // Now the return values.
3112 SmallVector<CCValAssign, 16> RVLocs;
3113 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3114 CCInfo.AnalyzeCallResult(Ins: CLI.Ins, Fn: CCAssignFnForCall(CC));
3115
3116 Register ResultReg = FuncInfo.CreateRegs(Ty: CLI.RetTy);
3117 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3118 CCValAssign &VA = RVLocs[i];
3119 MVT CopyVT = VA.getValVT();
3120 Register CopyReg = ResultReg + i;
3121
3122 // TODO: Handle big-endian results
3123 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3124 return false;
3125
3126 // Copy the result out of its specified physreg.
3127 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: TargetOpcode::COPY),
3128 DestReg: CopyReg)
3129 .addReg(RegNo: VA.getLocReg());
3130 CLI.InRegs.push_back(Elt: VA.getLocReg());
3131 }
3132
3133 CLI.ResultReg = ResultReg;
3134 CLI.NumResultRegs = RVLocs.size();
3135
3136 return true;
3137}
3138
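/// Lower a call, bailing out to SelectionDAG for the cases FastISel does not
/// handle, such as tail calls, varargs, vector arguments, and KCFI-checked
/// indirect calls.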
3139bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3140 CallingConv::ID CC = CLI.CallConv;
3141 bool IsTailCall = CLI.IsTailCall;
3142 bool IsVarArg = CLI.IsVarArg;
3143 const Value *Callee = CLI.Callee;
3144 MCSymbol *Symbol = CLI.Symbol;
3145
3146 if (!Callee && !Symbol)
3147 return false;
3148
3149 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3150 // a BTI instruction following the call.
3151 if (CLI.CB && CLI.CB->hasFnAttr(Kind: Attribute::ReturnsTwice) &&
3152 !Subtarget->noBTIAtReturnTwice() &&
3153 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3154 return false;
3155
3156 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3157 if (CLI.CB && CLI.CB->isIndirectCall() &&
3158 CLI.CB->getOperandBundle(ID: LLVMContext::OB_kcfi))
3159 return false;
3160
3161 // Allow SelectionDAG isel to handle tail calls.
3162 if (IsTailCall)
3163 return false;
3164
3165 // FIXME: we could and should support this, but for now correctness at -O0 is
3166 // more important.
3167 if (Subtarget->isTargetILP32())
3168 return false;
3169
3170 CodeModel::Model CM = TM.getCodeModel();
3171 // Only support the small-addressing and large code models.
3172 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3173 return false;
3174
3175 // FIXME: Add large code model support for ELF.
3176 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3177 return false;
3178
3179 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3180 // attribute. Check "RtLibUseGOT" instead.
3181 if (MF->getFunction().getParent()->getRtLibUseGOT())
3182 return false;
3183
3184 // Let SDISel handle vararg functions.
3185 if (IsVarArg)
3186 return false;
3187
3188 if (Subtarget->isWindowsArm64EC())
3189 return false;
3190
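  // Give up if any argument carries an attribute that needs special lowering
  // (inreg, sret, nest, byval, or one of the Swift conventions).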
3191 for (auto Flag : CLI.OutFlags)
3192 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3193 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3194 return false;
3195
3196 // Set up the argument vectors.
3197 SmallVector<MVT, 16> OutVTs;
3198 SmallVector<Type *, 16> OrigTys;
3199 OutVTs.reserve(N: CLI.OutVals.size());
3200
3201 for (auto *Val : CLI.OutVals) {
3202 MVT VT;
3203 if (!isTypeLegal(Ty: Val->getType(), VT) &&
3204 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3205 return false;
3206
3207 // We don't handle vector parameters yet.
3208 if (VT.isVector() || VT.getSizeInBits() > 64)
3209 return false;
3210
3211 OutVTs.push_back(Elt: VT);
3212 OrigTys.push_back(Elt: Val->getType());
3213 }
3214
3215 Address Addr;
3216 if (Callee && !computeCallAddress(V: Callee, Addr))
3217 return false;
3218
3219 // The weak function target may be zero; in that case we must use indirect
3220  // addressing via a stub on Windows as it may be out of range for a
3221 // PC-relative jump.
3222 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3223 Addr.getGlobalValue()->hasExternalWeakLinkage())
3224 return false;
3225
3226 // Handle the arguments now that we've gotten them.
3227 unsigned NumBytes;
3228 if (!processCallArgs(CLI, OutVTs, OrigTys, NumBytes))
3229 return false;
3230
3231 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3232 if (RegInfo->isAnyArgRegReserved(MF: *MF))
3233 RegInfo->emitReservedArgRegCallError(MF: *MF);
3234
3235 // Issue the call.
3236 MachineInstrBuilder MIB;
3237 if (Subtarget->useSmallAddressing()) {
3238 const MCInstrDesc &II =
3239 TII.get(Opcode: Addr.getReg() ? getBLRCallOpcode(MF: *MF) : (unsigned)AArch64::BL);
3240 MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II);
3241 if (Symbol)
3242 MIB.addSym(Sym: Symbol, TargetFlags: 0);
3243 else if (Addr.getGlobalValue())
3244 MIB.addGlobalAddress(GV: Addr.getGlobalValue(), Offset: 0, TargetFlags: 0);
3245 else if (Addr.getReg()) {
3246 Register Reg = constrainOperandRegClass(II, Op: Addr.getReg(), OpNum: 0);
3247 MIB.addReg(RegNo: Reg);
3248 } else
3249 return false;
3250 } else {
3251 Register CallReg;
3252 if (Symbol) {
3253 Register ADRPReg = createResultReg(RC: &AArch64::GPR64commonRegClass);
3254 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADRP),
3255 DestReg: ADRPReg)
3256 .addSym(Sym: Symbol, TargetFlags: AArch64II::MO_GOT | AArch64II::MO_PAGE);
3257
3258 CallReg = createResultReg(RC: &AArch64::GPR64RegClass);
3259 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3260 MCID: TII.get(Opcode: AArch64::LDRXui), DestReg: CallReg)
3261 .addReg(RegNo: ADRPReg)
3262 .addSym(Sym: Symbol,
3263 TargetFlags: AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3264 } else if (Addr.getGlobalValue())
3265 CallReg = materializeGV(GV: Addr.getGlobalValue());
3266 else if (Addr.getReg())
3267 CallReg = Addr.getReg();
3268
3269 if (!CallReg)
3270 return false;
3271
3272 const MCInstrDesc &II = TII.get(Opcode: getBLRCallOpcode(MF: *MF));
3273 CallReg = constrainOperandRegClass(II, Op: CallReg, OpNum: 0);
3274 MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: CallReg);
3275 }
3276
3277 // Add implicit physical register uses to the call.
3278 for (auto Reg : CLI.OutRegs)
3279 MIB.addReg(RegNo: Reg, Flags: RegState::Implicit);
3280
3281 // Add a register mask with the call-preserved registers.
3282 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3283 MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC));
3284
3285 CLI.Call = MIB;
3286
3287 // Finish off the call including any return values.
3288 return finishCall(CLI, NumBytes);
3289}
3290
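// A memcpy is considered small enough to expand inline if it needs at most
// four accesses of the known alignment, or is under 32 bytes when the
// alignment is unknown.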
3291bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3292 if (Alignment)
3293 return Len / Alignment->value() <= 4;
3294 else
3295 return Len < 32;
3296}
3297
3298bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3299 uint64_t Len, MaybeAlign Alignment) {
3300  // Make sure we don't bloat code by inlining very large memcpys.
3301 if (!isMemCpySmall(Len, Alignment))
3302 return false;
3303
3304 int64_t UnscaledOffset = 0;
3305 Address OrigDest = Dest;
3306 Address OrigSrc = Src;
3307
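  // Emit the copy as a sequence of loads and stores, picking the widest
  // access that the remaining length (and the alignment, if known) permits.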
3308 while (Len) {
3309 MVT VT;
3310 if (!Alignment || *Alignment >= 8) {
3311 if (Len >= 8)
3312 VT = MVT::i64;
3313 else if (Len >= 4)
3314 VT = MVT::i32;
3315 else if (Len >= 2)
3316 VT = MVT::i16;
3317 else {
3318 VT = MVT::i8;
3319 }
3320 } else {
3321 assert(Alignment && "Alignment is set in this branch");
3322 // Bound based on alignment.
3323 if (Len >= 4 && *Alignment == 4)
3324 VT = MVT::i32;
3325 else if (Len >= 2 && *Alignment == 2)
3326 VT = MVT::i16;
3327 else {
3328 VT = MVT::i8;
3329 }
3330 }
3331
3332 Register ResultReg = emitLoad(VT, RetVT: VT, Addr: Src);
3333 if (!ResultReg)
3334 return false;
3335
3336 if (!emitStore(VT, SrcReg: ResultReg, Addr: Dest))
3337 return false;
3338
3339 int64_t Size = VT.getSizeInBits() / 8;
3340 Len -= Size;
3341 UnscaledOffset += Size;
3342
3343 // We need to recompute the unscaled offset for each iteration.
3344 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3345 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3346 }
3347
3348 return true;
3349}
3350
3351/// Check if it is possible to fold the condition from the XALU intrinsic
3352/// into the user. The condition code will only be updated on success.
3353bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3354 const Instruction *I,
3355 const Value *Cond) {
3356 if (!isa<ExtractValueInst>(Val: Cond))
3357 return false;
3358
3359 const auto *EV = cast<ExtractValueInst>(Val: Cond);
3360 if (!isa<IntrinsicInst>(Val: EV->getAggregateOperand()))
3361 return false;
3362
3363 const auto *II = cast<IntrinsicInst>(Val: EV->getAggregateOperand());
3364 MVT RetVT;
3365 const Function *Callee = II->getCalledFunction();
3366 Type *RetTy =
3367 cast<StructType>(Val: Callee->getReturnType())->getTypeAtIndex(N: 0U);
3368 if (!isTypeLegal(Ty: RetTy, VT&: RetVT))
3369 return false;
3370
3371 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3372 return false;
3373
3374 const Value *LHS = II->getArgOperand(i: 0);
3375 const Value *RHS = II->getArgOperand(i: 1);
3376
3377 // Canonicalize immediate to the RHS.
3378 if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS) && II->isCommutative())
3379 std::swap(a&: LHS, b&: RHS);
3380
3381 // Simplify multiplies.
3382 Intrinsic::ID IID = II->getIntrinsicID();
3383 switch (IID) {
3384 default:
3385 break;
3386 case Intrinsic::smul_with_overflow:
3387 if (const auto *C = dyn_cast<ConstantInt>(Val: RHS))
3388 if (C->getValue() == 2)
3389 IID = Intrinsic::sadd_with_overflow;
3390 break;
3391 case Intrinsic::umul_with_overflow:
3392 if (const auto *C = dyn_cast<ConstantInt>(Val: RHS))
3393 if (C->getValue() == 2)
3394 IID = Intrinsic::uadd_with_overflow;
3395 break;
3396 }
3397
3398 AArch64CC::CondCode TmpCC;
3399 switch (IID) {
3400 default:
3401 return false;
3402 case Intrinsic::sadd_with_overflow:
3403 case Intrinsic::ssub_with_overflow:
3404 TmpCC = AArch64CC::VS;
3405 break;
3406 case Intrinsic::uadd_with_overflow:
3407 TmpCC = AArch64CC::HS;
3408 break;
3409 case Intrinsic::usub_with_overflow:
3410 TmpCC = AArch64CC::LO;
3411 break;
3412 case Intrinsic::smul_with_overflow:
3413 case Intrinsic::umul_with_overflow:
3414 TmpCC = AArch64CC::NE;
3415 break;
3416 }
3417
3418 // Check if both instructions are in the same basic block.
3419 if (!isValueAvailable(V: II))
3420 return false;
3421
3422  // Make sure nothing is in the way.
3423 BasicBlock::const_iterator Start(I);
3424 BasicBlock::const_iterator End(II);
3425 for (auto Itr = std::prev(x: Start); Itr != End; --Itr) {
3426 // We only expect extractvalue instructions between the intrinsic and the
3427 // instruction to be selected.
3428 if (!isa<ExtractValueInst>(Val: Itr))
3429 return false;
3430
3431 // Check that the extractvalue operand comes from the intrinsic.
3432 const auto *EVI = cast<ExtractValueInst>(Val&: Itr);
3433 if (EVI->getAggregateOperand() != II)
3434 return false;
3435 }
3436
3437 CC = TmpCC;
3438 return true;
3439}
3440
3441bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3442 // FIXME: Handle more intrinsics.
3443 switch (II->getIntrinsicID()) {
3444 default: return false;
3445 case Intrinsic::frameaddress: {
3446 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3447 MFI.setFrameAddressIsTaken(true);
3448
3449 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3450 Register FramePtr = RegInfo->getFrameRegister(MF: *(FuncInfo.MF));
3451 Register SrcReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
3452 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3453 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: SrcReg).addReg(RegNo: FramePtr);
3454 // Recursively load frame address
3455 // ldr x0, [fp]
3456 // ldr x0, [x0]
3457 // ldr x0, [x0]
3458 // ...
3459 Register DestReg;
3460 unsigned Depth = cast<ConstantInt>(Val: II->getOperand(i_nocapture: 0))->getZExtValue();
3461 while (Depth--) {
3462 DestReg = fastEmitInst_ri(MachineInstOpcode: AArch64::LDRXui, RC: &AArch64::GPR64RegClass,
3463 Op0: SrcReg, Imm: 0);
3464 assert(DestReg && "Unexpected LDR instruction emission failure.");
3465 SrcReg = DestReg;
3466 }
3467
3468 updateValueMap(I: II, Reg: SrcReg);
3469 return true;
3470 }
3471 case Intrinsic::sponentry: {
3472 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3473
3474 // SP = FP + Fixed Object + 16
3475 int FI = MFI.CreateFixedObject(Size: 4, SPOffset: 0, IsImmutable: false);
3476 Register ResultReg = createResultReg(RC: &AArch64::GPR64spRegClass);
3477 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3478 MCID: TII.get(Opcode: AArch64::ADDXri), DestReg: ResultReg)
3479 .addFrameIndex(Idx: FI)
3480 .addImm(Val: 0)
3481 .addImm(Val: 0);
3482
3483 updateValueMap(I: II, Reg: ResultReg);
3484 return true;
3485 }
3486 case Intrinsic::memcpy:
3487 case Intrinsic::memmove: {
3488 const auto *MTI = cast<MemTransferInst>(Val: II);
3489 // Don't handle volatile.
3490 if (MTI->isVolatile())
3491 return false;
3492
3493    // Disable inlining for memmove before calls to computeAddress. Otherwise,
3494 // we would emit dead code because we don't currently handle memmoves.
3495 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3496 if (isa<ConstantInt>(Val: MTI->getLength()) && IsMemCpy) {
3497      // Small memcpys are common enough that we want to do them without a call
3498 // if possible.
3499 uint64_t Len = cast<ConstantInt>(Val: MTI->getLength())->getZExtValue();
3500 MaybeAlign Alignment;
3501 if (MTI->getDestAlign() || MTI->getSourceAlign())
3502 Alignment = std::min(a: MTI->getDestAlign().valueOrOne(),
3503 b: MTI->getSourceAlign().valueOrOne());
3504 if (isMemCpySmall(Len, Alignment)) {
3505 Address Dest, Src;
3506 if (!computeAddress(Obj: MTI->getRawDest(), Addr&: Dest) ||
3507 !computeAddress(Obj: MTI->getRawSource(), Addr&: Src))
3508 return false;
3509 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3510 return true;
3511 }
3512 }
3513
3514 if (!MTI->getLength()->getType()->isIntegerTy(Bitwidth: 64))
3515 return false;
3516
3517 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3518 // Fast instruction selection doesn't support the special
3519 // address spaces.
3520 return false;
3521
3522 const char *IntrMemName = isa<MemCpyInst>(Val: II) ? "memcpy" : "memmove";
3523 return lowerCallTo(CI: II, SymName: IntrMemName, NumArgs: II->arg_size() - 1);
3524 }
3525 case Intrinsic::memset: {
3526 const MemSetInst *MSI = cast<MemSetInst>(Val: II);
3527 // Don't handle volatile.
3528 if (MSI->isVolatile())
3529 return false;
3530
3531 if (!MSI->getLength()->getType()->isIntegerTy(Bitwidth: 64))
3532 return false;
3533
3534 if (MSI->getDestAddressSpace() > 255)
3535 // Fast instruction selection doesn't support the special
3536 // address spaces.
3537 return false;
3538
3539 return lowerCallTo(CI: II, SymName: "memset", NumArgs: II->arg_size() - 1);
3540 }
3541 case Intrinsic::sin:
3542 case Intrinsic::cos:
3543 case Intrinsic::tan:
3544 case Intrinsic::pow: {
3545 MVT RetVT;
3546 if (!isTypeLegal(Ty: II->getType(), VT&: RetVT))
3547 return false;
3548
3549 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3550 return false;
3551
3552 static const RTLIB::Libcall LibCallTable[4][2] = {
3553 {RTLIB::SIN_F32, RTLIB::SIN_F64},
3554 {RTLIB::COS_F32, RTLIB::COS_F64},
3555 {RTLIB::TAN_F32, RTLIB::TAN_F64},
3556 {RTLIB::POW_F32, RTLIB::POW_F64}};
3557 RTLIB::Libcall LC;
3558 bool Is64Bit = RetVT == MVT::f64;
3559 switch (II->getIntrinsicID()) {
3560 default:
3561 llvm_unreachable("Unexpected intrinsic.");
3562 case Intrinsic::sin:
3563 LC = LibCallTable[0][Is64Bit];
3564 break;
3565 case Intrinsic::cos:
3566 LC = LibCallTable[1][Is64Bit];
3567 break;
3568 case Intrinsic::tan:
3569 LC = LibCallTable[2][Is64Bit];
3570 break;
3571 case Intrinsic::pow:
3572 LC = LibCallTable[3][Is64Bit];
3573 break;
3574 }
3575
3576 ArgListTy Args;
3577 Args.reserve(n: II->arg_size());
3578
3579 // Populate the argument list.
3580 for (auto &Arg : II->args())
3581 Args.emplace_back(args: Arg);
3582
3583 CallLoweringInfo CLI;
3584 MCContext &Ctx = MF->getContext();
3585
3586 RTLIB::LibcallImpl LCImpl = LibcallLowering->getLibcallImpl(Call: LC);
3587 if (LCImpl == RTLIB::Unsupported)
3588 return false;
3589
3590 CallingConv::ID CC = LibcallLowering->getLibcallImplCallingConv(Call: LCImpl);
3591 StringRef FuncName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(CallImpl: LCImpl);
3592 CLI.setCallee(DL, Ctx, CC, ResultTy: II->getType(), Target: FuncName, ArgsList: std::move(Args));
3593 if (!lowerCallTo(CLI))
3594 return false;
3595 updateValueMap(I: II, Reg: CLI.ResultReg);
3596 return true;
3597 }
3598 case Intrinsic::fabs: {
3599 MVT VT;
3600 if (!isTypeLegal(Ty: II->getType(), VT))
3601 return false;
3602
3603 unsigned Opc;
3604 switch (VT.SimpleTy) {
3605 default:
3606 return false;
3607 case MVT::f32:
3608 Opc = AArch64::FABSSr;
3609 break;
3610 case MVT::f64:
3611 Opc = AArch64::FABSDr;
3612 break;
3613 }
3614 Register SrcReg = getRegForValue(V: II->getOperand(i_nocapture: 0));
3615 if (!SrcReg)
3616 return false;
3617 Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT));
3618 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg)
3619 .addReg(RegNo: SrcReg);
3620 updateValueMap(I: II, Reg: ResultReg);
3621 return true;
3622 }
3623 case Intrinsic::trap:
3624 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::BRK))
3625 .addImm(Val: 1);
3626 return true;
3627 case Intrinsic::debugtrap:
3628 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::BRK))
3629 .addImm(Val: 0xF000);
3630 return true;
3631
3632 case Intrinsic::sqrt: {
3633 Type *RetTy = II->getCalledFunction()->getReturnType();
3634
3635 MVT VT;
3636 if (!isTypeLegal(Ty: RetTy, VT))
3637 return false;
3638
3639 Register Op0Reg = getRegForValue(V: II->getOperand(i_nocapture: 0));
3640 if (!Op0Reg)
3641 return false;
3642
3643 Register ResultReg = fastEmit_r(VT, RetVT: VT, Opcode: ISD::FSQRT, Op0: Op0Reg);
3644 if (!ResultReg)
3645 return false;
3646
3647 updateValueMap(I: II, Reg: ResultReg);
3648 return true;
3649 }
3650 case Intrinsic::sadd_with_overflow:
3651 case Intrinsic::uadd_with_overflow:
3652 case Intrinsic::ssub_with_overflow:
3653 case Intrinsic::usub_with_overflow:
3654 case Intrinsic::smul_with_overflow:
3655 case Intrinsic::umul_with_overflow: {
3656 // This implements the basic lowering of the xalu with overflow intrinsics.
3657 const Function *Callee = II->getCalledFunction();
3658 auto *Ty = cast<StructType>(Val: Callee->getReturnType());
3659 Type *RetTy = Ty->getTypeAtIndex(N: 0U);
3660
3661 MVT VT;
3662 if (!isTypeLegal(Ty: RetTy, VT))
3663 return false;
3664
3665 if (VT != MVT::i32 && VT != MVT::i64)
3666 return false;
3667
3668 const Value *LHS = II->getArgOperand(i: 0);
3669 const Value *RHS = II->getArgOperand(i: 1);
3670 // Canonicalize immediate to the RHS.
3671 if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS) && II->isCommutative())
3672 std::swap(a&: LHS, b&: RHS);
3673
3674 // Simplify multiplies.
3675 Intrinsic::ID IID = II->getIntrinsicID();
3676 switch (IID) {
3677 default:
3678 break;
3679 case Intrinsic::smul_with_overflow:
3680 if (const auto *C = dyn_cast<ConstantInt>(Val: RHS))
3681 if (C->getValue() == 2) {
3682 IID = Intrinsic::sadd_with_overflow;
3683 RHS = LHS;
3684 }
3685 break;
3686 case Intrinsic::umul_with_overflow:
3687 if (const auto *C = dyn_cast<ConstantInt>(Val: RHS))
3688 if (C->getValue() == 2) {
3689 IID = Intrinsic::uadd_with_overflow;
3690 RHS = LHS;
3691 }
3692 break;
3693 }
3694
3695 Register ResultReg1, ResultReg2, MulReg;
3696 AArch64CC::CondCode CC = AArch64CC::Invalid;
3697 switch (IID) {
3698 default: llvm_unreachable("Unexpected intrinsic!");
3699 case Intrinsic::sadd_with_overflow:
3700 ResultReg1 = emitAdd(RetVT: VT, LHS, RHS, /*SetFlags=*/true);
3701 CC = AArch64CC::VS;
3702 break;
3703 case Intrinsic::uadd_with_overflow:
3704 ResultReg1 = emitAdd(RetVT: VT, LHS, RHS, /*SetFlags=*/true);
3705 CC = AArch64CC::HS;
3706 break;
3707 case Intrinsic::ssub_with_overflow:
3708 ResultReg1 = emitSub(RetVT: VT, LHS, RHS, /*SetFlags=*/true);
3709 CC = AArch64CC::VS;
3710 break;
3711 case Intrinsic::usub_with_overflow:
3712 ResultReg1 = emitSub(RetVT: VT, LHS, RHS, /*SetFlags=*/true);
3713 CC = AArch64CC::LO;
3714 break;
3715 case Intrinsic::smul_with_overflow: {
3716 CC = AArch64CC::NE;
3717 Register LHSReg = getRegForValue(V: LHS);
3718 if (!LHSReg)
3719 return false;
3720
3721 Register RHSReg = getRegForValue(V: RHS);
3722 if (!RHSReg)
3723 return false;
3724
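      // The multiply itself doesn't set flags. Overflow is detected by
      // checking that the upper half of the full product is just the sign
      // bits of the truncated result: compare the SMULL result against its
      // sign-extended low 32 bits (i32), or SMULH against the low half
      // arithmetically shifted right by 63 (i64). CC is NE on overflow.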
3725 if (VT == MVT::i32) {
3726 MulReg = emitSMULL_rr(RetVT: MVT::i64, Op0: LHSReg, Op1: RHSReg);
3727 Register MulSubReg =
3728 fastEmitInst_extractsubreg(RetVT: VT, Op0: MulReg, Idx: AArch64::sub_32);
3729 // cmp xreg, wreg, sxtw
3730 emitAddSub_rx(/*UseAdd=*/false, RetVT: MVT::i64, LHSReg: MulReg, RHSReg: MulSubReg,
3731 ExtType: AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3732 /*WantResult=*/false);
3733 MulReg = MulSubReg;
3734 } else {
3735 assert(VT == MVT::i64 && "Unexpected value type.");
3736 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3737 // reused in the next instruction.
3738 MulReg = emitMul_rr(RetVT: VT, Op0: LHSReg, Op1: RHSReg);
3739 Register SMULHReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISD::MULHS, Op0: LHSReg, Op1: RHSReg);
3740 emitSubs_rs(RetVT: VT, LHSReg: SMULHReg, RHSReg: MulReg, ShiftType: AArch64_AM::ASR, ShiftImm: 63,
3741 /*WantResult=*/false);
3742 }
3743 break;
3744 }
3745 case Intrinsic::umul_with_overflow: {
3746 CC = AArch64CC::NE;
3747 Register LHSReg = getRegForValue(V: LHS);
3748 if (!LHSReg)
3749 return false;
3750
3751 Register RHSReg = getRegForValue(V: RHS);
3752 if (!RHSReg)
3753 return false;
3754
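      // Unsigned overflow means the high half of the full product is nonzero:
      // test the upper 32 bits of the UMULL result for i32, or compare UMULH
      // against zero for i64. CC is NE on overflow.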
3755 if (VT == MVT::i32) {
3756 MulReg = emitUMULL_rr(RetVT: MVT::i64, Op0: LHSReg, Op1: RHSReg);
3757 // tst xreg, #0xffffffff00000000
3758 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3759 MCID: TII.get(Opcode: AArch64::ANDSXri), DestReg: AArch64::XZR)
3760 .addReg(RegNo: MulReg)
3761 .addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: 0xFFFFFFFF00000000, regSize: 64));
3762 MulReg = fastEmitInst_extractsubreg(RetVT: VT, Op0: MulReg, Idx: AArch64::sub_32);
3763 } else {
3764 assert(VT == MVT::i64 && "Unexpected value type.");
3765 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3766 // reused in the next instruction.
3767 MulReg = emitMul_rr(RetVT: VT, Op0: LHSReg, Op1: RHSReg);
3768 Register UMULHReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISD::MULHU, Op0: LHSReg, Op1: RHSReg);
3769 emitSubs_rr(RetVT: VT, LHSReg: AArch64::XZR, RHSReg: UMULHReg, /*WantResult=*/false);
3770 }
3771 break;
3772 }
3773 }
3774
3775 if (MulReg) {
3776 ResultReg1 = createResultReg(RC: TLI.getRegClassFor(VT));
3777 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3778 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg1).addReg(RegNo: MulReg);
3779 }
3780
3781 if (!ResultReg1)
3782 return false;
3783
3784 ResultReg2 = fastEmitInst_rri(MachineInstOpcode: AArch64::CSINCWr, RC: &AArch64::GPR32RegClass,
3785 Op0: AArch64::WZR, Op1: AArch64::WZR,
3786 Imm: getInvertedCondCode(Code: CC));
3787 (void)ResultReg2;
3788 assert((ResultReg1 + 1) == ResultReg2 &&
3789 "Nonconsecutive result registers.");
3790 updateValueMap(I: II, Reg: ResultReg1, NumRegs: 2);
3791 return true;
3792 }
3793 case Intrinsic::aarch64_crc32b:
3794 case Intrinsic::aarch64_crc32h:
3795 case Intrinsic::aarch64_crc32w:
3796 case Intrinsic::aarch64_crc32x:
3797 case Intrinsic::aarch64_crc32cb:
3798 case Intrinsic::aarch64_crc32ch:
3799 case Intrinsic::aarch64_crc32cw:
3800 case Intrinsic::aarch64_crc32cx: {
3801 if (!Subtarget->hasCRC())
3802 return false;
3803
3804 unsigned Opc;
3805 switch (II->getIntrinsicID()) {
3806 default:
3807 llvm_unreachable("Unexpected intrinsic!");
3808 case Intrinsic::aarch64_crc32b:
3809 Opc = AArch64::CRC32Brr;
3810 break;
3811 case Intrinsic::aarch64_crc32h:
3812 Opc = AArch64::CRC32Hrr;
3813 break;
3814 case Intrinsic::aarch64_crc32w:
3815 Opc = AArch64::CRC32Wrr;
3816 break;
3817 case Intrinsic::aarch64_crc32x:
3818 Opc = AArch64::CRC32Xrr;
3819 break;
3820 case Intrinsic::aarch64_crc32cb:
3821 Opc = AArch64::CRC32CBrr;
3822 break;
3823 case Intrinsic::aarch64_crc32ch:
3824 Opc = AArch64::CRC32CHrr;
3825 break;
3826 case Intrinsic::aarch64_crc32cw:
3827 Opc = AArch64::CRC32CWrr;
3828 break;
3829 case Intrinsic::aarch64_crc32cx:
3830 Opc = AArch64::CRC32CXrr;
3831 break;
3832 }
3833
3834 Register LHSReg = getRegForValue(V: II->getArgOperand(i: 0));
3835 Register RHSReg = getRegForValue(V: II->getArgOperand(i: 1));
3836 if (!LHSReg || !RHSReg)
3837 return false;
3838
3839 Register ResultReg =
3840 fastEmitInst_rr(MachineInstOpcode: Opc, RC: &AArch64::GPR32RegClass, Op0: LHSReg, Op1: RHSReg);
3841 updateValueMap(I: II, Reg: ResultReg);
3842 return true;
3843 }
3844 }
3845 return false;
3846}
3847
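// Lower a 'ret'. Only non-vararg functions returning nothing or a single
// value in a register are handled; small integers are extended to the ABI
// width before the copy into the return register and the RET_ReallyLR.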
3848bool AArch64FastISel::selectRet(const Instruction *I) {
3849 const ReturnInst *Ret = cast<ReturnInst>(Val: I);
3850 const Function &F = *I->getParent()->getParent();
3851
3852 if (!FuncInfo.CanLowerReturn)
3853 return false;
3854
3855 if (F.isVarArg())
3856 return false;
3857
3858 if (TLI.supportSwiftError() &&
3859 F.getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError))
3860 return false;
3861
3862 if (TLI.supportSplitCSR(MF: FuncInfo.MF))
3863 return false;
3864
3865 // Build a list of return value registers.
3866 SmallVector<Register, 4> RetRegs;
3867
3868 if (Ret->getNumOperands() > 0) {
3869 CallingConv::ID CC = F.getCallingConv();
3870 SmallVector<ISD::OutputArg, 4> Outs;
3871 GetReturnInfo(CC, ReturnType: F.getReturnType(), attr: F.getAttributes(), Outs, TLI, DL);
3872
3873 // Analyze operands of the call, assigning locations to each operand.
3874 SmallVector<CCValAssign, 16> ValLocs;
3875 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3876 CCInfo.AnalyzeReturn(Outs, Fn: RetCC_AArch64_AAPCS);
3877
3878 // Only handle a single return value for now.
3879 if (ValLocs.size() != 1)
3880 return false;
3881
3882 CCValAssign &VA = ValLocs[0];
3883 const Value *RV = Ret->getOperand(i_nocapture: 0);
3884
3885 // Don't bother handling odd stuff for now.
3886 if ((VA.getLocInfo() != CCValAssign::Full) &&
3887 (VA.getLocInfo() != CCValAssign::BCvt))
3888 return false;
3889
3890 // Only handle register returns for now.
3891 if (!VA.isRegLoc())
3892 return false;
3893
3894 Register Reg = getRegForValue(V: RV);
3895 if (!Reg)
3896 return false;
3897
3898 Register SrcReg = Reg + VA.getValNo();
3899 Register DestReg = VA.getLocReg();
3900 // Avoid a cross-class copy. This is very unlikely.
3901 if (!MRI.getRegClass(Reg: SrcReg)->contains(Reg: DestReg))
3902 return false;
3903
3904 EVT RVEVT = TLI.getValueType(DL, Ty: RV->getType());
3905 if (!RVEVT.isSimple())
3906 return false;
3907
3908 // Vectors (of > 1 lane) in big endian need tricky handling.
3909 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3910 !Subtarget->isLittleEndian())
3911 return false;
3912
3913 MVT RVVT = RVEVT.getSimpleVT();
3914 if (RVVT == MVT::f128)
3915 return false;
3916
3917 MVT DestVT = VA.getValVT();
3918 // Special handling for extended integers.
3919 if (RVVT != DestVT) {
3920 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3921 return false;
3922
3923 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3924 return false;
3925
3926 bool IsZExt = Outs[0].Flags.isZExt();
3927 SrcReg = emitIntExt(SrcVT: RVVT, SrcReg, DestVT, isZExt: IsZExt);
3928 if (!SrcReg)
3929 return false;
3930 }
3931
3932 // "Callee" (i.e. value producer) zero extends pointers at function
3933 // boundary.
3934 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3935 SrcReg = emitAnd_ri(RetVT: MVT::i64, LHSReg: SrcReg, Imm: 0xffffffff);
3936
3937 // Make the copy.
3938 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3939 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg).addReg(RegNo: SrcReg);
3940
3941 // Add register to return instruction.
3942 RetRegs.push_back(Elt: VA.getLocReg());
3943 }
3944
3945 MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
3946 MCID: TII.get(Opcode: AArch64::RET_ReallyLR));
3947 for (Register RetReg : RetRegs)
3948 MIB.addReg(RegNo: RetReg, Flags: RegState::Implicit);
3949 return true;
3950}
3951
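// Lower a scalar integer 'trunc'. Truncating i64 to i1/i8/i16 extracts the
// low 32 bits and masks them to the destination width; truncating between
// 32-bit or smaller types only needs a copy since the high bits are undefined.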
3952bool AArch64FastISel::selectTrunc(const Instruction *I) {
3953 Type *DestTy = I->getType();
3954 Value *Op = I->getOperand(i: 0);
3955 Type *SrcTy = Op->getType();
3956
3957 EVT SrcEVT = TLI.getValueType(DL, Ty: SrcTy, AllowUnknown: true);
3958 EVT DestEVT = TLI.getValueType(DL, Ty: DestTy, AllowUnknown: true);
3959 if (!SrcEVT.isSimple())
3960 return false;
3961 if (!DestEVT.isSimple())
3962 return false;
3963
3964 MVT SrcVT = SrcEVT.getSimpleVT();
3965 MVT DestVT = DestEVT.getSimpleVT();
3966
3967 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3968 SrcVT != MVT::i8)
3969 return false;
3970 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3971 DestVT != MVT::i1)
3972 return false;
3973
3974 Register SrcReg = getRegForValue(V: Op);
3975 if (!SrcReg)
3976 return false;
3977
3978  // If we're truncating from i64 to a smaller non-legal type then generate an
3979  // AND. Otherwise, we know the high bits are undefined and a truncate only
3980  // needs to generate a COPY. We cannot mark the source register also as the
3981  // result register, because this can incorrectly transfer the kill flag onto
3982  // the source register.
3983 Register ResultReg;
3984 if (SrcVT == MVT::i64) {
3985 uint64_t Mask = 0;
3986 switch (DestVT.SimpleTy) {
3987 default:
3988 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3989 return false;
3990 case MVT::i1:
3991 Mask = 0x1;
3992 break;
3993 case MVT::i8:
3994 Mask = 0xff;
3995 break;
3996 case MVT::i16:
3997 Mask = 0xffff;
3998 break;
3999 }
4000    // Issue an extract_subreg to get the lower 32 bits.
4001 Register Reg32 = fastEmitInst_extractsubreg(RetVT: MVT::i32, Op0: SrcReg,
4002 Idx: AArch64::sub_32);
4003 // Create the AND instruction which performs the actual truncation.
4004 ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: Reg32, Imm: Mask);
4005 assert(ResultReg && "Unexpected AND instruction emission failure.");
4006 } else {
4007 ResultReg = createResultReg(RC: &AArch64::GPR32RegClass);
4008 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4009 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
4010 .addReg(RegNo: SrcReg);
4011 }
4012
4013 updateValueMap(I, Reg: ResultReg);
4014 return true;
4015}
4016
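// Extend an i1 value. Zero-extension is an AND with 1 (plus a SUBREG_TO_REG
// when widening to i64); sign-extension uses SBFM to replicate bit 0, and
// sign-extending i1 to i64 is not handled yet.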
4017Register AArch64FastISel::emiti1Ext(Register SrcReg, MVT DestVT, bool IsZExt) {
4018 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4019 DestVT == MVT::i64) &&
4020 "Unexpected value type.");
4021 // Handle i8 and i16 as i32.
4022 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4023 DestVT = MVT::i32;
4024
4025 if (IsZExt) {
4026 Register ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: SrcReg, Imm: 1);
4027 assert(ResultReg && "Unexpected AND instruction emission failure.");
4028 if (DestVT == MVT::i64) {
4029      // We're zero-extending i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears
4030      // the upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4031 Register Reg64 = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
4032 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4033 MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: Reg64)
4034 .addImm(Val: 0)
4035 .addReg(RegNo: ResultReg)
4036 .addImm(Val: AArch64::sub_32);
4037 ResultReg = Reg64;
4038 }
4039 return ResultReg;
4040 } else {
4041 if (DestVT == MVT::i64) {
4042      // FIXME: We don't handle sign-extending i1 to i64 yet.
4043 return Register();
4044 }
4045 return fastEmitInst_rii(MachineInstOpcode: AArch64::SBFMWri, RC: &AArch64::GPR32RegClass, Op0: SrcReg,
4046 Imm1: 0, Imm2: 0);
4047 }
4048}
4049
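// MUL is an alias of MADD with the zero register as the addend, so emit the
// multiply as MADDWrrr/MADDXrrr with WZR/XZR.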
4050Register AArch64FastISel::emitMul_rr(MVT RetVT, Register Op0, Register Op1) {
4051 unsigned Opc;
4052 Register ZReg;
4053 switch (RetVT.SimpleTy) {
4054 default:
4055 return Register();
4056 case MVT::i8:
4057 case MVT::i16:
4058 case MVT::i32:
4059 RetVT = MVT::i32;
4060 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4061 case MVT::i64:
4062 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4063 }
4064
4065 const TargetRegisterClass *RC =
4066 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4067 return fastEmitInst_rrr(MachineInstOpcode: Opc, RC, Op0, Op1, Op2: ZReg);
4068}
4069
4070Register AArch64FastISel::emitSMULL_rr(MVT RetVT, Register Op0, Register Op1) {
4071 if (RetVT != MVT::i64)
4072 return Register();
4073
4074 return fastEmitInst_rrr(MachineInstOpcode: AArch64::SMADDLrrr, RC: &AArch64::GPR64RegClass,
4075 Op0, Op1, Op2: AArch64::XZR);
4076}
4077
4078Register AArch64FastISel::emitUMULL_rr(MVT RetVT, Register Op0, Register Op1) {
4079 if (RetVT != MVT::i64)
4080 return Register();
4081
4082 return fastEmitInst_rrr(MachineInstOpcode: AArch64::UMADDLrrr, RC: &AArch64::GPR64RegClass,
4083 Op0, Op1, Op2: AArch64::XZR);
4084}
4085
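// Variable shift by a register amount. i8/i16 shifts are performed in a
// 32-bit register: the shift amount is first masked to its declared width and
// the result is truncated back down afterwards.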
4086Register AArch64FastISel::emitLSL_rr(MVT RetVT, Register Op0Reg,
4087 Register Op1Reg) {
4088 unsigned Opc = 0;
4089 bool NeedTrunc = false;
4090 uint64_t Mask = 0;
4091 switch (RetVT.SimpleTy) {
4092 default:
4093 return Register();
4094 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4095 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4096 case MVT::i32: Opc = AArch64::LSLVWr; break;
4097 case MVT::i64: Opc = AArch64::LSLVXr; break;
4098 }
4099
4100 const TargetRegisterClass *RC =
4101 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4102 if (NeedTrunc)
4103 Op1Reg = emitAnd_ri(RetVT: MVT::i32, LHSReg: Op1Reg, Imm: Mask);
4104
4105 Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg);
4106 if (NeedTrunc)
4107 ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask);
4108 return ResultReg;
4109}
4110
4111Register AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0,
4112 uint64_t Shift, bool IsZExt) {
4113 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4114 "Unexpected source/return type pair.");
4115 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4116 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4117 "Unexpected source value type.");
4118 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4119 RetVT == MVT::i64) && "Unexpected return value type.");
4120
4121 bool Is64Bit = (RetVT == MVT::i64);
4122 unsigned RegSize = Is64Bit ? 64 : 32;
4123 unsigned DstBits = RetVT.getSizeInBits();
4124 unsigned SrcBits = SrcVT.getSizeInBits();
4125 const TargetRegisterClass *RC =
4126 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4127
4128 // Just emit a copy for "zero" shifts.
4129 if (Shift == 0) {
4130 if (RetVT == SrcVT) {
4131 Register ResultReg = createResultReg(RC);
4132 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4133 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
4134 .addReg(RegNo: Op0);
4135 return ResultReg;
4136 } else
4137 return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt);
4138 }
4139
4140 // Don't deal with undefined shifts.
4141 if (Shift >= DstBits)
4142 return Register();
4143
4144 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4145 // {S|U}BFM Wd, Wn, #r, #s
4146 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4147
4148 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4149 // %2 = shl i16 %1, 4
4150 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4151 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4152 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4153 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4154
4155 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4156 // %2 = shl i16 %1, 8
4157 // Wd<32+7-24,32-24> = Wn<7:0>
4158 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4159 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4160 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4161
4162 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4163 // %2 = shl i16 %1, 12
4164 // Wd<32+3-20,32-20> = Wn<3:0>
4165 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4166 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4167 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4168
4169 unsigned ImmR = RegSize - Shift;
4170 // Limit the width to the length of the source type.
4171 unsigned ImmS = std::min<unsigned>(a: SrcBits - 1, b: DstBits - 1 - Shift);
4172 static const unsigned OpcTable[2][2] = {
4173 {AArch64::SBFMWri, AArch64::SBFMXri},
4174 {AArch64::UBFMWri, AArch64::UBFMXri}
4175 };
4176 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4177 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4178 Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
4179 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4180 MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: TmpReg)
4181 .addImm(Val: 0)
4182 .addReg(RegNo: Op0)
4183 .addImm(Val: AArch64::sub_32);
4184 Op0 = TmpReg;
4185 }
4186 return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS);
4187}
4188
4189Register AArch64FastISel::emitLSR_rr(MVT RetVT, Register Op0Reg,
4190 Register Op1Reg) {
4191 unsigned Opc = 0;
4192 bool NeedTrunc = false;
4193 uint64_t Mask = 0;
4194 switch (RetVT.SimpleTy) {
4195 default:
4196 return Register();
4197 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4198 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4199 case MVT::i32: Opc = AArch64::LSRVWr; break;
4200 case MVT::i64: Opc = AArch64::LSRVXr; break;
4201 }
4202
4203 const TargetRegisterClass *RC =
4204 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4205 if (NeedTrunc) {
4206 Op0Reg = emitAnd_ri(RetVT: MVT::i32, LHSReg: Op0Reg, Imm: Mask);
4207 Op1Reg = emitAnd_ri(RetVT: MVT::i32, LHSReg: Op1Reg, Imm: Mask);
4208 }
4209 Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg);
4210 if (NeedTrunc)
4211 ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask);
4212 return ResultReg;
4213}
4214
4215Register AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0,
4216 uint64_t Shift, bool IsZExt) {
4217 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4218 "Unexpected source/return type pair.");
4219 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4220 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4221 "Unexpected source value type.");
4222 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4223 RetVT == MVT::i64) && "Unexpected return value type.");
4224
4225 bool Is64Bit = (RetVT == MVT::i64);
4226 unsigned RegSize = Is64Bit ? 64 : 32;
4227 unsigned DstBits = RetVT.getSizeInBits();
4228 unsigned SrcBits = SrcVT.getSizeInBits();
4229 const TargetRegisterClass *RC =
4230 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4231
4232 // Just emit a copy for "zero" shifts.
4233 if (Shift == 0) {
4234 if (RetVT == SrcVT) {
4235 Register ResultReg = createResultReg(RC);
4236 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4237 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
4238 .addReg(RegNo: Op0);
4239 return ResultReg;
4240 } else
4241 return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt);
4242 }
4243
4244 // Don't deal with undefined shifts.
4245 if (Shift >= DstBits)
4246 return Register();
4247
4248 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4249 // {S|U}BFM Wd, Wn, #r, #s
4250 // Wd<s-r:0> = Wn<s:r> when r <= s
4251
4252 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4253 // %2 = lshr i16 %1, 4
4254 // Wd<7-4:0> = Wn<7:4>
4255 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4256 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4257 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4258
4259 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4260 // %2 = lshr i16 %1, 8
4261  // Wd<7-7:0> = Wn<7:7>
4262 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4263 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4264 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4265
4266 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4267 // %2 = lshr i16 %1, 12
4268  // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
4269 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4270 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4271 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4272
4273 if (Shift >= SrcBits && IsZExt)
4274 return materializeInt(CI: ConstantInt::get(Context&: *Context, V: APInt(RegSize, 0)), VT: RetVT);
4275
4276 // It is not possible to fold a sign-extend into the LShr instruction. In this
4277 // case emit a sign-extend.
4278 if (!IsZExt) {
4279 Op0 = emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt);
4280 if (!Op0)
4281 return Register();
4282 SrcVT = RetVT;
4283 SrcBits = SrcVT.getSizeInBits();
4284 IsZExt = true;
4285 }
4286
4287 unsigned ImmR = std::min<unsigned>(a: SrcBits - 1, b: Shift);
4288 unsigned ImmS = SrcBits - 1;
4289 static const unsigned OpcTable[2][2] = {
4290 {AArch64::SBFMWri, AArch64::SBFMXri},
4291 {AArch64::UBFMWri, AArch64::UBFMXri}
4292 };
4293 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4294 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4295 Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
4296 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4297 MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: TmpReg)
4298 .addImm(Val: 0)
4299 .addReg(RegNo: Op0)
4300 .addImm(Val: AArch64::sub_32);
4301 Op0 = TmpReg;
4302 }
4303 return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS);
4304}
4305
4306Register AArch64FastISel::emitASR_rr(MVT RetVT, Register Op0Reg,
4307 Register Op1Reg) {
4308 unsigned Opc = 0;
4309 bool NeedTrunc = false;
4310 uint64_t Mask = 0;
4311 switch (RetVT.SimpleTy) {
4312 default:
4313 return Register();
4314 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4315 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4316 case MVT::i32: Opc = AArch64::ASRVWr; break;
4317 case MVT::i64: Opc = AArch64::ASRVXr; break;
4318 }
4319
4320 const TargetRegisterClass *RC =
4321 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4322 if (NeedTrunc) {
4323 Op0Reg = emitIntExt(SrcVT: RetVT, SrcReg: Op0Reg, DestVT: MVT::i32, /*isZExt=*/false);
4324 Op1Reg = emitAnd_ri(RetVT: MVT::i32, LHSReg: Op1Reg, Imm: Mask);
4325 }
4326 Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg);
4327 if (NeedTrunc)
4328 ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask);
4329 return ResultReg;
4330}
4331
4332Register AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0,
4333 uint64_t Shift, bool IsZExt) {
4334 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4335 "Unexpected source/return type pair.");
4336 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4337 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4338 "Unexpected source value type.");
4339 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4340 RetVT == MVT::i64) && "Unexpected return value type.");
4341
4342 bool Is64Bit = (RetVT == MVT::i64);
4343 unsigned RegSize = Is64Bit ? 64 : 32;
4344 unsigned DstBits = RetVT.getSizeInBits();
4345 unsigned SrcBits = SrcVT.getSizeInBits();
4346 const TargetRegisterClass *RC =
4347 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4348
4349 // Just emit a copy for "zero" shifts.
4350 if (Shift == 0) {
4351 if (RetVT == SrcVT) {
4352 Register ResultReg = createResultReg(RC);
4353 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4354 MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg)
4355 .addReg(RegNo: Op0);
4356 return ResultReg;
4357 } else
4358 return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt);
4359 }
4360
4361 // Don't deal with undefined shifts.
4362 if (Shift >= DstBits)
4363 return Register();
4364
4365 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4366 // {S|U}BFM Wd, Wn, #r, #s
4367 // Wd<s-r:0> = Wn<s:r> when r <= s
4368
4369 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4370 // %2 = ashr i16 %1, 4
4371 // Wd<7-4:0> = Wn<7:4>
4372 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4373 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4374 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4375
4376 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4377 // %2 = ashr i16 %1, 8
4378  // Wd<7-7:0> = Wn<7:7>
4379 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4380 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4381 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4382
4383 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4384 // %2 = ashr i16 %1, 12
4385  // Wd<7-7:0> = Wn<7:7> <- clamp r to 7
4386 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4387 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4388 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4389
4390 if (Shift >= SrcBits && IsZExt)
4391 return materializeInt(CI: ConstantInt::get(Context&: *Context, V: APInt(RegSize, 0)), VT: RetVT);
4392
4393 unsigned ImmR = std::min<unsigned>(a: SrcBits - 1, b: Shift);
4394 unsigned ImmS = SrcBits - 1;
4395 static const unsigned OpcTable[2][2] = {
4396 {AArch64::SBFMWri, AArch64::SBFMXri},
4397 {AArch64::UBFMWri, AArch64::UBFMXri}
4398 };
4399 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4400 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4401 Register TmpReg = MRI.createVirtualRegister(RegClass: RC);
4402 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4403 MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: TmpReg)
4404 .addImm(Val: 0)
4405 .addReg(RegNo: Op0)
4406 .addImm(Val: AArch64::sub_32);
4407 Op0 = TmpReg;
4408 }
4409 return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS);
4410}
4411
4412Register AArch64FastISel::emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT,
4413 bool IsZExt) {
4414 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4415
4416 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4417 // DestVT are odd things, so test to make sure that they are both types we can
4418 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4419 // bail out to SelectionDAG.
4420 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4421 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4422 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4423 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4424 return Register();
4425
4426 unsigned Opc;
4427 unsigned Imm = 0;
4428
4429 switch (SrcVT.SimpleTy) {
4430 default:
4431 return Register();
4432 case MVT::i1:
4433 return emiti1Ext(SrcReg, DestVT, IsZExt);
4434 case MVT::i8:
4435 if (DestVT == MVT::i64)
4436 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4437 else
4438 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4439 Imm = 7;
4440 break;
4441 case MVT::i16:
4442 if (DestVT == MVT::i64)
4443 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4444 else
4445 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4446 Imm = 15;
4447 break;
4448 case MVT::i32:
4449 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4450 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4451 Imm = 31;
4452 break;
4453 }
4454
4455 // Handle i8 and i16 as i32.
4456 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4457 DestVT = MVT::i32;
4458 else if (DestVT == MVT::i64) {
4459 Register Src64 = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
4460 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4461 MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: Src64)
4462 .addImm(Val: 0)
4463 .addReg(RegNo: SrcReg)
4464 .addImm(Val: AArch64::sub_32);
4465 SrcReg = Src64;
4466 }
4467
4468 const TargetRegisterClass *RC =
4469 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4470 return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0: SrcReg, Imm1: 0, Imm2: Imm);
4471}
4472
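// Return true if the machine instruction is a zero-extending form of an i8,
// i16 or i32 load. isSExtLoad below is the sign-extending counterpart.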
4473static bool isZExtLoad(const MachineInstr *LI) {
4474 switch (LI->getOpcode()) {
4475 default:
4476 return false;
4477 case AArch64::LDURBBi:
4478 case AArch64::LDURHHi:
4479 case AArch64::LDURWi:
4480 case AArch64::LDRBBui:
4481 case AArch64::LDRHHui:
4482 case AArch64::LDRWui:
4483 case AArch64::LDRBBroX:
4484 case AArch64::LDRHHroX:
4485 case AArch64::LDRWroX:
4486 case AArch64::LDRBBroW:
4487 case AArch64::LDRHHroW:
4488 case AArch64::LDRWroW:
4489 return true;
4490 }
4491}
4492
4493static bool isSExtLoad(const MachineInstr *LI) {
4494 switch (LI->getOpcode()) {
4495 default:
4496 return false;
4497 case AArch64::LDURSBWi:
4498 case AArch64::LDURSHWi:
4499 case AArch64::LDURSBXi:
4500 case AArch64::LDURSHXi:
4501 case AArch64::LDURSWi:
4502 case AArch64::LDRSBWui:
4503 case AArch64::LDRSHWui:
4504 case AArch64::LDRSBXui:
4505 case AArch64::LDRSHXui:
4506 case AArch64::LDRSWui:
4507 case AArch64::LDRSBWroX:
4508 case AArch64::LDRSHWroX:
4509 case AArch64::LDRSBXroX:
4510 case AArch64::LDRSHXroX:
4511 case AArch64::LDRSWroX:
4512 case AArch64::LDRSBWroW:
4513 case AArch64::LDRSHWroW:
4514 case AArch64::LDRSBXroW:
4515 case AArch64::LDRSHXroW:
4516 case AArch64::LDRSWroW:
4517 return true;
4518 }
4519}
4520
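// Try to fold a sign-/zero-extend into an already selected load of its
// operand: when the emitted load has the matching extension kind, the extend
// simply reuses (or re-widens) the load's result instead of emitting a
// separate extend instruction.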
4521bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4522 MVT SrcVT) {
4523 const auto *LI = dyn_cast<LoadInst>(Val: I->getOperand(i: 0));
4524 if (!LI || !LI->hasOneUse())
4525 return false;
4526
4527 // Check if the load instruction has already been selected.
4528 Register Reg = lookUpRegForValue(V: LI);
4529 if (!Reg)
4530 return false;
4531
4532 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4533 if (!MI)
4534 return false;
4535
4536 // Check if the correct load instruction has been emitted - SelectionDAG might
4537 // have emitted a zero-extending load, but we need a sign-extending load.
4538 bool IsZExt = isa<ZExtInst>(Val: I);
4539 const auto *LoadMI = MI;
4540 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4541 LoadMI->getOperand(i: 1).getSubReg() == AArch64::sub_32) {
4542 Register LoadReg = MI->getOperand(i: 1).getReg();
4543 LoadMI = MRI.getUniqueVRegDef(Reg: LoadReg);
4544 assert(LoadMI && "Expected valid instruction");
4545 }
4546 if (!(IsZExt && isZExtLoad(LI: LoadMI)) && !(!IsZExt && isSExtLoad(LI: LoadMI)))
4547 return false;
4548
4549 // Nothing to be done.
4550 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4551 updateValueMap(I, Reg);
4552 return true;
4553 }
4554
4555 if (IsZExt) {
4556 Register Reg64 = createResultReg(RC: &AArch64::GPR64RegClass);
4557 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4558 MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: Reg64)
4559 .addImm(Val: 0)
4560 .addReg(RegNo: Reg, Flags: getKillRegState(B: true))
4561 .addImm(Val: AArch64::sub_32);
4562 Reg = Reg64;
4563 } else {
4564 assert((MI->getOpcode() == TargetOpcode::COPY &&
4565 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4566 "Expected copy instruction");
4567 Reg = MI->getOperand(i: 1).getReg();
4568 MachineBasicBlock::iterator I(MI);
4569 removeDeadCode(I, E: std::next(x: I));
4570 }
4571 updateValueMap(I, Reg);
4572 return true;
4573}
4574
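// Lower zext/sext. Loads and function arguments that already carry the
// required extension are reused directly; otherwise an explicit UBFM/SBFM
// (via emitIntExt) is emitted.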
4575bool AArch64FastISel::selectIntExt(const Instruction *I) {
4576 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4577 "Unexpected integer extend instruction.");
4578 MVT RetVT;
4579 MVT SrcVT;
4580 if (!isTypeSupported(Ty: I->getType(), VT&: RetVT))
4581 return false;
4582
4583 if (!isTypeSupported(Ty: I->getOperand(i: 0)->getType(), VT&: SrcVT))
4584 return false;
4585
4586 // Try to optimize already sign-/zero-extended values from load instructions.
4587 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4588 return true;
4589
4590 Register SrcReg = getRegForValue(V: I->getOperand(i: 0));
4591 if (!SrcReg)
4592 return false;
4593
4594 // Try to optimize already sign-/zero-extended values from function arguments.
4595 bool IsZExt = isa<ZExtInst>(Val: I);
4596 if (const auto *Arg = dyn_cast<Argument>(Val: I->getOperand(i: 0))) {
4597 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4598 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4599 Register ResultReg = createResultReg(RC: &AArch64::GPR64RegClass);
4600 BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD,
4601 MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: ResultReg)
4602 .addImm(Val: 0)
4603 .addReg(RegNo: SrcReg)
4604 .addImm(Val: AArch64::sub_32);
4605 SrcReg = ResultReg;
4606 }
4607
4608 updateValueMap(I, Reg: SrcReg);
4609 return true;
4610 }
4611 }
4612
4613 Register ResultReg = emitIntExt(SrcVT, SrcReg, DestVT: RetVT, IsZExt);
4614 if (!ResultReg)
4615 return false;
4616
4617 updateValueMap(I, Reg: ResultReg);
4618 return true;
4619}
4620
4621bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4622 EVT DestEVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true);
4623 if (!DestEVT.isSimple())
4624 return false;
4625
4626 MVT DestVT = DestEVT.getSimpleVT();
4627 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4628 return false;
4629
4630 unsigned DivOpc;
4631 bool Is64bit = (DestVT == MVT::i64);
4632 switch (ISDOpcode) {
4633 default:
4634 return false;
4635 case ISD::SREM:
4636 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4637 break;
4638 case ISD::UREM:
4639 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4640 break;
4641 }
4642 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4643 Register Src0Reg = getRegForValue(V: I->getOperand(i: 0));
4644 if (!Src0Reg)
4645 return false;
4646
4647 Register Src1Reg = getRegForValue(V: I->getOperand(i: 1));
4648 if (!Src1Reg)
4649 return false;
4650
4651 const TargetRegisterClass *RC =
4652 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4653 Register QuotReg = fastEmitInst_rr(MachineInstOpcode: DivOpc, RC, Op0: Src0Reg, Op1: Src1Reg);
4654 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4655 // The remainder is computed as numerator - (quotient * denominator) using the
4656 // MSUB instruction.
4657 Register ResultReg = fastEmitInst_rrr(MachineInstOpcode: MSubOpc, RC, Op0: QuotReg, Op1: Src1Reg, Op2: Src0Reg);
4658 updateValueMap(I, Reg: ResultReg);
4659 return true;
4660}
4661
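// Lower a multiply. Multiplies by a power of two become an immediate shift,
// folding a free zero-/sign-extension of the other operand when possible;
// everything else goes through emitMul_rr (MADD).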
4662bool AArch64FastISel::selectMul(const Instruction *I) {
4663 MVT VT;
4664 if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true))
4665 return false;
4666
4667 if (VT.isVector())
4668 return selectBinaryOp(I, ISDOpcode: ISD::MUL);
4669
4670 const Value *Src0 = I->getOperand(i: 0);
4671 const Value *Src1 = I->getOperand(i: 1);
4672 if (const auto *C = dyn_cast<ConstantInt>(Val: Src0))
4673 if (C->getValue().isPowerOf2())
4674 std::swap(a&: Src0, b&: Src1);
4675
4676 // Try to simplify to a shift instruction.
4677 if (const auto *C = dyn_cast<ConstantInt>(Val: Src1))
4678 if (C->getValue().isPowerOf2()) {
4679 uint64_t ShiftVal = C->getValue().logBase2();
4680 MVT SrcVT = VT;
4681 bool IsZExt = true;
4682 if (const auto *ZExt = dyn_cast<ZExtInst>(Val: Src0)) {
4683 if (!isIntExtFree(I: ZExt)) {
4684 MVT VT;
4685 if (isValueAvailable(V: ZExt) && isTypeSupported(Ty: ZExt->getSrcTy(), VT)) {
4686 SrcVT = VT;
4687 IsZExt = true;
4688 Src0 = ZExt->getOperand(i_nocapture: 0);
4689 }
4690 }
4691 } else if (const auto *SExt = dyn_cast<SExtInst>(Val: Src0)) {
4692 if (!isIntExtFree(I: SExt)) {
4693 MVT VT;
4694 if (isValueAvailable(V: SExt) && isTypeSupported(Ty: SExt->getSrcTy(), VT)) {
4695 SrcVT = VT;
4696 IsZExt = false;
4697 Src0 = SExt->getOperand(i_nocapture: 0);
4698 }
4699 }
4700 }
4701
4702 Register Src0Reg = getRegForValue(V: Src0);
4703 if (!Src0Reg)
4704 return false;
4705
4706 Register ResultReg = emitLSL_ri(RetVT: VT, SrcVT, Op0: Src0Reg, Shift: ShiftVal, IsZExt);
4707
4708 if (ResultReg) {
4709 updateValueMap(I, Reg: ResultReg);
4710 return true;
4711 }
4712 }
4713
4714 Register Src0Reg = getRegForValue(V: I->getOperand(i: 0));
4715 if (!Src0Reg)
4716 return false;
4717
4718 Register Src1Reg = getRegForValue(V: I->getOperand(i: 1));
4719 if (!Src1Reg)
4720 return false;
4721
4722 Register ResultReg = emitMul_rr(RetVT: VT, Op0: Src0Reg, Op1: Src1Reg);
4723
4724 if (!ResultReg)
4725 return false;
4726
4727 updateValueMap(I, Reg: ResultReg);
4728 return true;
4729}
4730
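// Lower shl/lshr/ashr. Constant shift amounts are emitted as bitfield moves
// that can also fold a zero-/sign-extension of the shifted value; variable
// amounts use the LSLV/LSRV/ASRV register-shift instructions.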
4731bool AArch64FastISel::selectShift(const Instruction *I) {
4732 MVT RetVT;
4733 if (!isTypeSupported(Ty: I->getType(), VT&: RetVT, /*IsVectorAllowed=*/true))
4734 return false;
4735
4736 if (RetVT.isVector())
4737 return selectOperator(I, Opcode: I->getOpcode());
4738
4739 if (const auto *C = dyn_cast<ConstantInt>(Val: I->getOperand(i: 1))) {
4740 Register ResultReg;
4741 uint64_t ShiftVal = C->getZExtValue();
4742 MVT SrcVT = RetVT;
4743 bool IsZExt = I->getOpcode() != Instruction::AShr;
4744 const Value *Op0 = I->getOperand(i: 0);
4745 if (const auto *ZExt = dyn_cast<ZExtInst>(Val: Op0)) {
4746 if (!isIntExtFree(I: ZExt)) {
4747 MVT TmpVT;
4748 if (isValueAvailable(V: ZExt) && isTypeSupported(Ty: ZExt->getSrcTy(), VT&: TmpVT)) {
4749 SrcVT = TmpVT;
4750 IsZExt = true;
4751 Op0 = ZExt->getOperand(i_nocapture: 0);
4752 }
4753 }
4754 } else if (const auto *SExt = dyn_cast<SExtInst>(Val: Op0)) {
4755 if (!isIntExtFree(I: SExt)) {
4756 MVT TmpVT;
4757 if (isValueAvailable(V: SExt) && isTypeSupported(Ty: SExt->getSrcTy(), VT&: TmpVT)) {
4758 SrcVT = TmpVT;
4759 IsZExt = false;
4760 Op0 = SExt->getOperand(i_nocapture: 0);
4761 }
4762 }
4763 }
4764
4765 Register Op0Reg = getRegForValue(V: Op0);
4766 if (!Op0Reg)
4767 return false;
4768
4769 switch (I->getOpcode()) {
4770 default: llvm_unreachable("Unexpected instruction.");
4771 case Instruction::Shl:
4772 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt);
4773 break;
4774 case Instruction::AShr:
4775 ResultReg = emitASR_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt);
4776 break;
4777 case Instruction::LShr:
4778 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt);
4779 break;
4780 }
4781 if (!ResultReg)
4782 return false;
4783
4784 updateValueMap(I, Reg: ResultReg);
4785 return true;
4786 }
4787
4788 Register Op0Reg = getRegForValue(V: I->getOperand(i: 0));
4789 if (!Op0Reg)
4790 return false;
4791
4792 Register Op1Reg = getRegForValue(V: I->getOperand(i: 1));
4793 if (!Op1Reg)
4794 return false;
4795
4796 Register ResultReg;
4797 switch (I->getOpcode()) {
4798 default: llvm_unreachable("Unexpected instruction.");
4799 case Instruction::Shl:
4800 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4801 break;
4802 case Instruction::AShr:
4803 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4804 break;
4805 case Instruction::LShr:
4806 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4807 break;
4808 }
4809
4810 if (!ResultReg)
4811 return false;
4812
4813 updateValueMap(I, Reg: ResultReg);
4814 return true;
4815}
4816
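// Lower a bitcast between a 32- or 64-bit integer and a same-sized
// floating-point value as an FMOV between the GPR and FPR register files.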
4817bool AArch64FastISel::selectBitCast(const Instruction *I) {
4818 MVT RetVT, SrcVT;
4819
4820 if (!isTypeLegal(Ty: I->getOperand(i: 0)->getType(), VT&: SrcVT))
4821 return false;
4822 if (!isTypeLegal(Ty: I->getType(), VT&: RetVT))
4823 return false;
4824
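  // Only same-width integer <-> floating-point bitcasts are handled here; they
  // map directly onto an FMOV between a GPR and an FPR.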
  unsigned Opc;
  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
    Opc = AArch64::FMOVWSr;
  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
    Opc = AArch64::FMOVXDr;
  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
    Opc = AArch64::FMOVSWr;
  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
    Opc = AArch64::FMOVDXr;
  else
    return false;

  const TargetRegisterClass *RC = nullptr;
  switch (RetVT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type.");
  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
  }
  Register Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;

  Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectFRem(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

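  // AArch64 has no instruction for frem, so lower it to the runtime call
  // selected by the libcall lowering info.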
  RTLIB::LibcallImpl LCImpl =
      LibcallLowering->getLibcallImpl(RTLIB::getREM(RetVT));
  if (LCImpl == RTLIB::Unsupported)
    return false;

  ArgListTy Args;
  Args.reserve(I->getNumOperands());

  // Populate the argument list.
  for (auto &Arg : I->operands())
    Args.emplace_back(Arg);

  CallLoweringInfo CLI;
  MCContext &Ctx = MF->getContext();
  CallingConv::ID CC = LibcallLowering->getLibcallImplCallingConv(LCImpl);
  StringRef FuncName = RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LCImpl);

  CLI.setCallee(DL, Ctx, CC, I->getType(), FuncName, std::move(Args));
  if (!lowerCallTo(CLI))
    return false;
  updateValueMap(I, CLI.ResultReg);
  return true;
}

bool AArch64FastISel::selectSDiv(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  if (!isa<ConstantInt>(I->getOperand(1)))
    return selectBinaryOp(I, ISD::SDIV);

  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
      !(C.isPowerOf2() || C.isNegatedPowerOf2()))
    return selectBinaryOp(I, ISD::SDIV);

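  // Lower a signed divide by a (negated) power of two into an arithmetic
  // shift, with a fixup so the result rounds toward zero as signed division
  // requires.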
  unsigned Lg2 = C.countr_zero();
  Register Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  if (cast<BinaryOperator>(I)->isExact()) {
    Register ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
    if (!ResultReg)
      return false;
    updateValueMap(I, ResultReg);
    return true;
  }

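  // For the non-exact case, add (Pow2 - 1) to negative dividends so the
  // arithmetic shift rounds toward zero rather than toward negative infinity.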
  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
  Register AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
  if (!AddReg)
    return false;

  // (Src0 < 0) ? Pow2 - 1 : 0;
  if (!emitICmp_ri(VT, Src0Reg, 0))
    return false;

  unsigned SelectOpc;
  const TargetRegisterClass *RC;
  if (VT == MVT::i64) {
    SelectOpc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
  } else {
    SelectOpc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
  }
  Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
                                        AArch64CC::LT);
  if (!SelectReg)
    return false;

  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
  // negate the result.
  Register ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  Register ResultReg;
  if (C.isNegative())
    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
                              AArch64_AM::ASR, Lg2);
  else
    ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
/// have to duplicate it for AArch64, because otherwise we would fail during the
/// sign-extend emission.
Register AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
  Register IdxN = getRegForValue(Idx);
  if (!IdxN)
    // Unhandled operand. Halt "fast" selection and bail.
    return Register();

  // If the index is smaller or larger than intptr_t, truncate or extend it.
  MVT PtrVT = TLI.getPointerTy(DL);
  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
  if (IdxVT.bitsLT(PtrVT)) {
    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
  } else if (IdxVT.bitsGT(PtrVT))
    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
  return IdxN;
}

/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
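  // The ILP32 ABI uses 32-bit pointers, which this code does not handle.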
  if (Subtarget->isTargetILP32())
    return false;

  Register N = getRegForValue(I->getOperand(0));
  if (!N)
    return false;

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
  // into a single N = N + TotalOffset.
  uint64_t TotalOffs = 0;
  MVT VT = TLI.getPointerTy(DL);
  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (auto *StTy = GTI.getStructTypeOrNull()) {
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      // N = N + Offset
      if (Field)
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
    } else {
      // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
        TotalOffs += GTI.getSequentialElementStride(DL) *
                     cast<ConstantInt>(CI)->getSExtValue();
        continue;
      }
      if (TotalOffs) {
        N = emitAdd_ri_(VT, N, TotalOffs);
        if (!N)
          return false;
        TotalOffs = 0;
      }

      // N = N + Idx * ElementSize;
      uint64_t ElementSize = GTI.getSequentialElementStride(DL);
      Register IdxN = getRegForGEPIndex(Idx);
      if (!IdxN)
        return false;

      if (ElementSize != 1) {
        Register C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
        if (!C)
          return false;
        IdxN = emitMul_rr(VT, IdxN, C);
        if (!IdxN)
          return false;
      }
      N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
      if (!N)
        return false;
    }
  }
  if (TotalOffs) {
    N = emitAdd_ri_(VT, N, TotalOffs);
    if (!N)
      return false;
  }
  updateValueMap(I, N);
  return true;
}

bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
  assert(TM.getOptLevel() == CodeGenOptLevel::None &&
         "cmpxchg survived AtomicExpand at optlevel > -O0");

  auto *RetPairTy = cast<StructType>(I->getType());
  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
         "cmpxchg has a non-i1 status result");

  MVT VT;
  if (!isTypeLegal(RetTy, VT))
    return false;

  const TargetRegisterClass *ResRC;
  unsigned Opc, CmpOpc;
  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
  // extractvalue selection doesn't support that.
  if (VT == MVT::i32) {
    Opc = AArch64::CMP_SWAP_32;
    CmpOpc = AArch64::SUBSWrs;
    ResRC = &AArch64::GPR32RegClass;
  } else if (VT == MVT::i64) {
    Opc = AArch64::CMP_SWAP_64;
    CmpOpc = AArch64::SUBSXrs;
    ResRC = &AArch64::GPR64RegClass;
  } else {
    return false;
  }

  const MCInstrDesc &II = TII.get(Opc);

  Register AddrReg = getRegForValue(I->getPointerOperand());
  Register DesiredReg = getRegForValue(I->getCompareOperand());
  Register NewReg = getRegForValue(I->getNewValOperand());

  if (!AddrReg || !DesiredReg || !NewReg)
    return false;

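  // Constrain the operands to the register classes required by the CMP_SWAP
  // pseudo-instruction.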
  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
  DesiredReg = constrainOperandRegClass(II, DesiredReg, II.getNumDefs() + 1);
  NewReg = constrainOperandRegClass(II, NewReg, II.getNumDefs() + 2);

  const Register ResultReg1 = createResultReg(ResRC);
  const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
  const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);

  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
      .addDef(ResultReg1)
      .addDef(ScratchReg)
      .addUse(AddrReg)
      .addUse(DesiredReg)
      .addUse(NewReg);

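  // Compare the value returned by CMP_SWAP against the expected value and
  // materialize the i1 success flag: CSINC produces 1 on equality and 0
  // otherwise.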
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
      .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
      .addUse(ResultReg1)
      .addUse(DesiredReg)
      .addImm(0);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
      .addDef(ResultReg2)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::NE);

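  // Mapping the struct result with two registers assumes the value and status
  // results live in consecutive virtual registers.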
  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
  updateValueMap(I, ResultReg1, 2);
  return true;
}

bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
  if (TLI.fallBackToDAGISel(*I))
    return false;
  switch (I->getOpcode()) {
  default:
    break;
  case Instruction::Add:
  case Instruction::Sub:
    return selectAddSub(I);
  case Instruction::Mul:
    return selectMul(I);
  case Instruction::SDiv:
    return selectSDiv(I);
  case Instruction::SRem:
    if (!selectBinaryOp(I, ISD::SREM))
      return selectRem(I, ISD::SREM);
    return true;
  case Instruction::URem:
    if (!selectBinaryOp(I, ISD::UREM))
      return selectRem(I, ISD::UREM);
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return selectShift(I);
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    return selectLogicalOp(I);
  case Instruction::Br:
    return selectBranch(I);
  case Instruction::IndirectBr:
    return selectIndirectBr(I);
  case Instruction::BitCast:
    if (!FastISel::selectBitCast(I))
      return selectBitCast(I);
    return true;
  case Instruction::FPToSI:
    if (!selectCast(I, ISD::FP_TO_SINT))
      return selectFPToInt(I, /*Signed=*/true);
    return true;
  case Instruction::FPToUI:
    return selectFPToInt(I, /*Signed=*/false);
  case Instruction::ZExt:
  case Instruction::SExt:
    return selectIntExt(I);
  case Instruction::Trunc:
    if (!selectCast(I, ISD::TRUNCATE))
      return selectTrunc(I);
    return true;
  case Instruction::FPExt:
    return selectFPExt(I);
  case Instruction::FPTrunc:
    return selectFPTrunc(I);
  case Instruction::SIToFP:
    if (!selectCast(I, ISD::SINT_TO_FP))
      return selectIntToFP(I, /*Signed=*/true);
    return true;
  case Instruction::UIToFP:
    return selectIntToFP(I, /*Signed=*/false);
  case Instruction::Load:
    return selectLoad(I);
  case Instruction::Store:
    return selectStore(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return selectCmp(I);
  case Instruction::Select:
    return selectSelect(I);
  case Instruction::Ret:
    return selectRet(I);
  case Instruction::FRem:
    return selectFRem(I);
  case Instruction::GetElementPtr:
    return selectGetElementPtr(I);
  case Instruction::AtomicCmpXchg:
    return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
  }

  // Fall back to target-independent instruction selection.
  return selectOperator(I, I->getOpcode());
}

FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo,
                                  const LibcallLoweringInfo *LibcallLowering) {

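  // Functions that carry SME state (ZA/ZT0) or have a streaming or
  // streaming-compatible interface are not supported by FastISel; return
  // nullptr so SelectionDAG handles them.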
  SMEAttrs CallerAttrs =
      FuncInfo.MF->getInfo<AArch64FunctionInfo>()->getSMEFnAttrs();
  if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
      CallerAttrs.hasStreamingInterfaceOrBody() ||
      CallerAttrs.hasStreamingCompatibleInterface() ||
      CallerAttrs.hasAgnosticZAInterface())
    return nullptr;
  return new AArch64FastISel(FuncInfo, LibInfo, LibcallLowering);
}
