1 | //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the AArch64-specific support for the FastISel class. Some |
10 | // of the target-specific code is generated by tablegen in the file |
11 | // AArch64GenFastISel.inc, which is #included here. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "AArch64.h" |
16 | #include "AArch64CallingConvention.h" |
17 | #include "AArch64MachineFunctionInfo.h" |
18 | #include "AArch64RegisterInfo.h" |
19 | #include "AArch64Subtarget.h" |
20 | #include "MCTargetDesc/AArch64AddressingModes.h" |
21 | #include "Utils/AArch64BaseInfo.h" |
22 | #include "llvm/ADT/APFloat.h" |
23 | #include "llvm/ADT/APInt.h" |
24 | #include "llvm/ADT/DenseMap.h" |
25 | #include "llvm/ADT/SmallVector.h" |
26 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
27 | #include "llvm/CodeGen/CallingConvLower.h" |
28 | #include "llvm/CodeGen/FastISel.h" |
29 | #include "llvm/CodeGen/FunctionLoweringInfo.h" |
30 | #include "llvm/CodeGen/ISDOpcodes.h" |
31 | #include "llvm/CodeGen/MachineBasicBlock.h" |
32 | #include "llvm/CodeGen/MachineConstantPool.h" |
33 | #include "llvm/CodeGen/MachineFrameInfo.h" |
34 | #include "llvm/CodeGen/MachineInstr.h" |
35 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
36 | #include "llvm/CodeGen/MachineMemOperand.h" |
37 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
38 | #include "llvm/CodeGen/RuntimeLibcallUtil.h" |
39 | #include "llvm/CodeGen/ValueTypes.h" |
40 | #include "llvm/CodeGenTypes/MachineValueType.h" |
41 | #include "llvm/IR/Argument.h" |
42 | #include "llvm/IR/Attributes.h" |
43 | #include "llvm/IR/BasicBlock.h" |
44 | #include "llvm/IR/CallingConv.h" |
45 | #include "llvm/IR/Constant.h" |
46 | #include "llvm/IR/Constants.h" |
47 | #include "llvm/IR/DataLayout.h" |
48 | #include "llvm/IR/DerivedTypes.h" |
49 | #include "llvm/IR/Function.h" |
50 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
51 | #include "llvm/IR/GlobalValue.h" |
52 | #include "llvm/IR/InstrTypes.h" |
53 | #include "llvm/IR/Instruction.h" |
54 | #include "llvm/IR/Instructions.h" |
55 | #include "llvm/IR/IntrinsicInst.h" |
56 | #include "llvm/IR/Intrinsics.h" |
57 | #include "llvm/IR/IntrinsicsAArch64.h" |
58 | #include "llvm/IR/Module.h" |
59 | #include "llvm/IR/Operator.h" |
60 | #include "llvm/IR/Type.h" |
61 | #include "llvm/IR/User.h" |
62 | #include "llvm/IR/Value.h" |
63 | #include "llvm/MC/MCInstrDesc.h" |
64 | #include "llvm/MC/MCRegisterInfo.h" |
65 | #include "llvm/MC/MCSymbol.h" |
66 | #include "llvm/Support/AtomicOrdering.h" |
67 | #include "llvm/Support/Casting.h" |
68 | #include "llvm/Support/CodeGen.h" |
69 | #include "llvm/Support/Compiler.h" |
70 | #include "llvm/Support/ErrorHandling.h" |
71 | #include "llvm/Support/MathExtras.h" |
72 | #include <algorithm> |
73 | #include <cassert> |
74 | #include <cstdint> |
75 | #include <iterator> |
76 | #include <utility> |
77 | |
78 | using namespace llvm; |
79 | |
80 | namespace { |
81 | |
82 | class AArch64FastISel final : public FastISel { |
83 | class Address { |
84 | public: |
85 | using BaseKind = enum { |
86 | RegBase, |
87 | FrameIndexBase |
88 | }; |
89 | |
90 | private: |
91 | BaseKind Kind = RegBase; |
92 | AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend; |
93 | union { |
94 | unsigned Reg; |
95 | int FI; |
96 | } Base; |
97 | unsigned OffsetReg = 0; |
98 | unsigned Shift = 0; |
99 | int64_t Offset = 0; |
100 | const GlobalValue *GV = nullptr; |
101 | |
102 | public: |
103 | Address() { Base.Reg = 0; } |
104 | |
105 | void setKind(BaseKind K) { Kind = K; } |
106 | BaseKind getKind() const { return Kind; } |
107 | void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; } |
108 | AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; } |
109 | bool isRegBase() const { return Kind == RegBase; } |
110 | bool isFIBase() const { return Kind == FrameIndexBase; } |
111 | |
112 | void setReg(unsigned Reg) { |
113 | assert(isRegBase() && "Invalid base register access!" ); |
114 | Base.Reg = Reg; |
115 | } |
116 | |
117 | unsigned getReg() const { |
118 | assert(isRegBase() && "Invalid base register access!" ); |
119 | return Base.Reg; |
120 | } |
121 | |
122 | void setOffsetReg(unsigned Reg) { |
123 | OffsetReg = Reg; |
124 | } |
125 | |
126 | unsigned getOffsetReg() const { |
127 | return OffsetReg; |
128 | } |
129 | |
130 | void setFI(unsigned FI) { |
131 | assert(isFIBase() && "Invalid base frame index access!" ); |
132 | Base.FI = FI; |
133 | } |
134 | |
135 | unsigned getFI() const { |
136 | assert(isFIBase() && "Invalid base frame index access!" ); |
137 | return Base.FI; |
138 | } |
139 | |
140 | void setOffset(int64_t O) { Offset = O; } |
141 | int64_t getOffset() { return Offset; } |
142 | void setShift(unsigned S) { Shift = S; } |
143 | unsigned getShift() { return Shift; } |
144 | |
145 | void setGlobalValue(const GlobalValue *G) { GV = G; } |
146 | const GlobalValue *getGlobalValue() { return GV; } |
147 | }; |
148 | |
149 | /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can |
150 | /// make the right decision when generating code for different targets. |
151 | const AArch64Subtarget *Subtarget; |
152 | LLVMContext *Context; |
153 | |
154 | bool fastLowerArguments() override; |
155 | bool fastLowerCall(CallLoweringInfo &CLI) override; |
156 | bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; |
157 | |
158 | private: |
159 | // Selection routines. |
160 | bool selectAddSub(const Instruction *I); |
161 | bool selectLogicalOp(const Instruction *I); |
162 | bool selectLoad(const Instruction *I); |
163 | bool selectStore(const Instruction *I); |
164 | bool selectBranch(const Instruction *I); |
165 | bool selectIndirectBr(const Instruction *I); |
166 | bool selectCmp(const Instruction *I); |
167 | bool selectSelect(const Instruction *I); |
168 | bool selectFPExt(const Instruction *I); |
169 | bool selectFPTrunc(const Instruction *I); |
170 | bool selectFPToInt(const Instruction *I, bool Signed); |
171 | bool selectIntToFP(const Instruction *I, bool Signed); |
172 | bool selectRem(const Instruction *I, unsigned ISDOpcode); |
173 | bool selectRet(const Instruction *I); |
174 | bool selectTrunc(const Instruction *I); |
175 | bool selectIntExt(const Instruction *I); |
176 | bool selectMul(const Instruction *I); |
177 | bool selectShift(const Instruction *I); |
178 | bool selectBitCast(const Instruction *I); |
179 | bool selectFRem(const Instruction *I); |
180 | bool selectSDiv(const Instruction *I); |
181 | bool selectGetElementPtr(const Instruction *I); |
182 | bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I); |
183 | |
184 | // Utility helper routines. |
185 | bool isTypeLegal(Type *Ty, MVT &VT); |
186 | bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false); |
187 | bool isValueAvailable(const Value *V) const; |
188 | bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr); |
189 | bool computeCallAddress(const Value *V, Address &Addr); |
190 | bool simplifyAddress(Address &Addr, MVT VT); |
191 | void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB, |
192 | MachineMemOperand::Flags Flags, |
193 | unsigned ScaleFactor, MachineMemOperand *MMO); |
194 | bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment); |
195 | bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, |
196 | MaybeAlign Alignment); |
197 | bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I, |
198 | const Value *Cond); |
199 | bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT); |
200 | bool optimizeSelect(const SelectInst *SI); |
201 | unsigned getRegForGEPIndex(const Value *Idx); |
202 | |
203 | // Emit helper routines. |
204 | unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, |
205 | const Value *RHS, bool SetFlags = false, |
206 | bool WantResult = true, bool IsZExt = false); |
207 | unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, |
208 | unsigned RHSReg, bool SetFlags = false, |
209 | bool WantResult = true); |
210 | unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, |
211 | uint64_t Imm, bool SetFlags = false, |
212 | bool WantResult = true); |
213 | unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, |
214 | unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType, |
215 | uint64_t ShiftImm, bool SetFlags = false, |
216 | bool WantResult = true); |
217 | unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, |
218 | unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType, |
219 | uint64_t ShiftImm, bool SetFlags = false, |
220 | bool WantResult = true); |
221 | |
222 | // Emit functions. |
223 | bool emitCompareAndBranch(const BranchInst *BI); |
224 | bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt); |
225 | bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt); |
226 | bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm); |
227 | bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS); |
228 | unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true, |
229 | MachineMemOperand *MMO = nullptr); |
230 | bool emitStore(MVT VT, unsigned SrcReg, Address Addr, |
231 | MachineMemOperand *MMO = nullptr); |
232 | bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg, |
233 | MachineMemOperand *MMO = nullptr); |
234 | unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); |
235 | unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt); |
236 | unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, |
237 | bool SetFlags = false, bool WantResult = true, |
238 | bool IsZExt = false); |
239 | unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm); |
240 | unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS, |
241 | bool SetFlags = false, bool WantResult = true, |
242 | bool IsZExt = false); |
243 | unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg, |
244 | bool WantResult = true); |
245 | unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg, |
246 | AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm, |
247 | bool WantResult = true); |
248 | unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, |
249 | const Value *RHS); |
250 | unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, |
251 | uint64_t Imm); |
252 | unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, |
253 | unsigned RHSReg, uint64_t ShiftImm); |
254 | unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm); |
255 | unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1); |
256 | unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1); |
257 | unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1); |
258 | unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg); |
259 | unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm, |
260 | bool IsZExt = true); |
261 | unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg); |
262 | unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm, |
263 | bool IsZExt = true); |
264 | unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg); |
265 | unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm, |
266 | bool IsZExt = false); |
267 | |
268 | unsigned materializeInt(const ConstantInt *CI, MVT VT); |
269 | unsigned materializeFP(const ConstantFP *CFP, MVT VT); |
270 | unsigned materializeGV(const GlobalValue *GV); |
271 | |
272 | // Call handling routines. |
273 | private: |
274 | CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const; |
275 | bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs, |
276 | unsigned &NumBytes); |
277 | bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes); |
278 | |
279 | public: |
280 | // Backend specific FastISel code. |
281 | unsigned fastMaterializeAlloca(const AllocaInst *AI) override; |
282 | unsigned fastMaterializeConstant(const Constant *C) override; |
283 | unsigned fastMaterializeFloatZero(const ConstantFP* CF) override; |
284 | |
285 | explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo, |
286 | const TargetLibraryInfo *LibInfo) |
287 | : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { |
288 | Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>(); |
289 | Context = &FuncInfo.Fn->getContext(); |
290 | } |
291 | |
292 | bool fastSelectInstruction(const Instruction *I) override; |
293 | |
294 | #include "AArch64GenFastISel.inc" |
295 | }; |
296 | |
297 | } // end anonymous namespace |
298 | |
299 | /// Check if the sign-/zero-extend will be a noop. |
300 | static bool isIntExtFree(const Instruction *I) { |
301 | assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && |
302 | "Unexpected integer extend instruction." ); |
303 | assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() && |
304 | "Unexpected value type." ); |
305 | bool IsZExt = isa<ZExtInst>(Val: I); |
306 | |
307 | if (const auto *LI = dyn_cast<LoadInst>(Val: I->getOperand(i: 0))) |
308 | if (LI->hasOneUse()) |
309 | return true; |
310 | |
311 | if (const auto *Arg = dyn_cast<Argument>(Val: I->getOperand(i: 0))) |
312 | if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) |
313 | return true; |
314 | |
315 | return false; |
316 | } |
317 | |
318 | /// Determine the implicit scale factor that is applied by a memory |
319 | /// operation for a given value type. |
320 | static unsigned getImplicitScaleFactor(MVT VT) { |
321 | switch (VT.SimpleTy) { |
322 | default: |
323 | return 0; // invalid |
324 | case MVT::i1: // fall-through |
325 | case MVT::i8: |
326 | return 1; |
327 | case MVT::i16: |
328 | return 2; |
329 | case MVT::i32: // fall-through |
330 | case MVT::f32: |
331 | return 4; |
332 | case MVT::i64: // fall-through |
333 | case MVT::f64: |
334 | return 8; |
335 | } |
336 | } |
337 | |
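/// Pick the calling-convention assignment function for an outgoing call: GHC
/// and CFGuard_Check get their dedicated conventions, Darwin gets DarwinPCS,
/// Windows gets Win64PCS, and everything else uses AAPCS.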
338 | CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { |
339 | if (CC == CallingConv::GHC) |
340 | return CC_AArch64_GHC; |
341 | if (CC == CallingConv::CFGuard_Check) |
342 | return CC_AArch64_Win64_CFGuard_Check; |
343 | if (Subtarget->isTargetDarwin()) |
344 | return CC_AArch64_DarwinPCS; |
345 | if (Subtarget->isTargetWindows()) |
346 | return CC_AArch64_Win64PCS; |
347 | return CC_AArch64_AAPCS; |
348 | } |
349 | |
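/// Materialize the address of a static alloca as an ADDXri of its frame
/// index; dynamic allocas are not handled and return 0.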
350 | unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { |
351 | assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && |
352 | "Alloca should always return a pointer." ); |
353 | |
354 | // Don't handle dynamic allocas. |
355 | if (!FuncInfo.StaticAllocaMap.count(Val: AI)) |
356 | return 0; |
357 | |
358 | DenseMap<const AllocaInst *, int>::iterator SI = |
359 | FuncInfo.StaticAllocaMap.find(Val: AI); |
360 | |
361 | if (SI != FuncInfo.StaticAllocaMap.end()) { |
362 | Register ResultReg = createResultReg(RC: &AArch64::GPR64spRegClass); |
363 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADDXri), |
364 | DestReg: ResultReg) |
365 | .addFrameIndex(Idx: SI->second) |
366 | .addImm(Val: 0) |
367 | .addImm(Val: 0); |
368 | return ResultReg; |
369 | } |
370 | |
371 | return 0; |
372 | } |
373 | |
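/// Materialize an integer constant: zero becomes a COPY from WZR/XZR,
/// anything else goes through fastEmit_i with ISD::Constant.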
374 | unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) { |
375 | if (VT > MVT::i64) |
376 | return 0; |
377 | |
378 | if (!CI->isZero()) |
379 | return fastEmit_i(VT, RetVT: VT, Opcode: ISD::Constant, imm0: CI->getZExtValue()); |
380 | |
381 | // Create a copy from the zero register to materialize a "0" value. |
382 | const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass |
383 | : &AArch64::GPR32RegClass; |
384 | unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
385 | Register ResultReg = createResultReg(RC); |
386 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: TargetOpcode::COPY), |
387 | DestReg: ResultReg).addReg(RegNo: ZeroReg, flags: getKillRegState(B: true)); |
388 | return ResultReg; |
389 | } |
390 | |
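/// Materialize a floating-point constant, trying an FMOV immediate first,
/// then an integer immediate move plus COPY for the large code model, and
/// finally a constant-pool load via ADRP + LDR.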
391 | unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) { |
392 | // Positive zero (+0.0) has to be materialized with a fmov from the zero |
393 | // register, because the immediate version of fmov cannot encode zero. |
394 | if (CFP->isNullValue()) |
395 | return fastMaterializeFloatZero(CF: CFP); |
396 | |
397 | if (VT != MVT::f32 && VT != MVT::f64) |
398 | return 0; |
399 | |
400 | const APFloat Val = CFP->getValueAPF(); |
401 | bool Is64Bit = (VT == MVT::f64); |
402 | // This checks to see if we can use FMOV instructions to materialize |
403 | // a constant; otherwise we have to materialize via the constant pool. |
404 | int Imm = |
405 | Is64Bit ? AArch64_AM::getFP64Imm(FPImm: Val) : AArch64_AM::getFP32Imm(FPImm: Val); |
406 | if (Imm != -1) { |
407 | unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi; |
408 | return fastEmitInst_i(MachineInstOpcode: Opc, RC: TLI.getRegClassFor(VT), Imm); |
409 | } |
410 | |
411 | // For the large code model materialize the FP constant in code. |
412 | if (TM.getCodeModel() == CodeModel::Large) { |
413 | unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm; |
414 | const TargetRegisterClass *RC = Is64Bit ? |
415 | &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
416 | |
417 | Register TmpReg = createResultReg(RC); |
418 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc1), DestReg: TmpReg) |
419 | .addImm(Val: CFP->getValueAPF().bitcastToAPInt().getZExtValue()); |
420 | |
421 | Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT)); |
422 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
423 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg) |
424 | .addReg(RegNo: TmpReg, flags: getKillRegState(B: true)); |
425 | |
426 | return ResultReg; |
427 | } |
428 | |
429 | // Materialize via constant pool. MachineConstantPool wants an explicit |
430 | // alignment. |
431 | Align Alignment = DL.getPrefTypeAlign(Ty: CFP->getType()); |
432 | |
433 | unsigned CPI = MCP.getConstantPoolIndex(C: cast<Constant>(Val: CFP), Alignment); |
434 | Register ADRPReg = createResultReg(RC: &AArch64::GPR64commonRegClass); |
435 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADRP), |
436 | DestReg: ADRPReg).addConstantPoolIndex(Idx: CPI, Offset: 0, TargetFlags: AArch64II::MO_PAGE); |
437 | |
438 | unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui; |
439 | Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT)); |
440 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg) |
441 | .addReg(RegNo: ADRPReg) |
442 | .addConstantPoolIndex(Idx: CPI, Offset: 0, TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
443 | return ResultReg; |
444 | } |
445 | |
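/// Materialize the address of a global value with either ADRP + LDR (GOT
/// access) or ADRP + ADD, depending on how the subtarget classifies the
/// reference; thread-local globals are rejected.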
446 | unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { |
447 | // We can't handle thread-local variables quickly yet. |
448 | if (GV->isThreadLocal()) |
449 | return 0; |
450 | |
451 | // MachO still uses GOT for large code-model accesses, but ELF requires |
452 | // movz/movk sequences, which FastISel doesn't handle yet. |
453 | if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO()) |
454 | return 0; |
455 | |
456 | unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); |
457 | |
458 | EVT DestEVT = TLI.getValueType(DL, Ty: GV->getType(), AllowUnknown: true); |
459 | if (!DestEVT.isSimple()) |
460 | return 0; |
461 | |
462 | Register ADRPReg = createResultReg(RC: &AArch64::GPR64commonRegClass); |
463 | unsigned ResultReg; |
464 | |
465 | if (OpFlags & AArch64II::MO_GOT) { |
466 | // ADRP + LDRX |
467 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADRP), |
468 | DestReg: ADRPReg) |
469 | .addGlobalAddress(GV, Offset: 0, TargetFlags: AArch64II::MO_PAGE | OpFlags); |
470 | |
471 | unsigned LdrOpc; |
472 | if (Subtarget->isTargetILP32()) { |
473 | ResultReg = createResultReg(RC: &AArch64::GPR32RegClass); |
474 | LdrOpc = AArch64::LDRWui; |
475 | } else { |
476 | ResultReg = createResultReg(RC: &AArch64::GPR64RegClass); |
477 | LdrOpc = AArch64::LDRXui; |
478 | } |
479 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: LdrOpc), |
480 | DestReg: ResultReg) |
481 | .addReg(RegNo: ADRPReg) |
482 | .addGlobalAddress(GV, Offset: 0, TargetFlags: AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | |
483 | AArch64II::MO_NC | OpFlags); |
484 | if (!Subtarget->isTargetILP32()) |
485 | return ResultReg; |
486 | |
487 | // LDRWui produces a 32-bit register, but pointers in-register are 64 bits, |
488 | // so we must extend the result on ILP32. |
489 | Register Result64 = createResultReg(RC: &AArch64::GPR64RegClass); |
490 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
491 | MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG)) |
492 | .addDef(RegNo: Result64) |
493 | .addImm(Val: 0) |
494 | .addReg(RegNo: ResultReg, flags: RegState::Kill) |
495 | .addImm(Val: AArch64::sub_32); |
496 | return Result64; |
497 | } else { |
498 | // ADRP + ADDX |
499 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADRP), |
500 | DestReg: ADRPReg) |
501 | .addGlobalAddress(GV, Offset: 0, TargetFlags: AArch64II::MO_PAGE | OpFlags); |
502 | |
503 | if (OpFlags & AArch64II::MO_TAGGED) { |
504 | // MO_TAGGED on the page indicates a tagged address. Set the tag now. |
505 | // We do so by creating a MOVK that sets bits 48-63 of the register to |
506 | // (global address + 0x100000000 - PC) >> 48. This assumes that we're in |
507 | // the small code model so we can assume a binary size of <= 4GB, which |
508 | // makes the untagged PC relative offset positive. The binary must also be |
509 | // loaded into address range [0, 2^48). Both of these properties need to |
510 | // be ensured at runtime when using tagged addresses. |
511 | // |
512 | // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that |
513 | // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands |
514 | // are not exactly 1:1 with FastISel so we cannot easily abstract this |
515 | // out. At some point, it would be nice to find a way to not have this |
516 | // duplicate code. |
517 | unsigned DstReg = createResultReg(RC: &AArch64::GPR64commonRegClass); |
518 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::MOVKXi), |
519 | DestReg: DstReg) |
520 | .addReg(RegNo: ADRPReg) |
521 | .addGlobalAddress(GV, /*Offset=*/0x100000000, |
522 | TargetFlags: AArch64II::MO_PREL | AArch64II::MO_G3) |
523 | .addImm(Val: 48); |
524 | ADRPReg = DstReg; |
525 | } |
526 | |
527 | ResultReg = createResultReg(RC: &AArch64::GPR64spRegClass); |
528 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADDXri), |
529 | DestReg: ResultReg) |
530 | .addReg(RegNo: ADRPReg) |
531 | .addGlobalAddress(GV, Offset: 0, |
532 | TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags) |
533 | .addImm(Val: 0); |
534 | } |
535 | return ResultReg; |
536 | } |
537 | |
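/// Dispatch constant materialization to the integer, floating-point, and
/// global-value helpers; null pointers are lowered as a 64-bit integer zero.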
538 | unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { |
539 | EVT CEVT = TLI.getValueType(DL, Ty: C->getType(), AllowUnknown: true); |
540 | |
541 | // Only handle simple types. |
542 | if (!CEVT.isSimple()) |
543 | return 0; |
544 | MVT VT = CEVT.getSimpleVT(); |
545 | // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that, |
546 | // 'null' pointers need somewhat special treatment. |
547 | if (isa<ConstantPointerNull>(Val: C)) { |
548 | assert(VT == MVT::i64 && "Expected 64-bit pointers" ); |
549 | return materializeInt(CI: ConstantInt::get(Ty: Type::getInt64Ty(C&: *Context), V: 0), VT); |
550 | } |
551 | |
552 | if (const auto *CI = dyn_cast<ConstantInt>(Val: C)) |
553 | return materializeInt(CI, VT); |
554 | else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C)) |
555 | return materializeFP(CFP, VT); |
556 | else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C)) |
557 | return materializeGV(GV); |
558 | |
559 | return 0; |
560 | } |
561 | |
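/// Materialize +0.0 by copying WZR/XZR into a floating-point register with
/// FMOV.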
562 | unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) { |
563 | assert(CFP->isNullValue() && |
564 | "Floating-point constant is not a positive zero." ); |
565 | MVT VT; |
566 | if (!isTypeLegal(Ty: CFP->getType(), VT)) |
567 | return 0; |
568 | |
569 | if (VT != MVT::f32 && VT != MVT::f64) |
570 | return 0; |
571 | |
572 | bool Is64Bit = (VT == MVT::f64); |
573 | unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
574 | unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr; |
575 | return fastEmitInst_r(MachineInstOpcode: Opc, RC: TLI.getRegClassFor(VT), Op0: ZReg); |
576 | } |
577 | |
578 | /// Check if the multiply is by a power-of-2 constant. |
579 | static bool isMulPowOf2(const Value *I) { |
580 | if (const auto *MI = dyn_cast<MulOperator>(Val: I)) { |
581 | if (const auto *C = dyn_cast<ConstantInt>(Val: MI->getOperand(i_nocapture: 0))) |
582 | if (C->getValue().isPowerOf2()) |
583 | return true; |
584 | if (const auto *C = dyn_cast<ConstantInt>(Val: MI->getOperand(i_nocapture: 1))) |
585 | if (C->getValue().isPowerOf2()) |
586 | return true; |
587 | } |
588 | return false; |
589 | } |
590 | |
591 | // Computes the address to get to an object. |
592 | bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) |
593 | { |
594 | const User *U = nullptr; |
595 | unsigned Opcode = Instruction::UserOp1; |
596 | if (const Instruction *I = dyn_cast<Instruction>(Val: Obj)) { |
597 | // Don't walk into other basic blocks unless the object is an alloca from |
598 | // another block, otherwise it may not have a virtual register assigned. |
599 | if (FuncInfo.StaticAllocaMap.count(Val: static_cast<const AllocaInst *>(Obj)) || |
600 | FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
601 | Opcode = I->getOpcode(); |
602 | U = I; |
603 | } |
604 | } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Val: Obj)) { |
605 | Opcode = C->getOpcode(); |
606 | U = C; |
607 | } |
608 | |
609 | if (auto *Ty = dyn_cast<PointerType>(Val: Obj->getType())) |
610 | if (Ty->getAddressSpace() > 255) |
611 | // Fast instruction selection doesn't support the special |
612 | // address spaces. |
613 | return false; |
614 | |
615 | switch (Opcode) { |
616 | default: |
617 | break; |
618 | case Instruction::BitCast: |
619 | // Look through bitcasts. |
620 | return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty); |
621 | |
622 | case Instruction::IntToPtr: |
623 | // Look past no-op inttoptrs. |
624 | if (TLI.getValueType(DL, Ty: U->getOperand(i: 0)->getType()) == |
625 | TLI.getPointerTy(DL)) |
626 | return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty); |
627 | break; |
628 | |
629 | case Instruction::PtrToInt: |
630 | // Look past no-op ptrtoints. |
631 | if (TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL)) |
632 | return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty); |
633 | break; |
634 | |
635 | case Instruction::GetElementPtr: { |
636 | Address SavedAddr = Addr; |
637 | uint64_t TmpOffset = Addr.getOffset(); |
638 | |
639 | // Iterate through the GEP folding the constants into offsets where |
640 | // we can. |
641 | for (gep_type_iterator GTI = gep_type_begin(GEP: U), E = gep_type_end(GEP: U); |
642 | GTI != E; ++GTI) { |
643 | const Value *Op = GTI.getOperand(); |
644 | if (StructType *STy = GTI.getStructTypeOrNull()) { |
645 | const StructLayout *SL = DL.getStructLayout(Ty: STy); |
646 | unsigned Idx = cast<ConstantInt>(Val: Op)->getZExtValue(); |
647 | TmpOffset += SL->getElementOffset(Idx); |
648 | } else { |
649 | uint64_t S = GTI.getSequentialElementStride(DL); |
650 | while (true) { |
651 | if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Op)) { |
652 | // Constant-offset addressing. |
653 | TmpOffset += CI->getSExtValue() * S; |
654 | break; |
655 | } |
656 | if (canFoldAddIntoGEP(GEP: U, Add: Op)) { |
657 | // A compatible add with a constant operand. Fold the constant. |
658 | ConstantInt *CI = |
659 | cast<ConstantInt>(Val: cast<AddOperator>(Val: Op)->getOperand(i_nocapture: 1)); |
660 | TmpOffset += CI->getSExtValue() * S; |
661 | // Iterate on the other operand. |
662 | Op = cast<AddOperator>(Val: Op)->getOperand(i_nocapture: 0); |
663 | continue; |
664 | } |
665 | // Unsupported |
666 | goto unsupported_gep; |
667 | } |
668 | } |
669 | } |
670 | |
671 | // Try to grab the base operand now. |
672 | Addr.setOffset(TmpOffset); |
673 | if (computeAddress(Obj: U->getOperand(i: 0), Addr, Ty)) |
674 | return true; |
675 | |
676 | // We failed, restore everything and try the other options. |
677 | Addr = SavedAddr; |
678 | |
679 | unsupported_gep: |
680 | break; |
681 | } |
682 | case Instruction::Alloca: { |
683 | const AllocaInst *AI = cast<AllocaInst>(Val: Obj); |
684 | DenseMap<const AllocaInst *, int>::iterator SI = |
685 | FuncInfo.StaticAllocaMap.find(Val: AI); |
686 | if (SI != FuncInfo.StaticAllocaMap.end()) { |
687 | Addr.setKind(Address::FrameIndexBase); |
688 | Addr.setFI(SI->second); |
689 | return true; |
690 | } |
691 | break; |
692 | } |
693 | case Instruction::Add: { |
694 | // Adds of constants are common and easy enough. |
695 | const Value *LHS = U->getOperand(i: 0); |
696 | const Value *RHS = U->getOperand(i: 1); |
697 | |
698 | if (isa<ConstantInt>(Val: LHS)) |
699 | std::swap(a&: LHS, b&: RHS); |
700 | |
701 | if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: RHS)) { |
702 | Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); |
703 | return computeAddress(Obj: LHS, Addr, Ty); |
704 | } |
705 | |
706 | Address Backup = Addr; |
707 | if (computeAddress(Obj: LHS, Addr, Ty) && computeAddress(Obj: RHS, Addr, Ty)) |
708 | return true; |
709 | Addr = Backup; |
710 | |
711 | break; |
712 | } |
713 | case Instruction::Sub: { |
714 | // Subs of constants are common and easy enough. |
715 | const Value *LHS = U->getOperand(i: 0); |
716 | const Value *RHS = U->getOperand(i: 1); |
717 | |
718 | if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: RHS)) { |
719 | Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); |
720 | return computeAddress(Obj: LHS, Addr, Ty); |
721 | } |
722 | break; |
723 | } |
724 | case Instruction::Shl: { |
725 | if (Addr.getOffsetReg()) |
726 | break; |
727 | |
728 | const auto *CI = dyn_cast<ConstantInt>(Val: U->getOperand(i: 1)); |
729 | if (!CI) |
730 | break; |
731 | |
732 | unsigned Val = CI->getZExtValue(); |
733 | if (Val < 1 || Val > 3) |
734 | break; |
735 | |
736 | uint64_t NumBytes = 0; |
737 | if (Ty && Ty->isSized()) { |
738 | uint64_t NumBits = DL.getTypeSizeInBits(Ty); |
739 | NumBytes = NumBits / 8; |
740 | if (!isPowerOf2_64(Value: NumBits)) |
741 | NumBytes = 0; |
742 | } |
743 | |
744 | if (NumBytes != (1ULL << Val)) |
745 | break; |
746 | |
747 | Addr.setShift(Val); |
748 | Addr.setExtendType(AArch64_AM::LSL); |
749 | |
750 | const Value *Src = U->getOperand(i: 0); |
751 | if (const auto *I = dyn_cast<Instruction>(Val: Src)) { |
752 | if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
753 | // Fold the zext or sext when it won't become a noop. |
754 | if (const auto *ZE = dyn_cast<ZExtInst>(Val: I)) { |
755 | if (!isIntExtFree(I: ZE) && |
756 | ZE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
757 | Addr.setExtendType(AArch64_AM::UXTW); |
758 | Src = ZE->getOperand(i_nocapture: 0); |
759 | } |
760 | } else if (const auto *SE = dyn_cast<SExtInst>(Val: I)) { |
761 | if (!isIntExtFree(I: SE) && |
762 | SE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
763 | Addr.setExtendType(AArch64_AM::SXTW); |
764 | Src = SE->getOperand(i_nocapture: 0); |
765 | } |
766 | } |
767 | } |
768 | } |
769 | |
770 | if (const auto *AI = dyn_cast<BinaryOperator>(Val: Src)) |
771 | if (AI->getOpcode() == Instruction::And) { |
772 | const Value *LHS = AI->getOperand(i_nocapture: 0); |
773 | const Value *RHS = AI->getOperand(i_nocapture: 1); |
774 | |
775 | if (const auto *C = dyn_cast<ConstantInt>(Val: LHS)) |
776 | if (C->getValue() == 0xffffffff) |
777 | std::swap(a&: LHS, b&: RHS); |
778 | |
779 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) |
780 | if (C->getValue() == 0xffffffff) { |
781 | Addr.setExtendType(AArch64_AM::UXTW); |
782 | Register Reg = getRegForValue(V: LHS); |
783 | if (!Reg) |
784 | return false; |
785 | Reg = fastEmitInst_extractsubreg(RetVT: MVT::i32, Op0: Reg, Idx: AArch64::sub_32); |
786 | Addr.setOffsetReg(Reg); |
787 | return true; |
788 | } |
789 | } |
790 | |
791 | Register Reg = getRegForValue(V: Src); |
792 | if (!Reg) |
793 | return false; |
794 | Addr.setOffsetReg(Reg); |
795 | return true; |
796 | } |
797 | case Instruction::Mul: { |
798 | if (Addr.getOffsetReg()) |
799 | break; |
800 | |
801 | if (!isMulPowOf2(I: U)) |
802 | break; |
803 | |
804 | const Value *LHS = U->getOperand(i: 0); |
805 | const Value *RHS = U->getOperand(i: 1); |
806 | |
807 | // Canonicalize power-of-2 value to the RHS. |
808 | if (const auto *C = dyn_cast<ConstantInt>(Val: LHS)) |
809 | if (C->getValue().isPowerOf2()) |
810 | std::swap(a&: LHS, b&: RHS); |
811 | |
812 | assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt."); |
813 | const auto *C = cast<ConstantInt>(Val: RHS); |
814 | unsigned Val = C->getValue().logBase2(); |
815 | if (Val < 1 || Val > 3) |
816 | break; |
817 | |
818 | uint64_t NumBytes = 0; |
819 | if (Ty && Ty->isSized()) { |
820 | uint64_t NumBits = DL.getTypeSizeInBits(Ty); |
821 | NumBytes = NumBits / 8; |
822 | if (!isPowerOf2_64(Value: NumBits)) |
823 | NumBytes = 0; |
824 | } |
825 | |
826 | if (NumBytes != (1ULL << Val)) |
827 | break; |
828 | |
829 | Addr.setShift(Val); |
830 | Addr.setExtendType(AArch64_AM::LSL); |
831 | |
832 | const Value *Src = LHS; |
833 | if (const auto *I = dyn_cast<Instruction>(Val: Src)) { |
834 | if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { |
835 | // Fold the zext or sext when it won't become a noop. |
836 | if (const auto *ZE = dyn_cast<ZExtInst>(Val: I)) { |
837 | if (!isIntExtFree(I: ZE) && |
838 | ZE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
839 | Addr.setExtendType(AArch64_AM::UXTW); |
840 | Src = ZE->getOperand(i_nocapture: 0); |
841 | } |
842 | } else if (const auto *SE = dyn_cast<SExtInst>(Val: I)) { |
843 | if (!isIntExtFree(I: SE) && |
844 | SE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
845 | Addr.setExtendType(AArch64_AM::SXTW); |
846 | Src = SE->getOperand(i_nocapture: 0); |
847 | } |
848 | } |
849 | } |
850 | } |
851 | |
852 | Register Reg = getRegForValue(V: Src); |
853 | if (!Reg) |
854 | return false; |
855 | Addr.setOffsetReg(Reg); |
856 | return true; |
857 | } |
858 | case Instruction::And: { |
859 | if (Addr.getOffsetReg()) |
860 | break; |
861 | |
862 | if (!Ty || DL.getTypeSizeInBits(Ty) != 8) |
863 | break; |
864 | |
865 | const Value *LHS = U->getOperand(i: 0); |
866 | const Value *RHS = U->getOperand(i: 1); |
867 | |
868 | if (const auto *C = dyn_cast<ConstantInt>(Val: LHS)) |
869 | if (C->getValue() == 0xffffffff) |
870 | std::swap(a&: LHS, b&: RHS); |
871 | |
872 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) |
873 | if (C->getValue() == 0xffffffff) { |
874 | Addr.setShift(0); |
875 | Addr.setExtendType(AArch64_AM::LSL); |
876 | Addr.setExtendType(AArch64_AM::UXTW); |
877 | |
878 | Register Reg = getRegForValue(V: LHS); |
879 | if (!Reg) |
880 | return false; |
881 | Reg = fastEmitInst_extractsubreg(RetVT: MVT::i32, Op0: Reg, Idx: AArch64::sub_32); |
882 | Addr.setOffsetReg(Reg); |
883 | return true; |
884 | } |
885 | break; |
886 | } |
887 | case Instruction::SExt: |
888 | case Instruction::ZExt: { |
889 | if (!Addr.getReg() || Addr.getOffsetReg()) |
890 | break; |
891 | |
892 | const Value *Src = nullptr; |
893 | // Fold the zext or sext when it won't become a noop. |
894 | if (const auto *ZE = dyn_cast<ZExtInst>(Val: U)) { |
895 | if (!isIntExtFree(I: ZE) && ZE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
896 | Addr.setExtendType(AArch64_AM::UXTW); |
897 | Src = ZE->getOperand(i_nocapture: 0); |
898 | } |
899 | } else if (const auto *SE = dyn_cast<SExtInst>(Val: U)) { |
900 | if (!isIntExtFree(I: SE) && SE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
901 | Addr.setExtendType(AArch64_AM::SXTW); |
902 | Src = SE->getOperand(i_nocapture: 0); |
903 | } |
904 | } |
905 | |
906 | if (!Src) |
907 | break; |
908 | |
909 | Addr.setShift(0); |
910 | Register Reg = getRegForValue(V: Src); |
911 | if (!Reg) |
912 | return false; |
913 | Addr.setOffsetReg(Reg); |
914 | return true; |
915 | } |
916 | } // end switch |
917 | |
918 | if (Addr.isRegBase() && !Addr.getReg()) { |
919 | Register Reg = getRegForValue(V: Obj); |
920 | if (!Reg) |
921 | return false; |
922 | Addr.setReg(Reg); |
923 | return true; |
924 | } |
925 | |
926 | if (!Addr.getOffsetReg()) { |
927 | Register Reg = getRegForValue(V: Obj); |
928 | if (!Reg) |
929 | return false; |
930 | Addr.setOffsetReg(Reg); |
931 | return true; |
932 | } |
933 | |
934 | return false; |
935 | } |
936 | |
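/// Compute the address of a call target, looking through bitcasts and no-op
/// int<->ptr conversions within the current block; the result is either a
/// GlobalValue or a register holding the callee address.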
937 | bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { |
938 | const User *U = nullptr; |
939 | unsigned Opcode = Instruction::UserOp1; |
940 | bool InMBB = true; |
941 | |
942 | if (const auto *I = dyn_cast<Instruction>(Val: V)) { |
943 | Opcode = I->getOpcode(); |
944 | U = I; |
945 | InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); |
946 | } else if (const auto *C = dyn_cast<ConstantExpr>(Val: V)) { |
947 | Opcode = C->getOpcode(); |
948 | U = C; |
949 | } |
950 | |
951 | switch (Opcode) { |
952 | default: break; |
953 | case Instruction::BitCast: |
954 | // Look past bitcasts if its operand is in the same BB. |
955 | if (InMBB) |
956 | return computeCallAddress(V: U->getOperand(i: 0), Addr); |
957 | break; |
958 | case Instruction::IntToPtr: |
959 | // Look past no-op inttoptrs if its operand is in the same BB. |
960 | if (InMBB && |
961 | TLI.getValueType(DL, Ty: U->getOperand(i: 0)->getType()) == |
962 | TLI.getPointerTy(DL)) |
963 | return computeCallAddress(V: U->getOperand(i: 0), Addr); |
964 | break; |
965 | case Instruction::PtrToInt: |
966 | // Look past no-op ptrtoints if its operand is in the same BB. |
967 | if (InMBB && TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL)) |
968 | return computeCallAddress(V: U->getOperand(i: 0), Addr); |
969 | break; |
970 | } |
971 | |
972 | if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: V)) { |
973 | Addr.setGlobalValue(GV); |
974 | return true; |
975 | } |
976 | |
977 | // If all else fails, try to materialize the value in a register. |
978 | if (!Addr.getGlobalValue()) { |
979 | Addr.setReg(getRegForValue(V)); |
980 | return Addr.getReg() != 0; |
981 | } |
982 | |
983 | return false; |
984 | } |
985 | |
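/// Return true if the type maps to a simple MVT that is legal for the target;
/// f128 and ILP32 pointer types are explicitly rejected.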
986 | bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { |
987 | EVT evt = TLI.getValueType(DL, Ty, AllowUnknown: true); |
988 | |
989 | if (Subtarget->isTargetILP32() && Ty->isPointerTy()) |
990 | return false; |
991 | |
992 | // Only handle simple types. |
993 | if (evt == MVT::Other || !evt.isSimple()) |
994 | return false; |
995 | VT = evt.getSimpleVT(); |
996 | |
997 | // This is a legal type, but it's not something we handle in fast-isel. |
998 | if (VT == MVT::f128) |
999 | return false; |
1000 | |
1001 | // Handle all other legal types, i.e. a register that will directly hold this |
1002 | // value. |
1003 | return TLI.isTypeLegal(VT); |
1004 | } |
1005 | |
1006 | /// Determine if the value type is supported by FastISel. |
1007 | /// |
1008 | /// FastISel for AArch64 can handle more value types than are legal. This adds |
1009 | /// simple value types such as i1, i8, and i16. |
1010 | bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) { |
1011 | if (Ty->isVectorTy() && !IsVectorAllowed) |
1012 | return false; |
1013 | |
1014 | if (isTypeLegal(Ty, VT)) |
1015 | return true; |
1016 | |
1017 | // If this is a type that can be sign- or zero-extended to a basic operation, |
1018 | // go ahead and accept it now. |
1019 | if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) |
1020 | return true; |
1021 | |
1022 | return false; |
1023 | } |
1024 | |
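/// A value is available for folding if it is not an instruction, or if it is
/// defined in the basic block currently being selected.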
1025 | bool AArch64FastISel::isValueAvailable(const Value *V) const { |
1026 | if (!isa<Instruction>(Val: V)) |
1027 | return true; |
1028 | |
1029 | const auto *I = cast<Instruction>(Val: V); |
1030 | return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB; |
1031 | } |
1032 | |
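/// Massage an address until both its offset register and its immediate offset
/// can be encoded by the load/store, emitting ADD/LSL instructions as needed;
/// returns false if the address cannot be handled.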
1033 | bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { |
1034 | if (Subtarget->isTargetILP32()) |
1035 | return false; |
1036 | |
1037 | unsigned ScaleFactor = getImplicitScaleFactor(VT); |
1038 | if (!ScaleFactor) |
1039 | return false; |
1040 | |
1041 | bool ImmediateOffsetNeedsLowering = false; |
1042 | bool RegisterOffsetNeedsLowering = false; |
1043 | int64_t Offset = Addr.getOffset(); |
1044 | if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(x: Offset)) |
1045 | ImmediateOffsetNeedsLowering = true; |
1046 | else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) && |
1047 | !isUInt<12>(x: Offset / ScaleFactor)) |
1048 | ImmediateOffsetNeedsLowering = true; |
1049 | |
1050 | // Cannot encode an offset register and an immediate offset in the same |
1051 | // instruction. Fold the immediate offset into the load/store instruction and |
1052 | // emit an additional add to take care of the offset register. |
1053 | if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg()) |
1054 | RegisterOffsetNeedsLowering = true; |
1055 | |
1056 | // Cannot encode zero register as base. |
1057 | if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg()) |
1058 | RegisterOffsetNeedsLowering = true; |
1059 | |
1060 | // If this is a stack pointer and the offset needs to be simplified then put |
1061 | // the alloca address into a register, set the base type back to register and |
1062 | // continue. This should almost never happen. |
1063 | if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase()) |
1064 | { |
1065 | Register ResultReg = createResultReg(RC: &AArch64::GPR64spRegClass); |
1066 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADDXri), |
1067 | DestReg: ResultReg) |
1068 | .addFrameIndex(Idx: Addr.getFI()) |
1069 | .addImm(Val: 0) |
1070 | .addImm(Val: 0); |
1071 | Addr.setKind(Address::RegBase); |
1072 | Addr.setReg(ResultReg); |
1073 | } |
1074 | |
1075 | if (RegisterOffsetNeedsLowering) { |
1076 | unsigned ResultReg = 0; |
1077 | if (Addr.getReg()) { |
1078 | if (Addr.getExtendType() == AArch64_AM::SXTW || |
1079 | Addr.getExtendType() == AArch64_AM::UXTW ) |
1080 | ResultReg = emitAddSub_rx(/*UseAdd=*/true, RetVT: MVT::i64, LHSReg: Addr.getReg(), |
1081 | RHSReg: Addr.getOffsetReg(), ExtType: Addr.getExtendType(), |
1082 | ShiftImm: Addr.getShift()); |
1083 | else |
1084 | ResultReg = emitAddSub_rs(/*UseAdd=*/true, RetVT: MVT::i64, LHSReg: Addr.getReg(), |
1085 | RHSReg: Addr.getOffsetReg(), ShiftType: AArch64_AM::LSL, |
1086 | ShiftImm: Addr.getShift()); |
1087 | } else { |
1088 | if (Addr.getExtendType() == AArch64_AM::UXTW) |
1089 | ResultReg = emitLSL_ri(RetVT: MVT::i64, SrcVT: MVT::i32, Op0Reg: Addr.getOffsetReg(), |
1090 | Imm: Addr.getShift(), /*IsZExt=*/true); |
1091 | else if (Addr.getExtendType() == AArch64_AM::SXTW) |
1092 | ResultReg = emitLSL_ri(RetVT: MVT::i64, SrcVT: MVT::i32, Op0Reg: Addr.getOffsetReg(), |
1093 | Imm: Addr.getShift(), /*IsZExt=*/false); |
1094 | else |
1095 | ResultReg = emitLSL_ri(RetVT: MVT::i64, SrcVT: MVT::i64, Op0Reg: Addr.getOffsetReg(), |
1096 | Imm: Addr.getShift()); |
1097 | } |
1098 | if (!ResultReg) |
1099 | return false; |
1100 | |
1101 | Addr.setReg(ResultReg); |
1102 | Addr.setOffsetReg(0); |
1103 | Addr.setShift(0); |
1104 | Addr.setExtendType(AArch64_AM::InvalidShiftExtend); |
1105 | } |
1106 | |
1107 | // Since the offset is too large for the load/store instruction, get the |
1108 | // reg+offset into a register. |
1109 | if (ImmediateOffsetNeedsLowering) { |
1110 | unsigned ResultReg; |
1111 | if (Addr.getReg()) |
1112 | // Try to fold the immediate into the add instruction. |
1113 | ResultReg = emitAdd_ri_(VT: MVT::i64, Op0: Addr.getReg(), Imm: Offset); |
1114 | else |
1115 | ResultReg = fastEmit_i(VT: MVT::i64, RetVT: MVT::i64, Opcode: ISD::Constant, imm0: Offset); |
1116 | |
1117 | if (!ResultReg) |
1118 | return false; |
1119 | Addr.setReg(ResultReg); |
1120 | Addr.setOffset(0); |
1121 | } |
1122 | return true; |
1123 | } |
1124 | |
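/// Append the address operands (frame index or base register, plus either an
/// extended/shifted offset register or a scaled immediate) and the memory
/// operand to a load/store instruction being built.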
1125 | void AArch64FastISel::addLoadStoreOperands(Address &Addr, |
1126 | const MachineInstrBuilder &MIB, |
1127 | MachineMemOperand::Flags Flags, |
1128 | unsigned ScaleFactor, |
1129 | MachineMemOperand *MMO) { |
1130 | int64_t Offset = Addr.getOffset() / ScaleFactor; |
1131 | // Frame base works a bit differently. Handle it separately. |
1132 | if (Addr.isFIBase()) { |
1133 | int FI = Addr.getFI(); |
1134 | // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size |
1135 | // and alignment should be based on the VT. |
1136 | MMO = FuncInfo.MF->getMachineMemOperand( |
1137 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI, Offset), F: Flags, |
1138 | Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI)); |
1139 | // Now add the rest of the operands. |
1140 | MIB.addFrameIndex(Idx: FI).addImm(Val: Offset); |
1141 | } else { |
1142 | assert(Addr.isRegBase() && "Unexpected address kind." ); |
1143 | const MCInstrDesc &II = MIB->getDesc(); |
1144 | unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0; |
1145 | Addr.setReg( |
1146 | constrainOperandRegClass(II, Op: Addr.getReg(), OpNum: II.getNumDefs()+Idx)); |
1147 | Addr.setOffsetReg( |
1148 | constrainOperandRegClass(II, Op: Addr.getOffsetReg(), OpNum: II.getNumDefs()+Idx+1)); |
1149 | if (Addr.getOffsetReg()) { |
1150 | assert(Addr.getOffset() == 0 && "Unexpected offset" ); |
1151 | bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW || |
1152 | Addr.getExtendType() == AArch64_AM::SXTX; |
1153 | MIB.addReg(RegNo: Addr.getReg()); |
1154 | MIB.addReg(RegNo: Addr.getOffsetReg()); |
1155 | MIB.addImm(Val: IsSigned); |
1156 | MIB.addImm(Val: Addr.getShift() != 0); |
1157 | } else |
1158 | MIB.addReg(RegNo: Addr.getReg()).addImm(Val: Offset); |
1159 | } |
1160 | |
1161 | if (MMO) |
1162 | MIB.addMemOperand(MMO); |
1163 | } |
1164 | |
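/// Emit an add or subtract of two IR values. For adds, constants, power-of-2
/// multiplies, and constant shifts are canonicalized to the RHS so they can be
/// folded into the immediate, extended-register, or shifted-register forms
/// below.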
1165 | unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, |
1166 | const Value *RHS, bool SetFlags, |
1167 | bool WantResult, bool IsZExt) { |
1168 | AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend; |
1169 | bool NeedExtend = false; |
1170 | switch (RetVT.SimpleTy) { |
1171 | default: |
1172 | return 0; |
1173 | case MVT::i1: |
1174 | NeedExtend = true; |
1175 | break; |
1176 | case MVT::i8: |
1177 | NeedExtend = true; |
1178 | ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB; |
1179 | break; |
1180 | case MVT::i16: |
1181 | NeedExtend = true; |
1182 | ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH; |
1183 | break; |
1184 | case MVT::i32: // fall-through |
1185 | case MVT::i64: |
1186 | break; |
1187 | } |
1188 | MVT SrcVT = RetVT; |
1189 | RetVT.SimpleTy = std::max(a: RetVT.SimpleTy, b: MVT::i32); |
1190 | |
1191 | // Canonicalize immediates to the RHS first. |
1192 | if (UseAdd && isa<Constant>(Val: LHS) && !isa<Constant>(Val: RHS)) |
1193 | std::swap(a&: LHS, b&: RHS); |
1194 | |
1195 | // Canonicalize mul by power of 2 to the RHS. |
1196 | if (UseAdd && LHS->hasOneUse() && isValueAvailable(V: LHS)) |
1197 | if (isMulPowOf2(I: LHS)) |
1198 | std::swap(a&: LHS, b&: RHS); |
1199 | |
1200 | // Canonicalize shift immediate to the RHS. |
1201 | if (UseAdd && LHS->hasOneUse() && isValueAvailable(V: LHS)) |
1202 | if (const auto *SI = dyn_cast<BinaryOperator>(Val: LHS)) |
1203 | if (isa<ConstantInt>(Val: SI->getOperand(i_nocapture: 1))) |
1204 | if (SI->getOpcode() == Instruction::Shl || |
1205 | SI->getOpcode() == Instruction::LShr || |
1206 | SI->getOpcode() == Instruction::AShr ) |
1207 | std::swap(a&: LHS, b&: RHS); |
1208 | |
1209 | Register LHSReg = getRegForValue(V: LHS); |
1210 | if (!LHSReg) |
1211 | return 0; |
1212 | |
1213 | if (NeedExtend) |
1214 | LHSReg = emitIntExt(SrcVT, SrcReg: LHSReg, DestVT: RetVT, isZExt: IsZExt); |
1215 | |
1216 | unsigned ResultReg = 0; |
1217 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) { |
1218 | uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue(); |
1219 | if (C->isNegative()) |
1220 | ResultReg = emitAddSub_ri(UseAdd: !UseAdd, RetVT, LHSReg, Imm: -Imm, SetFlags, |
1221 | WantResult); |
1222 | else |
1223 | ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags, |
1224 | WantResult); |
1225 | } else if (const auto *C = dyn_cast<Constant>(Val: RHS)) |
1226 | if (C->isNullValue()) |
1227 | ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm: 0, SetFlags, WantResult); |
1228 | |
1229 | if (ResultReg) |
1230 | return ResultReg; |
1231 | |
1232 | // Only extend the RHS within the instruction if there is a valid extend type. |
1233 | if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && |
1234 | isValueAvailable(V: RHS)) { |
1235 | Register RHSReg = getRegForValue(V: RHS); |
1236 | if (!RHSReg) |
1237 | return 0; |
1238 | return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtType: ExtendType, ShiftImm: 0, |
1239 | SetFlags, WantResult); |
1240 | } |
1241 | |
1242 | // Check if the mul can be folded into the instruction. |
1243 | if (RHS->hasOneUse() && isValueAvailable(V: RHS)) { |
1244 | if (isMulPowOf2(I: RHS)) { |
1245 | const Value *MulLHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 0); |
1246 | const Value *MulRHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 1); |
1247 | |
1248 | if (const auto *C = dyn_cast<ConstantInt>(Val: MulLHS)) |
1249 | if (C->getValue().isPowerOf2()) |
1250 | std::swap(a&: MulLHS, b&: MulRHS); |
1251 | |
1252 | assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt." ); |
1253 | uint64_t ShiftVal = cast<ConstantInt>(Val: MulRHS)->getValue().logBase2(); |
1254 | Register RHSReg = getRegForValue(V: MulLHS); |
1255 | if (!RHSReg) |
1256 | return 0; |
1257 | ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType: AArch64_AM::LSL, |
1258 | ShiftImm: ShiftVal, SetFlags, WantResult); |
1259 | if (ResultReg) |
1260 | return ResultReg; |
1261 | } |
1262 | } |
1263 | |
1264 | // Check if the shift can be folded into the instruction. |
1265 | if (RHS->hasOneUse() && isValueAvailable(V: RHS)) { |
1266 | if (const auto *SI = dyn_cast<BinaryOperator>(Val: RHS)) { |
1267 | if (const auto *C = dyn_cast<ConstantInt>(Val: SI->getOperand(i_nocapture: 1))) { |
1268 | AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; |
1269 | switch (SI->getOpcode()) { |
1270 | default: break; |
1271 | case Instruction::Shl: ShiftType = AArch64_AM::LSL; break; |
1272 | case Instruction::LShr: ShiftType = AArch64_AM::LSR; break; |
1273 | case Instruction::AShr: ShiftType = AArch64_AM::ASR; break; |
1274 | } |
1275 | uint64_t ShiftVal = C->getZExtValue(); |
1276 | if (ShiftType != AArch64_AM::InvalidShiftExtend) { |
1277 | Register RHSReg = getRegForValue(V: SI->getOperand(i_nocapture: 0)); |
1278 | if (!RHSReg) |
1279 | return 0; |
1280 | ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType, |
1281 | ShiftImm: ShiftVal, SetFlags, WantResult); |
1282 | if (ResultReg) |
1283 | return ResultReg; |
1284 | } |
1285 | } |
1286 | } |
1287 | } |
1288 | |
1289 | Register RHSReg = getRegForValue(V: RHS); |
1290 | if (!RHSReg) |
1291 | return 0; |
1292 | |
1293 | if (NeedExtend) |
1294 | RHSReg = emitIntExt(SrcVT, SrcReg: RHSReg, DestVT: RetVT, isZExt: IsZExt); |
1295 | |
1296 | return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult); |
1297 | } |
1298 | |
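/// Register-register form of add/sub; only i32 and i64 are handled, and
/// SP/WSP operands are rejected.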
1299 | unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, |
1300 | unsigned RHSReg, bool SetFlags, |
1301 | bool WantResult) { |
1302 | assert(LHSReg && RHSReg && "Invalid register number." ); |
1303 | |
1304 | if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP || |
1305 | RHSReg == AArch64::SP || RHSReg == AArch64::WSP) |
1306 | return 0; |
1307 | |
1308 | if (RetVT != MVT::i32 && RetVT != MVT::i64) |
1309 | return 0; |
1310 | |
1311 | static const unsigned OpcTable[2][2][2] = { |
1312 | { { AArch64::SUBWrr, AArch64::SUBXrr }, |
1313 | { AArch64::ADDWrr, AArch64::ADDXrr } }, |
1314 | { { AArch64::SUBSWrr, AArch64::SUBSXrr }, |
1315 | { AArch64::ADDSWrr, AArch64::ADDSXrr } } |
1316 | }; |
1317 | bool Is64Bit = RetVT == MVT::i64; |
1318 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1319 | const TargetRegisterClass *RC = |
1320 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
1321 | unsigned ResultReg; |
1322 | if (WantResult) |
1323 | ResultReg = createResultReg(RC); |
1324 | else |
1325 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
1326 | |
1327 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
1328 | LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs()); |
1329 | RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1); |
1330 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg) |
1331 | .addReg(RegNo: LHSReg) |
1332 | .addReg(RegNo: RHSReg); |
1333 | return ResultReg; |
1334 | } |
1335 | |
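/// Immediate form of add/sub; the immediate must be a 12-bit value, optionally
/// shifted left by 12.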
1336 | unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, |
1337 | uint64_t Imm, bool SetFlags, |
1338 | bool WantResult) { |
1339 | assert(LHSReg && "Invalid register number." ); |
1340 | |
1341 | if (RetVT != MVT::i32 && RetVT != MVT::i64) |
1342 | return 0; |
1343 | |
1344 | unsigned ShiftImm; |
1345 | if (isUInt<12>(x: Imm)) |
1346 | ShiftImm = 0; |
1347 | else if ((Imm & 0xfff000) == Imm) { |
1348 | ShiftImm = 12; |
1349 | Imm >>= 12; |
1350 | } else |
1351 | return 0; |
1352 | |
1353 | static const unsigned OpcTable[2][2][2] = { |
1354 | { { AArch64::SUBWri, AArch64::SUBXri }, |
1355 | { AArch64::ADDWri, AArch64::ADDXri } }, |
1356 | { { AArch64::SUBSWri, AArch64::SUBSXri }, |
1357 | { AArch64::ADDSWri, AArch64::ADDSXri } } |
1358 | }; |
1359 | bool Is64Bit = RetVT == MVT::i64; |
1360 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1361 | const TargetRegisterClass *RC; |
1362 | if (SetFlags) |
1363 | RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
1364 | else |
1365 | RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; |
1366 | unsigned ResultReg; |
1367 | if (WantResult) |
1368 | ResultReg = createResultReg(RC); |
1369 | else |
1370 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
1371 | |
1372 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
1373 | LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs()); |
1374 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg) |
1375 | .addReg(RegNo: LHSReg) |
1376 | .addImm(Val: Imm) |
1377 | .addImm(Val: getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftImm)); |
1378 | return ResultReg; |
1379 | } |
1380 | |
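/// Shifted-register form of add/sub (LSL/LSR/ASR of the RHS by a constant
/// smaller than the register width).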
1381 | unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, |
1382 | unsigned RHSReg, |
1383 | AArch64_AM::ShiftExtendType ShiftType, |
1384 | uint64_t ShiftImm, bool SetFlags, |
1385 | bool WantResult) { |
1386 | assert(LHSReg && RHSReg && "Invalid register number." ); |
1387 | assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP && |
1388 | RHSReg != AArch64::SP && RHSReg != AArch64::WSP); |
1389 | |
1390 | if (RetVT != MVT::i32 && RetVT != MVT::i64) |
1391 | return 0; |
1392 | |
1393 | // Don't deal with undefined shifts. |
1394 | if (ShiftImm >= RetVT.getSizeInBits()) |
1395 | return 0; |
1396 | |
1397 | static const unsigned OpcTable[2][2][2] = { |
1398 | { { AArch64::SUBWrs, AArch64::SUBXrs }, |
1399 | { AArch64::ADDWrs, AArch64::ADDXrs } }, |
1400 | { { AArch64::SUBSWrs, AArch64::SUBSXrs }, |
1401 | { AArch64::ADDSWrs, AArch64::ADDSXrs } } |
1402 | }; |
1403 | bool Is64Bit = RetVT == MVT::i64; |
1404 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1405 | const TargetRegisterClass *RC = |
1406 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
1407 | unsigned ResultReg; |
1408 | if (WantResult) |
1409 | ResultReg = createResultReg(RC); |
1410 | else |
1411 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
1412 | |
1413 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
1414 | LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs()); |
1415 | RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1); |
1416 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg) |
1417 | .addReg(RegNo: LHSReg) |
1418 | .addReg(RegNo: RHSReg) |
1419 | .addImm(Val: getShifterImm(ST: ShiftType, Imm: ShiftImm)); |
1420 | return ResultReg; |
1421 | } |
1422 | |
1423 | unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, |
1424 | unsigned RHSReg, |
1425 | AArch64_AM::ShiftExtendType ExtType, |
1426 | uint64_t ShiftImm, bool SetFlags, |
1427 | bool WantResult) { |
1428 | assert(LHSReg && RHSReg && "Invalid register number."); |
1429 | assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR && |
1430 | RHSReg != AArch64::XZR && RHSReg != AArch64::WZR); |
1431 | |
1432 | if (RetVT != MVT::i32 && RetVT != MVT::i64) |
1433 | return 0; |
1434 | |
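// Only small left-shift amounts are valid for the extended-register form.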
1435 | if (ShiftImm >= 4) |
1436 | return 0; |
1437 | |
1438 | static const unsigned OpcTable[2][2][2] = { |
1439 | { { AArch64::SUBWrx, AArch64::SUBXrx }, |
1440 | { AArch64::ADDWrx, AArch64::ADDXrx } }, |
1441 | { { AArch64::SUBSWrx, AArch64::SUBSXrx }, |
1442 | { AArch64::ADDSWrx, AArch64::ADDSXrx } } |
1443 | }; |
1444 | bool Is64Bit = RetVT == MVT::i64; |
1445 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1446 | const TargetRegisterClass *RC = nullptr; |
1447 | if (SetFlags) |
1448 | RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
1449 | else |
1450 | RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; |
1451 | unsigned ResultReg; |
1452 | if (WantResult) |
1453 | ResultReg = createResultReg(RC); |
1454 | else |
1455 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
1456 | |
1457 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
1458 | LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs()); |
1459 | RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1); |
1460 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg) |
1461 | .addReg(RegNo: LHSReg) |
1462 | .addReg(RegNo: RHSReg) |
1463 | .addImm(Val: getArithExtendImm(ET: ExtType, Imm: ShiftImm)); |
1464 | return ResultReg; |
1465 | } |
1466 | |
1467 | bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { |
1468 | Type *Ty = LHS->getType(); |
1469 | EVT EVT = TLI.getValueType(DL, Ty, AllowUnknown: true); |
1470 | if (!EVT.isSimple()) |
1471 | return false; |
1472 | MVT VT = EVT.getSimpleVT(); |
1473 | |
1474 | switch (VT.SimpleTy) { |
1475 | default: |
1476 | return false; |
1477 | case MVT::i1: |
1478 | case MVT::i8: |
1479 | case MVT::i16: |
1480 | case MVT::i32: |
1481 | case MVT::i64: |
1482 | return emitICmp(RetVT: VT, LHS, RHS, IsZExt); |
1483 | case MVT::f32: |
1484 | case MVT::f64: |
1485 | return emitFCmp(RetVT: VT, LHS, RHS); |
1486 | } |
1487 | } |
1488 | |
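/// An integer compare is emitted as a flag-setting subtract whose result is
/// discarded.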
1489 | bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, |
1490 | bool IsZExt) { |
1491 | return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false, |
1492 | IsZExt) != 0; |
1493 | } |
1494 | |
1495 | bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) { |
1496 | return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm, |
1497 | /*SetFlags=*/true, /*WantResult=*/false) != 0; |
1498 | } |
1499 | |
1500 | bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) { |
1501 | if (RetVT != MVT::f32 && RetVT != MVT::f64) |
1502 | return false; |
1503 | |
1504 | // Check to see if the 2nd operand is a constant that we can encode directly |
1505 | // in the compare. |
1506 | bool UseImm = false; |
1507 | if (const auto *CFP = dyn_cast<ConstantFP>(Val: RHS)) |
1508 | if (CFP->isZero() && !CFP->isNegative()) |
1509 | UseImm = true; |
1510 | |
1511 | Register LHSReg = getRegForValue(V: LHS); |
1512 | if (!LHSReg) |
1513 | return false; |
1514 | |
1515 | if (UseImm) { |
1516 | unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri; |
1517 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc)) |
1518 | .addReg(RegNo: LHSReg); |
1519 | return true; |
1520 | } |
1521 | |
1522 | Register RHSReg = getRegForValue(V: RHS); |
1523 | if (!RHSReg) |
1524 | return false; |
1525 | |
1526 | unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr; |
1527 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc)) |
1528 | .addReg(RegNo: LHSReg) |
1529 | .addReg(RegNo: RHSReg); |
1530 | return true; |
1531 | } |
1532 | |
1533 | unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, |
1534 | bool SetFlags, bool WantResult, bool IsZExt) { |
1535 | return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult, |
1536 | IsZExt); |
1537 | } |
1538 | |
1539 | /// This method is a wrapper to simplify add emission. |
1540 | /// |
1541 | /// First try to emit an add with an immediate operand using emitAddSub_ri. If |
1542 | /// that fails, then try to materialize the immediate into a register and use |
1543 | /// emitAddSub_rr instead. |
1544 | unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) { |
1545 | unsigned ResultReg; |
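// A negative immediate is handled by emitting a subtract of its magnitude.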
1546 | if (Imm < 0) |
1547 | ResultReg = emitAddSub_ri(UseAdd: false, RetVT: VT, LHSReg: Op0, Imm: -Imm); |
1548 | else |
1549 | ResultReg = emitAddSub_ri(UseAdd: true, RetVT: VT, LHSReg: Op0, Imm); |
1550 | |
1551 | if (ResultReg) |
1552 | return ResultReg; |
1553 | |
1554 | unsigned CReg = fastEmit_i(VT, RetVT: VT, Opcode: ISD::Constant, imm0: Imm); |
1555 | if (!CReg) |
1556 | return 0; |
1557 | |
1558 | ResultReg = emitAddSub_rr(UseAdd: true, RetVT: VT, LHSReg: Op0, RHSReg: CReg); |
1559 | return ResultReg; |
1560 | } |
1561 | |
1562 | unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS, |
1563 | bool SetFlags, bool WantResult, bool IsZExt) { |
1564 | return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult, |
1565 | IsZExt); |
1566 | } |
1567 | |
1568 | unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg, |
1569 | unsigned RHSReg, bool WantResult) { |
1570 | return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, |
1571 | /*SetFlags=*/true, WantResult); |
1572 | } |
1573 | |
1574 | unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg, |
1575 | unsigned RHSReg, |
1576 | AArch64_AM::ShiftExtendType ShiftType, |
1577 | uint64_t ShiftImm, bool WantResult) { |
1578 | return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType, |
1579 | ShiftImm, /*SetFlags=*/true, WantResult); |
1580 | } |
1581 | |
1582 | unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, |
1583 | const Value *LHS, const Value *RHS) { |
1584 | // Canonicalize immediates to the RHS first. |
1585 | if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS)) |
1586 | std::swap(a&: LHS, b&: RHS); |
1587 | |
1588 | // Canonicalize mul by power-of-2 to the RHS. |
1589 | if (LHS->hasOneUse() && isValueAvailable(V: LHS)) |
1590 | if (isMulPowOf2(I: LHS)) |
1591 | std::swap(a&: LHS, b&: RHS); |
1592 | |
1593 | // Canonicalize shift immediate to the RHS. |
1594 | if (LHS->hasOneUse() && isValueAvailable(V: LHS)) |
1595 | if (const auto *SI = dyn_cast<ShlOperator>(Val: LHS)) |
1596 | if (isa<ConstantInt>(Val: SI->getOperand(i_nocapture: 1))) |
1597 | std::swap(a&: LHS, b&: RHS); |
1598 | |
1599 | Register LHSReg = getRegForValue(V: LHS); |
1600 | if (!LHSReg) |
1601 | return 0; |
1602 | |
1603 | unsigned ResultReg = 0; |
1604 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) { |
1605 | uint64_t Imm = C->getZExtValue(); |
1606 | ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm); |
1607 | } |
1608 | if (ResultReg) |
1609 | return ResultReg; |
1610 | |
1611 | // Check if the mul can be folded into the instruction. |
1612 | if (RHS->hasOneUse() && isValueAvailable(V: RHS)) { |
1613 | if (isMulPowOf2(I: RHS)) { |
1614 | const Value *MulLHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 0); |
1615 | const Value *MulRHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 1); |
1616 | |
1617 | if (const auto *C = dyn_cast<ConstantInt>(Val: MulLHS)) |
1618 | if (C->getValue().isPowerOf2()) |
1619 | std::swap(a&: MulLHS, b&: MulRHS); |
1620 | |
1621 | assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); |
1622 | uint64_t ShiftVal = cast<ConstantInt>(Val: MulRHS)->getValue().logBase2(); |
1623 | |
1624 | Register RHSReg = getRegForValue(V: MulLHS); |
1625 | if (!RHSReg) |
1626 | return 0; |
1627 | ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftImm: ShiftVal); |
1628 | if (ResultReg) |
1629 | return ResultReg; |
1630 | } |
1631 | } |
1632 | |
1633 | // Check if the shift can be folded into the instruction. |
1634 | if (RHS->hasOneUse() && isValueAvailable(V: RHS)) { |
1635 | if (const auto *SI = dyn_cast<ShlOperator>(Val: RHS)) |
1636 | if (const auto *C = dyn_cast<ConstantInt>(Val: SI->getOperand(i_nocapture: 1))) { |
1637 | uint64_t ShiftVal = C->getZExtValue(); |
1638 | Register RHSReg = getRegForValue(V: SI->getOperand(i_nocapture: 0)); |
1639 | if (!RHSReg) |
1640 | return 0; |
1641 | ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftImm: ShiftVal); |
1642 | if (ResultReg) |
1643 | return ResultReg; |
1644 | } |
1645 | } |
1646 | |
1647 | Register RHSReg = getRegForValue(V: RHS); |
1648 | if (!RHSReg) |
1649 | return 0; |
1650 | |
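// i8 and i16 operations are performed on 32-bit registers; the result is
// masked back down to the original width below.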
1651 | MVT VT = std::max(a: MVT::i32, b: RetVT.SimpleTy); |
1652 | ResultReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISDOpc, Op0: LHSReg, Op1: RHSReg); |
1653 | if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { |
1654 | uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; |
1655 | ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask); |
1656 | } |
1657 | return ResultReg; |
1658 | } |
1659 | |
1660 | unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, |
1661 | unsigned LHSReg, uint64_t Imm) { |
1662 | static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), |
1663 | "ISD nodes are not consecutive!"); |
1664 | static const unsigned OpcTable[3][2] = { |
1665 | { AArch64::ANDWri, AArch64::ANDXri }, |
1666 | { AArch64::ORRWri, AArch64::ORRXri }, |
1667 | { AArch64::EORWri, AArch64::EORXri } |
1668 | }; |
1669 | const TargetRegisterClass *RC; |
1670 | unsigned Opc; |
1671 | unsigned RegSize; |
1672 | switch (RetVT.SimpleTy) { |
1673 | default: |
1674 | return 0; |
1675 | case MVT::i1: |
1676 | case MVT::i8: |
1677 | case MVT::i16: |
1678 | case MVT::i32: { |
1679 | unsigned Idx = ISDOpc - ISD::AND; |
1680 | Opc = OpcTable[Idx][0]; |
1681 | RC = &AArch64::GPR32spRegClass; |
1682 | RegSize = 32; |
1683 | break; |
1684 | } |
1685 | case MVT::i64: |
1686 | Opc = OpcTable[ISDOpc - ISD::AND][1]; |
1687 | RC = &AArch64::GPR64spRegClass; |
1688 | RegSize = 64; |
1689 | break; |
1690 | } |
1691 | |
1692 | if (!AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize)) |
1693 | return 0; |
1694 | |
1695 | Register ResultReg = |
1696 | fastEmitInst_ri(MachineInstOpcode: Opc, RC, Op0: LHSReg, |
1697 | Imm: AArch64_AM::encodeLogicalImmediate(imm: Imm, regSize: RegSize)); |
1698 | if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) { |
1699 | uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; |
1700 | ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask); |
1701 | } |
1702 | return ResultReg; |
1703 | } |
1704 | |
1705 | unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, |
1706 | unsigned LHSReg, unsigned RHSReg, |
1707 | uint64_t ShiftImm) { |
1708 | static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), |
1709 | "ISD nodes are not consecutive!"); |
1710 | static const unsigned OpcTable[3][2] = { |
1711 | { AArch64::ANDWrs, AArch64::ANDXrs }, |
1712 | { AArch64::ORRWrs, AArch64::ORRXrs }, |
1713 | { AArch64::EORWrs, AArch64::EORXrs } |
1714 | }; |
1715 | |
1716 | // Don't deal with undefined shifts. |
1717 | if (ShiftImm >= RetVT.getSizeInBits()) |
1718 | return 0; |
1719 | |
1720 | const TargetRegisterClass *RC; |
1721 | unsigned Opc; |
1722 | switch (RetVT.SimpleTy) { |
1723 | default: |
1724 | return 0; |
1725 | case MVT::i1: |
1726 | case MVT::i8: |
1727 | case MVT::i16: |
1728 | case MVT::i32: |
1729 | Opc = OpcTable[ISDOpc - ISD::AND][0]; |
1730 | RC = &AArch64::GPR32RegClass; |
1731 | break; |
1732 | case MVT::i64: |
1733 | Opc = OpcTable[ISDOpc - ISD::AND][1]; |
1734 | RC = &AArch64::GPR64RegClass; |
1735 | break; |
1736 | } |
1737 | Register ResultReg = |
1738 | fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: LHSReg, Op1: RHSReg, |
1739 | Imm: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftImm)); |
1740 | if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { |
1741 | uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; |
1742 | ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask); |
1743 | } |
1744 | return ResultReg; |
1745 | } |
1746 | |
1747 | unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, |
1748 | uint64_t Imm) { |
1749 | return emitLogicalOp_ri(ISDOpc: ISD::AND, RetVT, LHSReg, Imm); |
1750 | } |
1751 | |
1752 | unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, |
1753 | bool WantZExt, MachineMemOperand *MMO) { |
1754 | if (!TLI.allowsMisalignedMemoryAccesses(VT)) |
1755 | return 0; |
1756 | |
1757 | // Simplify this down to something we can handle. |
1758 | if (!simplifyAddress(Addr, VT)) |
1759 | return 0; |
1760 | |
1761 | unsigned ScaleFactor = getImplicitScaleFactor(VT); |
1762 | if (!ScaleFactor) |
1763 | llvm_unreachable("Unexpected value type."); |
1764 | |
1765 | // Negative offsets require unscaled, 9-bit, signed immediate offsets. |
1766 | // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. |
1767 | bool UseScaled = true; |
1768 | if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { |
1769 | UseScaled = false; |
1770 | ScaleFactor = 1; |
1771 | } |
1772 | |
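// Rows come in pairs (32-bit and 64-bit destination) for each addressing mode:
// unscaled, scaled, register-offset (X), and register-offset (W). Columns are
// indexed by access size (i8, i16, i32, i64).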
1773 | static const unsigned GPOpcTable[2][8][4] = { |
1774 | // Sign-extend. |
1775 | { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, |
1776 | AArch64::LDURXi }, |
1777 | { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, |
1778 | AArch64::LDURXi }, |
1779 | { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, |
1780 | AArch64::LDRXui }, |
1781 | { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, |
1782 | AArch64::LDRXui }, |
1783 | { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, |
1784 | AArch64::LDRXroX }, |
1785 | { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, |
1786 | AArch64::LDRXroX }, |
1787 | { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, |
1788 | AArch64::LDRXroW }, |
1789 | { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, |
1790 | AArch64::LDRXroW } |
1791 | }, |
1792 | // Zero-extend. |
1793 | { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, |
1794 | AArch64::LDURXi }, |
1795 | { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, |
1796 | AArch64::LDURXi }, |
1797 | { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, |
1798 | AArch64::LDRXui }, |
1799 | { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, |
1800 | AArch64::LDRXui }, |
1801 | { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, |
1802 | AArch64::LDRXroX }, |
1803 | { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, |
1804 | AArch64::LDRXroX }, |
1805 | { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, |
1806 | AArch64::LDRXroW }, |
1807 | { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, |
1808 | AArch64::LDRXroW } |
1809 | } |
1810 | }; |
1811 | |
1812 | static const unsigned FPOpcTable[4][2] = { |
1813 | { AArch64::LDURSi, AArch64::LDURDi }, |
1814 | { AArch64::LDRSui, AArch64::LDRDui }, |
1815 | { AArch64::LDRSroX, AArch64::LDRDroX }, |
1816 | { AArch64::LDRSroW, AArch64::LDRDroW } |
1817 | }; |
1818 | |
1819 | unsigned Opc; |
1820 | const TargetRegisterClass *RC; |
1821 | bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && |
1822 | Addr.getOffsetReg(); |
1823 | unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; |
1824 | if (Addr.getExtendType() == AArch64_AM::UXTW || |
1825 | Addr.getExtendType() == AArch64_AM::SXTW) |
1826 | Idx++; |
1827 | |
1828 | bool IsRet64Bit = RetVT == MVT::i64; |
1829 | switch (VT.SimpleTy) { |
1830 | default: |
1831 | llvm_unreachable("Unexpected value type."); |
1832 | case MVT::i1: // Intentional fall-through. |
1833 | case MVT::i8: |
1834 | Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; |
1835 | RC = (IsRet64Bit && !WantZExt) ? |
1836 | &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; |
1837 | break; |
1838 | case MVT::i16: |
1839 | Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; |
1840 | RC = (IsRet64Bit && !WantZExt) ? |
1841 | &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; |
1842 | break; |
1843 | case MVT::i32: |
1844 | Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; |
1845 | RC = (IsRet64Bit && !WantZExt) ? |
1846 | &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; |
1847 | break; |
1848 | case MVT::i64: |
1849 | Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; |
1850 | RC = &AArch64::GPR64RegClass; |
1851 | break; |
1852 | case MVT::f32: |
1853 | Opc = FPOpcTable[Idx][0]; |
1854 | RC = &AArch64::FPR32RegClass; |
1855 | break; |
1856 | case MVT::f64: |
1857 | Opc = FPOpcTable[Idx][1]; |
1858 | RC = &AArch64::FPR64RegClass; |
1859 | break; |
1860 | } |
1861 | |
1862 | // Create the base instruction, then add the operands. |
1863 | Register ResultReg = createResultReg(RC); |
1864 | MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
1865 | MCID: TII.get(Opcode: Opc), DestReg: ResultReg); |
1866 | addLoadStoreOperands(Addr, MIB, Flags: MachineMemOperand::MOLoad, ScaleFactor, MMO); |
1867 | |
1868 | // Loading an i1 requires special handling. |
1869 | if (VT == MVT::i1) { |
1870 | unsigned ANDReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: 1); |
1871 | assert(ANDReg && "Unexpected AND instruction emission failure."); |
1872 | ResultReg = ANDReg; |
1873 | } |
1874 | |
1875 | // For zero-extending loads to 64bit we emit a 32bit load and then convert |
1876 | // the 32bit reg to a 64bit reg. |
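// A 32-bit register write implicitly zero-extends to 64 bits, so a plain
// SUBREG_TO_REG is sufficient.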
1877 | if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { |
1878 | Register Reg64 = createResultReg(RC: &AArch64::GPR64RegClass); |
1879 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
1880 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: Reg64) |
1881 | .addImm(Val: 0) |
1882 | .addReg(RegNo: ResultReg, flags: getKillRegState(B: true)) |
1883 | .addImm(Val: AArch64::sub_32); |
1884 | ResultReg = Reg64; |
1885 | } |
1886 | return ResultReg; |
1887 | } |
1888 | |
1889 | bool AArch64FastISel::selectAddSub(const Instruction *I) { |
1890 | MVT VT; |
1891 | if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true)) |
1892 | return false; |
1893 | |
1894 | if (VT.isVector()) |
1895 | return selectOperator(I, Opcode: I->getOpcode()); |
1896 | |
1897 | unsigned ResultReg; |
1898 | switch (I->getOpcode()) { |
1899 | default: |
1900 | llvm_unreachable("Unexpected instruction."); |
1901 | case Instruction::Add: |
1902 | ResultReg = emitAdd(RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1)); |
1903 | break; |
1904 | case Instruction::Sub: |
1905 | ResultReg = emitSub(RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1)); |
1906 | break; |
1907 | } |
1908 | if (!ResultReg) |
1909 | return false; |
1910 | |
1911 | updateValueMap(I, Reg: ResultReg); |
1912 | return true; |
1913 | } |
1914 | |
1915 | bool AArch64FastISel::selectLogicalOp(const Instruction *I) { |
1916 | MVT VT; |
1917 | if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true)) |
1918 | return false; |
1919 | |
1920 | if (VT.isVector()) |
1921 | return selectOperator(I, Opcode: I->getOpcode()); |
1922 | |
1923 | unsigned ResultReg; |
1924 | switch (I->getOpcode()) { |
1925 | default: |
1926 | llvm_unreachable("Unexpected instruction."); |
1927 | case Instruction::And: |
1928 | ResultReg = emitLogicalOp(ISDOpc: ISD::AND, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1)); |
1929 | break; |
1930 | case Instruction::Or: |
1931 | ResultReg = emitLogicalOp(ISDOpc: ISD::OR, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1)); |
1932 | break; |
1933 | case Instruction::Xor: |
1934 | ResultReg = emitLogicalOp(ISDOpc: ISD::XOR, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1)); |
1935 | break; |
1936 | } |
1937 | if (!ResultReg) |
1938 | return false; |
1939 | |
1940 | updateValueMap(I, Reg: ResultReg); |
1941 | return true; |
1942 | } |
1943 | |
1944 | bool AArch64FastISel::selectLoad(const Instruction *I) { |
1945 | MVT VT; |
1946 | // Verify we have a legal type before going any further. Currently, we handle |
1947 | // simple types that will directly fit in a register (i32/f32/i64/f64) or |
1948 | // those that can be sign or zero-extended to a basic operation (i1/i8/i16). |
1949 | if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true) || |
1950 | cast<LoadInst>(Val: I)->isAtomic()) |
1951 | return false; |
1952 | |
1953 | const Value *SV = I->getOperand(i: 0); |
1954 | if (TLI.supportSwiftError()) { |
1955 | // Swifterror values can come from either a function parameter with |
1956 | // swifterror attribute or an alloca with swifterror attribute. |
1957 | if (const Argument *Arg = dyn_cast<Argument>(Val: SV)) { |
1958 | if (Arg->hasSwiftErrorAttr()) |
1959 | return false; |
1960 | } |
1961 | |
1962 | if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: SV)) { |
1963 | if (Alloca->isSwiftError()) |
1964 | return false; |
1965 | } |
1966 | } |
1967 | |
1968 | // See if we can handle this address. |
1969 | Address Addr; |
1970 | if (!computeAddress(Obj: I->getOperand(i: 0), Addr, Ty: I->getType())) |
1971 | return false; |
1972 | |
1973 | // Fold the following sign-/zero-extend into the load instruction. |
1974 | bool WantZExt = true; |
1975 | MVT RetVT = VT; |
1976 | const Value *IntExtVal = nullptr; |
1977 | if (I->hasOneUse()) { |
1978 | if (const auto *ZE = dyn_cast<ZExtInst>(Val: I->use_begin()->getUser())) { |
1979 | if (isTypeSupported(Ty: ZE->getType(), VT&: RetVT)) |
1980 | IntExtVal = ZE; |
1981 | else |
1982 | RetVT = VT; |
1983 | } else if (const auto *SE = dyn_cast<SExtInst>(Val: I->use_begin()->getUser())) { |
1984 | if (isTypeSupported(Ty: SE->getType(), VT&: RetVT)) |
1985 | IntExtVal = SE; |
1986 | else |
1987 | RetVT = VT; |
1988 | WantZExt = false; |
1989 | } |
1990 | } |
1991 | |
1992 | unsigned ResultReg = |
1993 | emitLoad(VT, RetVT, Addr, WantZExt, MMO: createMachineMemOperandFor(I)); |
1994 | if (!ResultReg) |
1995 | return false; |
1996 | |
1997 | // There are a few different cases we have to handle, because the load or the |
1998 | // sign-/zero-extend might not be selected by FastISel if we fall-back to |
1999 | // SelectionDAG. There is also an ordering issue when both instructions are in |
2000 | // different basic blocks. |
2001 | // 1.) The load instruction is selected by FastISel, but the integer extend is |
2002 | // not. This usually happens when the integer extend is in a different |
2003 | // basic block and SelectionDAG took over for that basic block. |
2004 | // 2.) The load instruction is selected before the integer extend. This only |
2005 | // happens when the integer extend is in a different basic block. |
2006 | // 3.) The load instruction is selected by SelectionDAG and the integer extend |
2007 | // by FastISel. This happens if there are instructions between the load |
2008 | // and the integer extend that couldn't be selected by FastISel. |
2009 | if (IntExtVal) { |
2010 | // The integer extend hasn't been emitted yet. FastISel or SelectionDAG |
2011 | // could select it. Emit a copy to subreg if necessary. FastISel will remove |
2012 | // it when it selects the integer extend. |
2013 | Register Reg = lookUpRegForValue(V: IntExtVal); |
2014 | auto *MI = MRI.getUniqueVRegDef(Reg); |
2015 | if (!MI) { |
2016 | if (RetVT == MVT::i64 && VT <= MVT::i32) { |
2017 | if (WantZExt) { |
2018 | // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). |
2019 | MachineBasicBlock::iterator I(std::prev(x: FuncInfo.InsertPt)); |
2020 | ResultReg = std::prev(x: I)->getOperand(i: 0).getReg(); |
2021 | removeDeadCode(I, E: std::next(x: I)); |
2022 | } else |
2023 | ResultReg = fastEmitInst_extractsubreg(RetVT: MVT::i32, Op0: ResultReg, |
2024 | Idx: AArch64::sub_32); |
2025 | } |
2026 | updateValueMap(I, Reg: ResultReg); |
2027 | return true; |
2028 | } |
2029 | |
2030 | // The integer extend has already been emitted - delete all the instructions |
2031 | // that have been emitted by the integer extend lowering code and use the |
2032 | // result from the load instruction directly. |
2033 | while (MI) { |
2034 | Reg = 0; |
2035 | for (auto &Opnd : MI->uses()) { |
2036 | if (Opnd.isReg()) { |
2037 | Reg = Opnd.getReg(); |
2038 | break; |
2039 | } |
2040 | } |
2041 | MachineBasicBlock::iterator I(MI); |
2042 | removeDeadCode(I, E: std::next(x: I)); |
2043 | MI = nullptr; |
2044 | if (Reg) |
2045 | MI = MRI.getUniqueVRegDef(Reg); |
2046 | } |
2047 | updateValueMap(I: IntExtVal, Reg: ResultReg); |
2048 | return true; |
2049 | } |
2050 | |
2051 | updateValueMap(I, Reg: ResultReg); |
2052 | return true; |
2053 | } |
2054 | |
2055 | bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, |
2056 | unsigned AddrReg, |
2057 | MachineMemOperand *MMO) { |
2058 | unsigned Opc; |
2059 | switch (VT.SimpleTy) { |
2060 | default: return false; |
2061 | case MVT::i8: Opc = AArch64::STLRB; break; |
2062 | case MVT::i16: Opc = AArch64::STLRH; break; |
2063 | case MVT::i32: Opc = AArch64::STLRW; break; |
2064 | case MVT::i64: Opc = AArch64::STLRX; break; |
2065 | } |
2066 | |
2067 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
2068 | SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: 0); |
2069 | AddrReg = constrainOperandRegClass(II, Op: AddrReg, OpNum: 1); |
2070 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II) |
2071 | .addReg(RegNo: SrcReg) |
2072 | .addReg(RegNo: AddrReg) |
2073 | .addMemOperand(MMO); |
2074 | return true; |
2075 | } |
2076 | |
2077 | bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, |
2078 | MachineMemOperand *MMO) { |
2079 | if (!TLI.allowsMisalignedMemoryAccesses(VT)) |
2080 | return false; |
2081 | |
2082 | // Simplify this down to something we can handle. |
2083 | if (!simplifyAddress(Addr, VT)) |
2084 | return false; |
2085 | |
2086 | unsigned ScaleFactor = getImplicitScaleFactor(VT); |
2087 | if (!ScaleFactor) |
2088 | llvm_unreachable("Unexpected value type."); |
2089 | |
2090 | // Negative offsets require unscaled, 9-bit, signed immediate offsets. |
2091 | // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. |
2092 | bool UseScaled = true; |
2093 | if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { |
2094 | UseScaled = false; |
2095 | ScaleFactor = 1; |
2096 | } |
2097 | |
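// Rows select the addressing mode (unscaled, scaled, register-offset X,
// register-offset W); columns are indexed by the stored type
// (i8, i16, i32, i64, f32, f64).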
2098 | static const unsigned OpcTable[4][6] = { |
2099 | { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, |
2100 | AArch64::STURSi, AArch64::STURDi }, |
2101 | { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, |
2102 | AArch64::STRSui, AArch64::STRDui }, |
2103 | { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, |
2104 | AArch64::STRSroX, AArch64::STRDroX }, |
2105 | { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, |
2106 | AArch64::STRSroW, AArch64::STRDroW } |
2107 | }; |
2108 | |
2109 | unsigned Opc; |
2110 | bool VTIsi1 = false; |
2111 | bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && |
2112 | Addr.getOffsetReg(); |
2113 | unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; |
2114 | if (Addr.getExtendType() == AArch64_AM::UXTW || |
2115 | Addr.getExtendType() == AArch64_AM::SXTW) |
2116 | Idx++; |
2117 | |
2118 | switch (VT.SimpleTy) { |
2119 | default: llvm_unreachable("Unexpected value type."); |
2120 | case MVT::i1: VTIsi1 = true; [[fallthrough]]; |
2121 | case MVT::i8: Opc = OpcTable[Idx][0]; break; |
2122 | case MVT::i16: Opc = OpcTable[Idx][1]; break; |
2123 | case MVT::i32: Opc = OpcTable[Idx][2]; break; |
2124 | case MVT::i64: Opc = OpcTable[Idx][3]; break; |
2125 | case MVT::f32: Opc = OpcTable[Idx][4]; break; |
2126 | case MVT::f64: Opc = OpcTable[Idx][5]; break; |
2127 | } |
2128 | |
2129 | // Storing an i1 requires special handling. |
2130 | if (VTIsi1 && SrcReg != AArch64::WZR) { |
2131 | unsigned ANDReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: SrcReg, Imm: 1); |
2132 | assert(ANDReg && "Unexpected AND instruction emission failure."); |
2133 | SrcReg = ANDReg; |
2134 | } |
2135 | // Create the base instruction, then add the operands. |
2136 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
2137 | SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: II.getNumDefs()); |
2138 | MachineInstrBuilder MIB = |
2139 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: SrcReg); |
2140 | addLoadStoreOperands(Addr, MIB, Flags: MachineMemOperand::MOStore, ScaleFactor, MMO); |
2141 | |
2142 | return true; |
2143 | } |
2144 | |
2145 | bool AArch64FastISel::selectStore(const Instruction *I) { |
2146 | MVT VT; |
2147 | const Value *Op0 = I->getOperand(i: 0); |
2148 | // Verify we have a legal type before going any further. Currently, we handle |
2149 | // simple types that will directly fit in a register (i32/f32/i64/f64) or |
2150 | // those that can be sign or zero-extended to a basic operation (i1/i8/i16). |
2151 | if (!isTypeSupported(Ty: Op0->getType(), VT, /*IsVectorAllowed=*/true)) |
2152 | return false; |
2153 | |
2154 | const Value *PtrV = I->getOperand(i: 1); |
2155 | if (TLI.supportSwiftError()) { |
2156 | // Swifterror values can come from either a function parameter with |
2157 | // swifterror attribute or an alloca with swifterror attribute. |
2158 | if (const Argument *Arg = dyn_cast<Argument>(Val: PtrV)) { |
2159 | if (Arg->hasSwiftErrorAttr()) |
2160 | return false; |
2161 | } |
2162 | |
2163 | if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: PtrV)) { |
2164 | if (Alloca->isSwiftError()) |
2165 | return false; |
2166 | } |
2167 | } |
2168 | |
2169 | // Get the value to be stored into a register. Use the zero register directly |
2170 | // when possible to avoid an unnecessary copy and a wasted register. |
2171 | unsigned SrcReg = 0; |
2172 | if (const auto *CI = dyn_cast<ConstantInt>(Val: Op0)) { |
2173 | if (CI->isZero()) |
2174 | SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
2175 | } else if (const auto *CF = dyn_cast<ConstantFP>(Val: Op0)) { |
2176 | if (CF->isZero() && !CF->isNegative()) { |
2177 | VT = MVT::getIntegerVT(BitWidth: VT.getSizeInBits()); |
2178 | SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
2179 | } |
2180 | } |
2181 | |
2182 | if (!SrcReg) |
2183 | SrcReg = getRegForValue(V: Op0); |
2184 | |
2185 | if (!SrcReg) |
2186 | return false; |
2187 | |
2188 | auto *SI = cast<StoreInst>(Val: I); |
2189 | |
2190 | // Try to emit a STLR for seq_cst/release. |
2191 | if (SI->isAtomic()) { |
2192 | AtomicOrdering Ord = SI->getOrdering(); |
2193 | // The non-atomic instructions are sufficient for relaxed stores. |
2194 | if (isReleaseOrStronger(AO: Ord)) { |
2195 | // The STLR addressing mode only supports a base reg; pass that directly. |
2196 | Register AddrReg = getRegForValue(V: PtrV); |
2197 | return emitStoreRelease(VT, SrcReg, AddrReg, |
2198 | MMO: createMachineMemOperandFor(I)); |
2199 | } |
2200 | } |
2201 | |
2202 | // See if we can handle this address. |
2203 | Address Addr; |
2204 | if (!computeAddress(Obj: PtrV, Addr, Ty: Op0->getType())) |
2205 | return false; |
2206 | |
2207 | if (!emitStore(VT, SrcReg, Addr, MMO: createMachineMemOperandFor(I))) |
2208 | return false; |
2209 | return true; |
2210 | } |
2211 | |
2212 | static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { |
2213 | switch (Pred) { |
2214 | case CmpInst::FCMP_ONE: |
2215 | case CmpInst::FCMP_UEQ: |
2216 | default: |
2217 | // AL is our "false" for now. The other two need more compares. |
2218 | return AArch64CC::AL; |
2219 | case CmpInst::ICMP_EQ: |
2220 | case CmpInst::FCMP_OEQ: |
2221 | return AArch64CC::EQ; |
2222 | case CmpInst::ICMP_SGT: |
2223 | case CmpInst::FCMP_OGT: |
2224 | return AArch64CC::GT; |
2225 | case CmpInst::ICMP_SGE: |
2226 | case CmpInst::FCMP_OGE: |
2227 | return AArch64CC::GE; |
2228 | case CmpInst::ICMP_UGT: |
2229 | case CmpInst::FCMP_UGT: |
2230 | return AArch64CC::HI; |
2231 | case CmpInst::FCMP_OLT: |
2232 | return AArch64CC::MI; |
2233 | case CmpInst::ICMP_ULE: |
2234 | case CmpInst::FCMP_OLE: |
2235 | return AArch64CC::LS; |
2236 | case CmpInst::FCMP_ORD: |
2237 | return AArch64CC::VC; |
2238 | case CmpInst::FCMP_UNO: |
2239 | return AArch64CC::VS; |
2240 | case CmpInst::FCMP_UGE: |
2241 | return AArch64CC::PL; |
2242 | case CmpInst::ICMP_SLT: |
2243 | case CmpInst::FCMP_ULT: |
2244 | return AArch64CC::LT; |
2245 | case CmpInst::ICMP_SLE: |
2246 | case CmpInst::FCMP_ULE: |
2247 | return AArch64CC::LE; |
2248 | case CmpInst::FCMP_UNE: |
2249 | case CmpInst::ICMP_NE: |
2250 | return AArch64CC::NE; |
2251 | case CmpInst::ICMP_UGE: |
2252 | return AArch64CC::HS; |
2253 | case CmpInst::ICMP_ULT: |
2254 | return AArch64CC::LO; |
2255 | } |
2256 | } |
2257 | |
2258 | /// Try to emit a combined compare-and-branch instruction. |
2259 | bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { |
2260 | // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions |
2261 | // will not be produced, as they are conditional branch instructions that do |
2262 | // not set flags. |
2263 | if (FuncInfo.MF->getFunction().hasFnAttribute( |
2264 | Kind: Attribute::SpeculativeLoadHardening)) |
2265 | return false; |
2266 | |
2267 | assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); |
2268 | const CmpInst *CI = cast<CmpInst>(Val: BI->getCondition()); |
2269 | CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
2270 | |
2271 | const Value *LHS = CI->getOperand(i_nocapture: 0); |
2272 | const Value *RHS = CI->getOperand(i_nocapture: 1); |
2273 | |
2274 | MVT VT; |
2275 | if (!isTypeSupported(Ty: LHS->getType(), VT)) |
2276 | return false; |
2277 | |
2278 | unsigned BW = VT.getSizeInBits(); |
2279 | if (BW > 64) |
2280 | return false; |
2281 | |
2282 | MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(i: 0)]; |
2283 | MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(i: 1)]; |
2284 | |
2285 | // Try to take advantage of fallthrough opportunities. |
2286 | if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) { |
2287 | std::swap(a&: TBB, b&: FBB); |
2288 | Predicate = CmpInst::getInversePredicate(pred: Predicate); |
2289 | } |
2290 | |
2291 | int TestBit = -1; |
2292 | bool IsCmpNE; |
2293 | switch (Predicate) { |
2294 | default: |
2295 | return false; |
2296 | case CmpInst::ICMP_EQ: |
2297 | case CmpInst::ICMP_NE: |
2298 | if (isa<Constant>(Val: LHS) && cast<Constant>(Val: LHS)->isNullValue()) |
2299 | std::swap(a&: LHS, b&: RHS); |
2300 | |
2301 | if (!isa<Constant>(Val: RHS) || !cast<Constant>(Val: RHS)->isNullValue()) |
2302 | return false; |
2303 | |
2304 | if (const auto *AI = dyn_cast<BinaryOperator>(Val: LHS)) |
2305 | if (AI->getOpcode() == Instruction::And && isValueAvailable(V: AI)) { |
2306 | const Value *AndLHS = AI->getOperand(i_nocapture: 0); |
2307 | const Value *AndRHS = AI->getOperand(i_nocapture: 1); |
2308 | |
2309 | if (const auto *C = dyn_cast<ConstantInt>(Val: AndLHS)) |
2310 | if (C->getValue().isPowerOf2()) |
2311 | std::swap(a&: AndLHS, b&: AndRHS); |
2312 | |
2313 | if (const auto *C = dyn_cast<ConstantInt>(Val: AndRHS)) |
2314 | if (C->getValue().isPowerOf2()) { |
2315 | TestBit = C->getValue().logBase2(); |
2316 | LHS = AndLHS; |
2317 | } |
2318 | } |
2319 | |
2320 | if (VT == MVT::i1) |
2321 | TestBit = 0; |
2322 | |
2323 | IsCmpNE = Predicate == CmpInst::ICMP_NE; |
2324 | break; |
2325 | case CmpInst::ICMP_SLT: |
2326 | case CmpInst::ICMP_SGE: |
2327 | if (!isa<Constant>(Val: RHS) || !cast<Constant>(Val: RHS)->isNullValue()) |
2328 | return false; |
2329 | |
2330 | TestBit = BW - 1; |
2331 | IsCmpNE = Predicate == CmpInst::ICMP_SLT; |
2332 | break; |
2333 | case CmpInst::ICMP_SGT: |
2334 | case CmpInst::ICMP_SLE: |
2335 | if (!isa<ConstantInt>(Val: RHS)) |
2336 | return false; |
2337 | |
2338 | if (cast<ConstantInt>(Val: RHS)->getValue() != APInt(BW, -1, true)) |
2339 | return false; |
2340 | |
2341 | TestBit = BW - 1; |
2342 | IsCmpNE = Predicate == CmpInst::ICMP_SLE; |
2343 | break; |
2344 | } // end switch |
2345 | |
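// Indexed as [IsBitTest][IsCmpNE][Is64Bit]: CB(N)Z compares a whole register
// against zero, TB(N)Z tests a single bit.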
2346 | static const unsigned OpcTable[2][2][2] = { |
2347 | { {AArch64::CBZW, AArch64::CBZX }, |
2348 | {AArch64::CBNZW, AArch64::CBNZX} }, |
2349 | { {AArch64::TBZW, AArch64::TBZX }, |
2350 | {AArch64::TBNZW, AArch64::TBNZX} } |
2351 | }; |
2352 | |
2353 | bool IsBitTest = TestBit != -1; |
2354 | bool Is64Bit = BW == 64; |
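// A test of a bit below 32 can use the W-register form even for 64-bit values.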
2355 | if (TestBit < 32 && TestBit >= 0) |
2356 | Is64Bit = false; |
2357 | |
2358 | unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; |
2359 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
2360 | |
2361 | Register SrcReg = getRegForValue(V: LHS); |
2362 | if (!SrcReg) |
2363 | return false; |
2364 | |
2365 | if (BW == 64 && !Is64Bit) |
2366 | SrcReg = fastEmitInst_extractsubreg(RetVT: MVT::i32, Op0: SrcReg, Idx: AArch64::sub_32); |
2367 | |
2368 | if ((BW < 32) && !IsBitTest) |
2369 | SrcReg = emitIntExt(SrcVT: VT, SrcReg, DestVT: MVT::i32, /*isZExt=*/true); |
2370 | |
2371 | // Emit the combined compare and branch instruction. |
2372 | SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: II.getNumDefs()); |
2373 | MachineInstrBuilder MIB = |
2374 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc)) |
2375 | .addReg(RegNo: SrcReg); |
2376 | if (IsBitTest) |
2377 | MIB.addImm(Val: TestBit); |
2378 | MIB.addMBB(MBB: TBB); |
2379 | |
2380 | finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB); |
2381 | return true; |
2382 | } |
2383 | |
2384 | bool AArch64FastISel::selectBranch(const Instruction *I) { |
2385 | const BranchInst *BI = cast<BranchInst>(Val: I); |
2386 | if (BI->isUnconditional()) { |
2387 | MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(i: 0)]; |
2388 | fastEmitBranch(MSucc, DbgLoc: BI->getDebugLoc()); |
2389 | return true; |
2390 | } |
2391 | |
2392 | MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(i: 0)]; |
2393 | MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(i: 1)]; |
2394 | |
2395 | if (const CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition())) { |
2396 | if (CI->hasOneUse() && isValueAvailable(V: CI)) { |
2397 | // Try to optimize or fold the cmp. |
2398 | CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
2399 | switch (Predicate) { |
2400 | default: |
2401 | break; |
2402 | case CmpInst::FCMP_FALSE: |
2403 | fastEmitBranch(MSucc: FBB, DbgLoc: MIMD.getDL()); |
2404 | return true; |
2405 | case CmpInst::FCMP_TRUE: |
2406 | fastEmitBranch(MSucc: TBB, DbgLoc: MIMD.getDL()); |
2407 | return true; |
2408 | } |
2409 | |
2410 | // Try to emit a combined compare-and-branch first. |
2411 | if (emitCompareAndBranch(BI)) |
2412 | return true; |
2413 | |
2414 | // Try to take advantage of fallthrough opportunities. |
2415 | if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) { |
2416 | std::swap(a&: TBB, b&: FBB); |
2417 | Predicate = CmpInst::getInversePredicate(pred: Predicate); |
2418 | } |
2419 | |
2420 | // Emit the cmp. |
2421 | if (!emitCmp(LHS: CI->getOperand(i_nocapture: 0), RHS: CI->getOperand(i_nocapture: 1), IsZExt: CI->isUnsigned())) |
2422 | return false; |
2423 | |
2424 | // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch |
2425 | // instruction. |
2426 | AArch64CC::CondCode CC = getCompareCC(Pred: Predicate); |
2427 | AArch64CC::CondCode ExtraCC = AArch64CC::AL; |
2428 | switch (Predicate) { |
2429 | default: |
2430 | break; |
2431 | case CmpInst::FCMP_UEQ: |
2432 | ExtraCC = AArch64CC::EQ; |
2433 | CC = AArch64CC::VS; |
2434 | break; |
2435 | case CmpInst::FCMP_ONE: |
2436 | ExtraCC = AArch64CC::MI; |
2437 | CC = AArch64CC::GT; |
2438 | break; |
2439 | } |
2440 | assert((CC != AArch64CC::AL) && "Unexpected condition code."); |
2441 | |
2442 | // Emit the extra branch for FCMP_UEQ and FCMP_ONE. |
2443 | if (ExtraCC != AArch64CC::AL) { |
2444 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::Bcc)) |
2445 | .addImm(Val: ExtraCC) |
2446 | .addMBB(MBB: TBB); |
2447 | } |
2448 | |
2449 | // Emit the branch. |
2450 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::Bcc)) |
2451 | .addImm(Val: CC) |
2452 | .addMBB(MBB: TBB); |
2453 | |
2454 | finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB); |
2455 | return true; |
2456 | } |
2457 | } else if (const auto *CI = dyn_cast<ConstantInt>(Val: BI->getCondition())) { |
2458 | uint64_t Imm = CI->getZExtValue(); |
2459 | MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; |
2460 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::B)) |
2461 | .addMBB(MBB: Target); |
2462 | |
2463 | // Obtain the branch probability and add the target to the successor list. |
2464 | if (FuncInfo.BPI) { |
2465 | auto BranchProbability = FuncInfo.BPI->getEdgeProbability( |
2466 | Src: BI->getParent(), Dst: Target->getBasicBlock()); |
2467 | FuncInfo.MBB->addSuccessor(Succ: Target, Prob: BranchProbability); |
2468 | } else |
2469 | FuncInfo.MBB->addSuccessorWithoutProb(Succ: Target); |
2470 | return true; |
2471 | } else { |
2472 | AArch64CC::CondCode CC = AArch64CC::NE; |
2473 | if (foldXALUIntrinsic(CC, I, Cond: BI->getCondition())) { |
2474 | // Fake request the condition, otherwise the intrinsic might be completely |
2475 | // optimized away. |
2476 | Register CondReg = getRegForValue(V: BI->getCondition()); |
2477 | if (!CondReg) |
2478 | return false; |
2479 | |
2480 | // Emit the branch. |
2481 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::Bcc)) |
2482 | .addImm(Val: CC) |
2483 | .addMBB(MBB: TBB); |
2484 | |
2485 | finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB); |
2486 | return true; |
2487 | } |
2488 | } |
2489 | |
2490 | Register CondReg = getRegForValue(V: BI->getCondition()); |
2491 | if (CondReg == 0) |
2492 | return false; |
2493 | |
2494 | // i1 conditions come as i32 values, test the lowest bit with tb(n)z. |
2495 | unsigned Opcode = AArch64::TBNZW; |
2496 | if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) { |
2497 | std::swap(a&: TBB, b&: FBB); |
2498 | Opcode = AArch64::TBZW; |
2499 | } |
2500 | |
2501 | const MCInstrDesc &II = TII.get(Opcode); |
2502 | Register ConstrainedCondReg |
2503 | = constrainOperandRegClass(II, Op: CondReg, OpNum: II.getNumDefs()); |
2504 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II) |
2505 | .addReg(RegNo: ConstrainedCondReg) |
2506 | .addImm(Val: 0) |
2507 | .addMBB(MBB: TBB); |
2508 | |
2509 | finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB); |
2510 | return true; |
2511 | } |
2512 | |
2513 | bool AArch64FastISel::selectIndirectBr(const Instruction *I) { |
2514 | const IndirectBrInst *BI = cast<IndirectBrInst>(Val: I); |
2515 | Register AddrReg = getRegForValue(V: BI->getOperand(i_nocapture: 0)); |
2516 | if (AddrReg == 0) |
2517 | return false; |
2518 | |
2519 | // Authenticated indirectbr is not implemented yet. |
2520 | if (FuncInfo.MF->getFunction().hasFnAttribute(Kind: "ptrauth-indirect-gotos")) |
2521 | return false; |
2522 | |
2523 | // Emit the indirect branch. |
2524 | const MCInstrDesc &II = TII.get(Opcode: AArch64::BR); |
2525 | AddrReg = constrainOperandRegClass(II, Op: AddrReg, OpNum: II.getNumDefs()); |
2526 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: AddrReg); |
2527 | |
2528 | // Make sure the CFG is up-to-date. |
2529 | for (const auto *Succ : BI->successors()) |
2530 | FuncInfo.MBB->addSuccessor(Succ: FuncInfo.MBBMap[Succ]); |
2531 | |
2532 | return true; |
2533 | } |
2534 | |
2535 | bool AArch64FastISel::selectCmp(const Instruction *I) { |
2536 | const CmpInst *CI = cast<CmpInst>(Val: I); |
2537 | |
2538 | // Vectors of i1 are weird: bail out. |
2539 | if (CI->getType()->isVectorTy()) |
2540 | return false; |
2541 | |
2542 | // Try to optimize or fold the cmp. |
2543 | CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
2544 | unsigned ResultReg = 0; |
2545 | switch (Predicate) { |
2546 | default: |
2547 | break; |
2548 | case CmpInst::FCMP_FALSE: |
2549 | ResultReg = createResultReg(RC: &AArch64::GPR32RegClass); |
2550 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
2551 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg) |
2552 | .addReg(RegNo: AArch64::WZR, flags: getKillRegState(B: true)); |
2553 | break; |
2554 | case CmpInst::FCMP_TRUE: |
2555 | ResultReg = fastEmit_i(VT: MVT::i32, RetVT: MVT::i32, Opcode: ISD::Constant, imm0: 1); |
2556 | break; |
2557 | } |
2558 | |
2559 | if (ResultReg) { |
2560 | updateValueMap(I, Reg: ResultReg); |
2561 | return true; |
2562 | } |
2563 | |
2564 | // Emit the cmp. |
2565 | if (!emitCmp(LHS: CI->getOperand(i_nocapture: 0), RHS: CI->getOperand(i_nocapture: 1), IsZExt: CI->isUnsigned())) |
2566 | return false; |
2567 | |
2568 | ResultReg = createResultReg(RC: &AArch64::GPR32RegClass); |
2569 | |
2570 | // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These |
2571 | // condition codes are inverted, because they are used by CSINC. |
2572 | static unsigned CondCodeTable[2][2] = { |
2573 | { AArch64CC::NE, AArch64CC::VC }, |
2574 | { AArch64CC::PL, AArch64CC::LE } |
2575 | }; |
2576 | unsigned *CondCodes = nullptr; |
2577 | switch (Predicate) { |
2578 | default: |
2579 | break; |
2580 | case CmpInst::FCMP_UEQ: |
2581 | CondCodes = &CondCodeTable[0][0]; |
2582 | break; |
2583 | case CmpInst::FCMP_ONE: |
2584 | CondCodes = &CondCodeTable[1][0]; |
2585 | break; |
2586 | } |
2587 | |
2588 | if (CondCodes) { |
2589 | Register TmpReg1 = createResultReg(RC: &AArch64::GPR32RegClass); |
2590 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::CSINCWr), |
2591 | DestReg: TmpReg1) |
2592 | .addReg(RegNo: AArch64::WZR, flags: getKillRegState(B: true)) |
2593 | .addReg(RegNo: AArch64::WZR, flags: getKillRegState(B: true)) |
2594 | .addImm(Val: CondCodes[0]); |
2595 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::CSINCWr), |
2596 | DestReg: ResultReg) |
2597 | .addReg(RegNo: TmpReg1, flags: getKillRegState(B: true)) |
2598 | .addReg(RegNo: AArch64::WZR, flags: getKillRegState(B: true)) |
2599 | .addImm(Val: CondCodes[1]); |
2600 | |
2601 | updateValueMap(I, Reg: ResultReg); |
2602 | return true; |
2603 | } |
2604 | |
2605 | // Now set a register based on the comparison. |
2606 | AArch64CC::CondCode CC = getCompareCC(Pred: Predicate); |
2607 | assert((CC != AArch64CC::AL) && "Unexpected condition code."); |
2608 | AArch64CC::CondCode invertedCC = getInvertedCondCode(Code: CC); |
2609 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::CSINCWr), |
2610 | DestReg: ResultReg) |
2611 | .addReg(RegNo: AArch64::WZR, flags: getKillRegState(B: true)) |
2612 | .addReg(RegNo: AArch64::WZR, flags: getKillRegState(B: true)) |
2613 | .addImm(Val: invertedCC); |
2614 | |
2615 | updateValueMap(I, Reg: ResultReg); |
2616 | return true; |
2617 | } |
2618 | |
2619 | /// Optimize selects of i1 if one of the operands has a 'true' or 'false' |
2620 | /// value. |
2621 | bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { |
2622 | if (!SI->getType()->isIntegerTy(Bitwidth: 1)) |
2623 | return false; |
2624 | |
2625 | const Value *Src1Val, *Src2Val; |
2626 | unsigned Opc = 0; |
2627 | bool NeedExtraOp = false; |
2628 | if (auto *CI = dyn_cast<ConstantInt>(Val: SI->getTrueValue())) { |
2629 | if (CI->isOne()) { |
2630 | Src1Val = SI->getCondition(); |
2631 | Src2Val = SI->getFalseValue(); |
2632 | Opc = AArch64::ORRWrr; |
2633 | } else { |
2634 | assert(CI->isZero()); |
2635 | Src1Val = SI->getFalseValue(); |
2636 | Src2Val = SI->getCondition(); |
2637 | Opc = AArch64::BICWrr; |
2638 | } |
2639 | } else if (auto *CI = dyn_cast<ConstantInt>(Val: SI->getFalseValue())) { |
2640 | if (CI->isOne()) { |
2641 | Src1Val = SI->getCondition(); |
2642 | Src2Val = SI->getTrueValue(); |
2643 | Opc = AArch64::ORRWrr; |
2644 | NeedExtraOp = true; |
2645 | } else { |
2646 | assert(CI->isZero()); |
2647 | Src1Val = SI->getCondition(); |
2648 | Src2Val = SI->getTrueValue(); |
2649 | Opc = AArch64::ANDWrr; |
2650 | } |
2651 | } |
2652 | |
2653 | if (!Opc) |
2654 | return false; |
2655 | |
2656 | Register Src1Reg = getRegForValue(V: Src1Val); |
2657 | if (!Src1Reg) |
2658 | return false; |
2659 | |
2660 | Register Src2Reg = getRegForValue(V: Src2Val); |
2661 | if (!Src2Reg) |
2662 | return false; |
2663 | |
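// The constant 'true' was on the false side, so invert the condition before
// the ORR below.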
2664 | if (NeedExtraOp) |
2665 | Src1Reg = emitLogicalOp_ri(ISDOpc: ISD::XOR, RetVT: MVT::i32, LHSReg: Src1Reg, Imm: 1); |
2666 | |
2667 | Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC: &AArch64::GPR32RegClass, Op0: Src1Reg, |
2668 | Op1: Src2Reg); |
2669 | updateValueMap(I: SI, Reg: ResultReg); |
2670 | return true; |
2671 | } |
2672 | |
2673 | bool AArch64FastISel::selectSelect(const Instruction *I) { |
2674 | assert(isa<SelectInst>(I) && "Expected a select instruction."); |
2675 | MVT VT; |
2676 | if (!isTypeSupported(Ty: I->getType(), VT)) |
2677 | return false; |
2678 | |
2679 | unsigned Opc; |
2680 | const TargetRegisterClass *RC; |
2681 | switch (VT.SimpleTy) { |
2682 | default: |
2683 | return false; |
2684 | case MVT::i1: |
2685 | case MVT::i8: |
2686 | case MVT::i16: |
2687 | case MVT::i32: |
2688 | Opc = AArch64::CSELWr; |
2689 | RC = &AArch64::GPR32RegClass; |
2690 | break; |
2691 | case MVT::i64: |
2692 | Opc = AArch64::CSELXr; |
2693 | RC = &AArch64::GPR64RegClass; |
2694 | break; |
2695 | case MVT::f32: |
2696 | Opc = AArch64::FCSELSrrr; |
2697 | RC = &AArch64::FPR32RegClass; |
2698 | break; |
2699 | case MVT::f64: |
2700 | Opc = AArch64::FCSELDrrr; |
2701 | RC = &AArch64::FPR64RegClass; |
2702 | break; |
2703 | } |
2704 | |
2705 | const SelectInst *SI = cast<SelectInst>(Val: I); |
2706 | const Value *Cond = SI->getCondition(); |
2707 | AArch64CC::CondCode CC = AArch64CC::NE; |
2708 | AArch64CC::CondCode ExtraCC = AArch64CC::AL; |
2709 | |
2710 | if (optimizeSelect(SI)) |
2711 | return true; |
2712 | |
2713 | // Try to pick up the flags, so we don't have to emit another compare. |
2714 | if (foldXALUIntrinsic(CC, I, Cond)) { |
2715 | // Fake request the condition to force emission of the XALU intrinsic. |
2716 | Register CondReg = getRegForValue(V: Cond); |
2717 | if (!CondReg) |
2718 | return false; |
2719 | } else if (isa<CmpInst>(Val: Cond) && cast<CmpInst>(Val: Cond)->hasOneUse() && |
2720 | isValueAvailable(V: Cond)) { |
2721 | const auto *Cmp = cast<CmpInst>(Val: Cond); |
2722 | // Try to optimize or fold the cmp. |
2723 | CmpInst::Predicate Predicate = optimizeCmpPredicate(CI: Cmp); |
2724 | const Value *FoldSelect = nullptr; |
2725 | switch (Predicate) { |
2726 | default: |
2727 | break; |
2728 | case CmpInst::FCMP_FALSE: |
2729 | FoldSelect = SI->getFalseValue(); |
2730 | break; |
2731 | case CmpInst::FCMP_TRUE: |
2732 | FoldSelect = SI->getTrueValue(); |
2733 | break; |
2734 | } |
2735 | |
2736 | if (FoldSelect) { |
2737 | Register SrcReg = getRegForValue(V: FoldSelect); |
2738 | if (!SrcReg) |
2739 | return false; |
2740 | |
2741 | updateValueMap(I, Reg: SrcReg); |
2742 | return true; |
2743 | } |
2744 | |
2745 | // Emit the cmp. |
2746 | if (!emitCmp(LHS: Cmp->getOperand(i_nocapture: 0), RHS: Cmp->getOperand(i_nocapture: 1), IsZExt: Cmp->isUnsigned())) |
2747 | return false; |
2748 | |
2749 | // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. |
2750 | CC = getCompareCC(Pred: Predicate); |
2751 | switch (Predicate) { |
2752 | default: |
2753 | break; |
2754 | case CmpInst::FCMP_UEQ: |
2755 | ExtraCC = AArch64CC::EQ; |
2756 | CC = AArch64CC::VS; |
2757 | break; |
2758 | case CmpInst::FCMP_ONE: |
2759 | ExtraCC = AArch64CC::MI; |
2760 | CC = AArch64CC::GT; |
2761 | break; |
2762 | } |
2763 | assert((CC != AArch64CC::AL) && "Unexpected condition code."); |
2764 | } else { |
2765 | Register CondReg = getRegForValue(V: Cond); |
2766 | if (!CondReg) |
2767 | return false; |
2768 | |
2769 | const MCInstrDesc &II = TII.get(Opcode: AArch64::ANDSWri); |
2770 | CondReg = constrainOperandRegClass(II, Op: CondReg, OpNum: 1); |
2771 | |
2772 | // Emit a TST instruction (ANDS wzr, reg, #imm). |
2773 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, |
2774 | DestReg: AArch64::WZR) |
2775 | .addReg(RegNo: CondReg) |
2776 | .addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: 1, regSize: 32)); |
2777 | } |
2778 | |
2779 | Register Src1Reg = getRegForValue(V: SI->getTrueValue()); |
2780 | Register Src2Reg = getRegForValue(V: SI->getFalseValue()); |
2781 | |
2782 | if (!Src1Reg || !Src2Reg) |
2783 | return false; |
2784 | |
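// FCMP_UEQ and FCMP_ONE need two condition codes; chain an extra conditional
// select on ExtraCC into the false operand of the final select.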
2785 | if (ExtraCC != AArch64CC::AL) |
2786 | Src2Reg = fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: Src1Reg, Op1: Src2Reg, Imm: ExtraCC); |
2787 | |
2788 | Register ResultReg = fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: Src1Reg, Op1: Src2Reg, Imm: CC); |
2789 | updateValueMap(I, Reg: ResultReg); |
2790 | return true; |
2791 | } |
2792 | |
2793 | bool AArch64FastISel::selectFPExt(const Instruction *I) { |
2794 | Value *V = I->getOperand(i: 0); |
2795 | if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) |
2796 | return false; |
2797 | |
2798 | Register Op = getRegForValue(V); |
2799 | if (Op == 0) |
2800 | return false; |
2801 | |
2802 | Register ResultReg = createResultReg(RC: &AArch64::FPR64RegClass); |
2803 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::FCVTDSr), |
2804 | DestReg: ResultReg).addReg(RegNo: Op); |
2805 | updateValueMap(I, Reg: ResultReg); |
2806 | return true; |
2807 | } |
2808 | |
2809 | bool AArch64FastISel::selectFPTrunc(const Instruction *I) { |
2810 | Value *V = I->getOperand(i: 0); |
2811 | if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) |
2812 | return false; |
2813 | |
2814 | Register Op = getRegForValue(V); |
2815 | if (Op == 0) |
2816 | return false; |
2817 | |
2818 | Register ResultReg = createResultReg(RC: &AArch64::FPR32RegClass); |
2819 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::FCVTSDr), |
2820 | DestReg: ResultReg).addReg(RegNo: Op); |
2821 | updateValueMap(I, Reg: ResultReg); |
2822 | return true; |
2823 | } |
2824 | |
2825 | // FPToUI and FPToSI |
2826 | bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { |
2827 | MVT DestVT; |
2828 | if (!isTypeLegal(Ty: I->getType(), VT&: DestVT) || DestVT.isVector()) |
2829 | return false; |
2830 | |
2831 | Register SrcReg = getRegForValue(V: I->getOperand(i: 0)); |
2832 | if (SrcReg == 0) |
2833 | return false; |
2834 | |
2835 | EVT SrcVT = TLI.getValueType(DL, Ty: I->getOperand(i: 0)->getType(), AllowUnknown: true); |
2836 | if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16) |
2837 | return false; |
2838 | |
2839 | unsigned Opc; |
2840 | if (SrcVT == MVT::f64) { |
2841 | if (Signed) |
2842 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; |
2843 | else |
2844 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; |
2845 | } else { |
2846 | if (Signed) |
2847 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; |
2848 | else |
2849 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; |
2850 | } |
2851 | Register ResultReg = createResultReg( |
2852 | RC: DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); |
2853 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg) |
2854 | .addReg(RegNo: SrcReg); |
2855 | updateValueMap(I, Reg: ResultReg); |
2856 | return true; |
2857 | } |
2858 | |
2859 | bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { |
2860 | MVT DestVT; |
2861 | if (!isTypeLegal(Ty: I->getType(), VT&: DestVT) || DestVT.isVector()) |
2862 | return false; |
2863 |   // Let regular ISel handle FP16 and BF16.
2864 | if (DestVT == MVT::f16 || DestVT == MVT::bf16) |
2865 | return false; |
2866 | |
2867 | assert((DestVT == MVT::f32 || DestVT == MVT::f64) && |
2868 | "Unexpected value type." ); |
2869 | |
2870 | Register SrcReg = getRegForValue(V: I->getOperand(i: 0)); |
2871 | if (!SrcReg) |
2872 | return false; |
2873 | |
2874 | EVT SrcVT = TLI.getValueType(DL, Ty: I->getOperand(i: 0)->getType(), AllowUnknown: true); |
2875 | |
2876 |   // Promote i1/i8/i16 sources to i32, sign-extending for signed conversions
2876 |   // and zero-extending otherwise.
2877 | if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { |
2878 | SrcReg = |
2879 | emitIntExt(SrcVT: SrcVT.getSimpleVT(), SrcReg, DestVT: MVT::i32, /*isZExt*/ !Signed); |
2880 | if (!SrcReg) |
2881 | return false; |
2882 | } |
2883 | |
2884 | unsigned Opc; |
2885 | if (SrcVT == MVT::i64) { |
2886 | if (Signed) |
2887 | Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; |
2888 | else |
2889 | Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; |
2890 | } else { |
2891 | if (Signed) |
2892 | Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; |
2893 | else |
2894 | Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; |
2895 | } |
2896 | |
2897 | Register ResultReg = fastEmitInst_r(MachineInstOpcode: Opc, RC: TLI.getRegClassFor(VT: DestVT), Op0: SrcReg); |
2898 | updateValueMap(I, Reg: ResultReg); |
2899 | return true; |
2900 | } |
2901 | |
2902 | bool AArch64FastISel::fastLowerArguments() { |
2903 | if (!FuncInfo.CanLowerReturn) |
2904 | return false; |
2905 | |
2906 | const Function *F = FuncInfo.Fn; |
2907 | if (F->isVarArg()) |
2908 | return false; |
2909 | |
2910 | CallingConv::ID CC = F->getCallingConv(); |
2911 | if (CC != CallingConv::C && CC != CallingConv::Swift) |
2912 | return false; |
2913 | |
2914 | if (Subtarget->hasCustomCallingConv()) |
2915 | return false; |
2916 | |
2917 |   // Only handle simple cases: at most 8 GPR and 8 FPR arguments.
2918 | unsigned GPRCnt = 0; |
2919 | unsigned FPRCnt = 0; |
2920 | for (auto const &Arg : F->args()) { |
2921 | if (Arg.hasAttribute(Kind: Attribute::ByVal) || |
2922 | Arg.hasAttribute(Kind: Attribute::InReg) || |
2923 | Arg.hasAttribute(Kind: Attribute::StructRet) || |
2924 | Arg.hasAttribute(Kind: Attribute::SwiftSelf) || |
2925 | Arg.hasAttribute(Kind: Attribute::SwiftAsync) || |
2926 | Arg.hasAttribute(Kind: Attribute::SwiftError) || |
2927 | Arg.hasAttribute(Kind: Attribute::Nest)) |
2928 | return false; |
2929 | |
2930 | Type *ArgTy = Arg.getType(); |
2931 | if (ArgTy->isStructTy() || ArgTy->isArrayTy()) |
2932 | return false; |
2933 | |
2934 | EVT ArgVT = TLI.getValueType(DL, Ty: ArgTy); |
2935 | if (!ArgVT.isSimple()) |
2936 | return false; |
2937 | |
2938 | MVT VT = ArgVT.getSimpleVT().SimpleTy; |
2939 | if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) |
2940 | return false; |
2941 | |
2942 | if (VT.isVector() && |
2943 | (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) |
2944 | return false; |
2945 | |
2946 | if (VT >= MVT::i1 && VT <= MVT::i64) |
2947 | ++GPRCnt; |
2948 | else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || |
2949 | VT.is128BitVector()) |
2950 | ++FPRCnt; |
2951 | else |
2952 | return false; |
2953 | |
2954 | if (GPRCnt > 8 || FPRCnt > 8) |
2955 | return false; |
2956 | } |
2957 | |
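     |   // AAPCS argument registers, indexed by [register width class][argument
     |   // index]; the rows hold the W, X, H, S, D and Q argument registers.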
2958 | static const MCPhysReg Registers[6][8] = { |
2959 | { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, |
2960 | AArch64::W5, AArch64::W6, AArch64::W7 }, |
2961 | { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, |
2962 | AArch64::X5, AArch64::X6, AArch64::X7 }, |
2963 | { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, |
2964 | AArch64::H5, AArch64::H6, AArch64::H7 }, |
2965 | { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, |
2966 | AArch64::S5, AArch64::S6, AArch64::S7 }, |
2967 | { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, |
2968 | AArch64::D5, AArch64::D6, AArch64::D7 }, |
2969 | { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, |
2970 | AArch64::Q5, AArch64::Q6, AArch64::Q7 } |
2971 | }; |
2972 | |
2973 | unsigned GPRIdx = 0; |
2974 | unsigned FPRIdx = 0; |
2975 | for (auto const &Arg : F->args()) { |
2976 | MVT VT = TLI.getSimpleValueType(DL, Ty: Arg.getType()); |
2977 | unsigned SrcReg; |
2978 | const TargetRegisterClass *RC; |
2979 | if (VT >= MVT::i1 && VT <= MVT::i32) { |
2980 | SrcReg = Registers[0][GPRIdx++]; |
2981 | RC = &AArch64::GPR32RegClass; |
2982 | VT = MVT::i32; |
2983 | } else if (VT == MVT::i64) { |
2984 | SrcReg = Registers[1][GPRIdx++]; |
2985 | RC = &AArch64::GPR64RegClass; |
2986 | } else if (VT == MVT::f16 || VT == MVT::bf16) { |
2987 | SrcReg = Registers[2][FPRIdx++]; |
2988 | RC = &AArch64::FPR16RegClass; |
2989 | } else if (VT == MVT::f32) { |
2990 | SrcReg = Registers[3][FPRIdx++]; |
2991 | RC = &AArch64::FPR32RegClass; |
2992 | } else if ((VT == MVT::f64) || VT.is64BitVector()) { |
2993 | SrcReg = Registers[4][FPRIdx++]; |
2994 | RC = &AArch64::FPR64RegClass; |
2995 | } else if (VT.is128BitVector()) { |
2996 | SrcReg = Registers[5][FPRIdx++]; |
2997 | RC = &AArch64::FPR128RegClass; |
2998 | } else |
2999 | llvm_unreachable("Unexpected value type." ); |
3000 | |
3001 | Register DstReg = FuncInfo.MF->addLiveIn(PReg: SrcReg, RC); |
3002 | // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. |
3003 | // Without this, EmitLiveInCopies may eliminate the livein if its only |
3004 | // use is a bitcast (which isn't turned into an instruction). |
3005 | Register ResultReg = createResultReg(RC); |
3006 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3007 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg) |
3008 | .addReg(RegNo: DstReg, flags: getKillRegState(B: true)); |
3009 | updateValueMap(I: &Arg, Reg: ResultReg); |
3010 | } |
3011 | return true; |
3012 | } |
3013 | |
3014 | bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, |
3015 | SmallVectorImpl<MVT> &OutVTs, |
3016 | unsigned &NumBytes) { |
3017 | CallingConv::ID CC = CLI.CallConv; |
3018 | SmallVector<CCValAssign, 16> ArgLocs; |
3019 | CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); |
3020 | CCInfo.AnalyzeCallOperands(ArgVTs&: OutVTs, Flags&: CLI.OutFlags, Fn: CCAssignFnForCall(CC)); |
3021 | |
3022 | // Get a count of how many bytes are to be pushed on the stack. |
3023 | NumBytes = CCInfo.getStackSize(); |
3024 | |
3025 | // Issue CALLSEQ_START |
3026 | unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); |
3027 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AdjStackDown)) |
3028 | .addImm(Val: NumBytes).addImm(Val: 0); |
3029 | |
3030 | // Process the args. |
3031 | for (CCValAssign &VA : ArgLocs) { |
3032 | const Value *ArgVal = CLI.OutVals[VA.getValNo()]; |
3033 | MVT ArgVT = OutVTs[VA.getValNo()]; |
3034 | |
3035 | Register ArgReg = getRegForValue(V: ArgVal); |
3036 | if (!ArgReg) |
3037 | return false; |
3038 | |
3039 | // Handle arg promotion: SExt, ZExt, AExt. |
3040 | switch (VA.getLocInfo()) { |
3041 | case CCValAssign::Full: |
3042 | break; |
3043 | case CCValAssign::SExt: { |
3044 | MVT DestVT = VA.getLocVT(); |
3045 | MVT SrcVT = ArgVT; |
3046 | ArgReg = emitIntExt(SrcVT, SrcReg: ArgReg, DestVT, /*isZExt=*/false); |
3047 | if (!ArgReg) |
3048 | return false; |
3049 | break; |
3050 | } |
3051 | case CCValAssign::AExt: |
3052 | // Intentional fall-through. |
3053 | case CCValAssign::ZExt: { |
3054 | MVT DestVT = VA.getLocVT(); |
3055 | MVT SrcVT = ArgVT; |
3056 | ArgReg = emitIntExt(SrcVT, SrcReg: ArgReg, DestVT, /*isZExt=*/true); |
3057 | if (!ArgReg) |
3058 | return false; |
3059 | break; |
3060 | } |
3061 | default: |
3062 | llvm_unreachable("Unknown arg promotion!" ); |
3063 | } |
3064 | |
3065 |     // Now copy/store the argument to its correct location.
3066 | if (VA.isRegLoc() && !VA.needsCustom()) { |
3067 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3068 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: VA.getLocReg()).addReg(RegNo: ArgReg); |
3069 | CLI.OutRegs.push_back(Elt: VA.getLocReg()); |
3070 | } else if (VA.needsCustom()) { |
3071 | // FIXME: Handle custom args. |
3072 | return false; |
3073 | } else { |
3074 | assert(VA.isMemLoc() && "Assuming store on stack." ); |
3075 | |
3076 | // Don't emit stores for undef values. |
3077 | if (isa<UndefValue>(Val: ArgVal)) |
3078 | continue; |
3079 | |
3080 | // Need to store on the stack. |
3081 | unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; |
3082 | |
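     |       // On big-endian targets a sub-8-byte argument is stored at an adjusted
     |       // offset so it lands in the expected part of its 8-byte stack slot.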
3083 | unsigned BEAlign = 0; |
3084 | if (ArgSize < 8 && !Subtarget->isLittleEndian()) |
3085 | BEAlign = 8 - ArgSize; |
3086 | |
3087 | Address Addr; |
3088 | Addr.setKind(Address::RegBase); |
3089 | Addr.setReg(AArch64::SP); |
3090 | Addr.setOffset(VA.getLocMemOffset() + BEAlign); |
3091 | |
3092 | Align Alignment = DL.getABITypeAlign(Ty: ArgVal->getType()); |
3093 | MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( |
3094 | PtrInfo: MachinePointerInfo::getStack(MF&: *FuncInfo.MF, Offset: Addr.getOffset()), |
3095 | F: MachineMemOperand::MOStore, Size: ArgVT.getStoreSize(), BaseAlignment: Alignment); |
3096 | |
3097 | if (!emitStore(VT: ArgVT, SrcReg: ArgReg, Addr, MMO)) |
3098 | return false; |
3099 | } |
3100 | } |
3101 | return true; |
3102 | } |
3103 | |
3104 | bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) { |
3105 | CallingConv::ID CC = CLI.CallConv; |
3106 | |
3107 | // Issue CALLSEQ_END |
3108 | unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); |
3109 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AdjStackUp)) |
3110 | .addImm(Val: NumBytes).addImm(Val: 0); |
3111 | |
3112 | // Now the return values. |
3113 | SmallVector<CCValAssign, 16> RVLocs; |
3114 | CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); |
3115 | CCInfo.AnalyzeCallResult(Ins: CLI.Ins, Fn: CCAssignFnForCall(CC)); |
3116 | |
3117 | Register ResultReg = FuncInfo.CreateRegs(Ty: CLI.RetTy); |
3118 | for (unsigned i = 0; i != RVLocs.size(); ++i) { |
3119 | CCValAssign &VA = RVLocs[i]; |
3120 | MVT CopyVT = VA.getValVT(); |
3121 | unsigned CopyReg = ResultReg + i; |
3122 | |
3123 | // TODO: Handle big-endian results |
3124 | if (CopyVT.isVector() && !Subtarget->isLittleEndian()) |
3125 | return false; |
3126 | |
3127 |     // Copy the result out of its specified physreg.
3128 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: TargetOpcode::COPY), |
3129 | DestReg: CopyReg) |
3130 | .addReg(RegNo: VA.getLocReg()); |
3131 | CLI.InRegs.push_back(Elt: VA.getLocReg()); |
3132 | } |
3133 | |
3134 | CLI.ResultReg = ResultReg; |
3135 | CLI.NumResultRegs = RVLocs.size(); |
3136 | |
3137 | return true; |
3138 | } |
3139 | |
3140 | bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { |
3141 | CallingConv::ID CC = CLI.CallConv; |
3142 | bool IsTailCall = CLI.IsTailCall; |
3143 | bool IsVarArg = CLI.IsVarArg; |
3144 | const Value *Callee = CLI.Callee; |
3145 | MCSymbol *Symbol = CLI.Symbol; |
3146 | |
3147 | if (!Callee && !Symbol) |
3148 | return false; |
3149 | |
3150 | // Allow SelectionDAG isel to handle calls to functions like setjmp that need |
3151 | // a bti instruction following the call. |
3152 | if (CLI.CB && CLI.CB->hasFnAttr(Kind: Attribute::ReturnsTwice) && |
3153 | !Subtarget->noBTIAtReturnTwice() && |
3154 | MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) |
3155 | return false; |
3156 | |
3157 | // Allow SelectionDAG isel to handle indirect calls with KCFI checks. |
3158 | if (CLI.CB && CLI.CB->isIndirectCall() && |
3159 | CLI.CB->getOperandBundle(ID: LLVMContext::OB_kcfi)) |
3160 | return false; |
3161 | |
3162 | // Allow SelectionDAG isel to handle tail calls. |
3163 | if (IsTailCall) |
3164 | return false; |
3165 | |
3166 | // FIXME: we could and should support this, but for now correctness at -O0 is |
3167 | // more important. |
3168 | if (Subtarget->isTargetILP32()) |
3169 | return false; |
3170 | |
3171 | CodeModel::Model CM = TM.getCodeModel(); |
3172 | // Only support the small-addressing and large code models. |
3173 | if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) |
3174 | return false; |
3175 | |
3176 | // FIXME: Add large code model support for ELF. |
3177 | if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) |
3178 | return false; |
3179 | |
3180 | // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind |
3181 | // attribute. Check "RtLibUseGOT" instead. |
3182 | if (MF->getFunction().getParent()->getRtLibUseGOT()) |
3183 | return false; |
3184 | |
3185 | // Let SDISel handle vararg functions. |
3186 | if (IsVarArg) |
3187 | return false; |
3188 | |
3189 | if (Subtarget->isWindowsArm64EC()) |
3190 | return false; |
3191 | |
3192 | for (auto Flag : CLI.OutFlags) |
3193 | if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || |
3194 | Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError()) |
3195 | return false; |
3196 | |
3197 | // Set up the argument vectors. |
3198 | SmallVector<MVT, 16> OutVTs; |
3199 | OutVTs.reserve(N: CLI.OutVals.size()); |
3200 | |
3201 | for (auto *Val : CLI.OutVals) { |
3202 | MVT VT; |
3203 | if (!isTypeLegal(Ty: Val->getType(), VT) && |
3204 | !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) |
3205 | return false; |
3206 | |
3207 | // We don't handle vector parameters yet. |
3208 | if (VT.isVector() || VT.getSizeInBits() > 64) |
3209 | return false; |
3210 | |
3211 | OutVTs.push_back(Elt: VT); |
3212 | } |
3213 | |
3214 | Address Addr; |
3215 | if (Callee && !computeCallAddress(V: Callee, Addr)) |
3216 | return false; |
3217 | |
3218 | // The weak function target may be zero; in that case we must use indirect |
3219 |   // addressing via a stub on Windows, as it may be out of range for a
3220 | // PC-relative jump. |
3221 | if (Subtarget->isTargetWindows() && Addr.getGlobalValue() && |
3222 | Addr.getGlobalValue()->hasExternalWeakLinkage()) |
3223 | return false; |
3224 | |
3225 | // Handle the arguments now that we've gotten them. |
3226 | unsigned NumBytes; |
3227 | if (!processCallArgs(CLI, OutVTs, NumBytes)) |
3228 | return false; |
3229 | |
3230 | const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); |
3231 | if (RegInfo->isAnyArgRegReserved(MF: *MF)) |
3232 | RegInfo->emitReservedArgRegCallError(MF: *MF); |
3233 | |
3234 | // Issue the call. |
3235 | MachineInstrBuilder MIB; |
3236 | if (Subtarget->useSmallAddressing()) { |
3237 | const MCInstrDesc &II = |
3238 | TII.get(Opcode: Addr.getReg() ? getBLRCallOpcode(MF: *MF) : (unsigned)AArch64::BL); |
3239 | MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II); |
3240 | if (Symbol) |
3241 | MIB.addSym(Sym: Symbol, TargetFlags: 0); |
3242 | else if (Addr.getGlobalValue()) |
3243 | MIB.addGlobalAddress(GV: Addr.getGlobalValue(), Offset: 0, TargetFlags: 0); |
3244 | else if (Addr.getReg()) { |
3245 | Register Reg = constrainOperandRegClass(II, Op: Addr.getReg(), OpNum: 0); |
3246 | MIB.addReg(RegNo: Reg); |
3247 | } else |
3248 | return false; |
3249 | } else { |
3250 | unsigned CallReg = 0; |
3251 | if (Symbol) { |
3252 | Register ADRPReg = createResultReg(RC: &AArch64::GPR64commonRegClass); |
3253 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADRP), |
3254 | DestReg: ADRPReg) |
3255 | .addSym(Sym: Symbol, TargetFlags: AArch64II::MO_GOT | AArch64II::MO_PAGE); |
3256 | |
3257 | CallReg = createResultReg(RC: &AArch64::GPR64RegClass); |
3258 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3259 | MCID: TII.get(Opcode: AArch64::LDRXui), DestReg: CallReg) |
3260 | .addReg(RegNo: ADRPReg) |
3261 | .addSym(Sym: Symbol, |
3262 | TargetFlags: AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
3263 | } else if (Addr.getGlobalValue()) |
3264 | CallReg = materializeGV(GV: Addr.getGlobalValue()); |
3265 | else if (Addr.getReg()) |
3266 | CallReg = Addr.getReg(); |
3267 | |
3268 | if (!CallReg) |
3269 | return false; |
3270 | |
3271 | const MCInstrDesc &II = TII.get(Opcode: getBLRCallOpcode(MF: *MF)); |
3272 | CallReg = constrainOperandRegClass(II, Op: CallReg, OpNum: 0); |
3273 | MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: CallReg); |
3274 | } |
3275 | |
3276 | // Add implicit physical register uses to the call. |
3277 | for (auto Reg : CLI.OutRegs) |
3278 | MIB.addReg(RegNo: Reg, flags: RegState::Implicit); |
3279 | |
3280 | // Add a register mask with the call-preserved registers. |
3281 | // Proper defs for return values will be added by setPhysRegsDeadExcept(). |
3282 | MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC)); |
3283 | |
3284 | CLI.Call = MIB; |
3285 | |
3286 | // Finish off the call including any return values. |
3287 | return finishCall(CLI, NumBytes); |
3288 | } |
3289 | |
3290 | bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) { |
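     |   // With a known alignment allow at most four alignment-sized chunks;
     |   // otherwise only inline copies smaller than 32 bytes.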
3291 | if (Alignment) |
3292 | return Len / Alignment->value() <= 4; |
3293 | else |
3294 | return Len < 32; |
3295 | } |
3296 | |
3297 | bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, |
3298 | uint64_t Len, MaybeAlign Alignment) { |
3299 | // Make sure we don't bloat code by inlining very large memcpy's. |
3300 | if (!isMemCpySmall(Len, Alignment)) |
3301 | return false; |
3302 | |
3303 | int64_t UnscaledOffset = 0; |
3304 | Address OrigDest = Dest; |
3305 | Address OrigSrc = Src; |
3306 | |
3307 | while (Len) { |
3308 | MVT VT; |
3309 | if (!Alignment || *Alignment >= 8) { |
3310 | if (Len >= 8) |
3311 | VT = MVT::i64; |
3312 | else if (Len >= 4) |
3313 | VT = MVT::i32; |
3314 | else if (Len >= 2) |
3315 | VT = MVT::i16; |
3316 | else { |
3317 | VT = MVT::i8; |
3318 | } |
3319 | } else { |
3320 | assert(Alignment && "Alignment is set in this branch" ); |
3321 | // Bound based on alignment. |
3322 | if (Len >= 4 && *Alignment == 4) |
3323 | VT = MVT::i32; |
3324 | else if (Len >= 2 && *Alignment == 2) |
3325 | VT = MVT::i16; |
3326 | else { |
3327 | VT = MVT::i8; |
3328 | } |
3329 | } |
3330 | |
3331 | unsigned ResultReg = emitLoad(VT, RetVT: VT, Addr: Src); |
3332 | if (!ResultReg) |
3333 | return false; |
3334 | |
3335 | if (!emitStore(VT, SrcReg: ResultReg, Addr: Dest)) |
3336 | return false; |
3337 | |
3338 | int64_t Size = VT.getSizeInBits() / 8; |
3339 | Len -= Size; |
3340 | UnscaledOffset += Size; |
3341 | |
3342 | // We need to recompute the unscaled offset for each iteration. |
3343 | Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); |
3344 | Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); |
3345 | } |
3346 | |
3347 | return true; |
3348 | } |
3349 | |
3350 | /// Check if it is possible to fold the condition from the XALU intrinsic |
3351 | /// into the user. The condition code will only be updated on success. |
3352 | bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, |
3353 | const Instruction *I, |
3354 | const Value *Cond) { |
3355 | if (!isa<ExtractValueInst>(Val: Cond)) |
3356 | return false; |
3357 | |
3358 | const auto *EV = cast<ExtractValueInst>(Val: Cond); |
3359 | if (!isa<IntrinsicInst>(Val: EV->getAggregateOperand())) |
3360 | return false; |
3361 | |
3362 | const auto *II = cast<IntrinsicInst>(Val: EV->getAggregateOperand()); |
3363 | MVT RetVT; |
3364 | const Function *Callee = II->getCalledFunction(); |
3365 | Type *RetTy = |
3366 | cast<StructType>(Val: Callee->getReturnType())->getTypeAtIndex(N: 0U); |
3367 | if (!isTypeLegal(Ty: RetTy, VT&: RetVT)) |
3368 | return false; |
3369 | |
3370 | if (RetVT != MVT::i32 && RetVT != MVT::i64) |
3371 | return false; |
3372 | |
3373 | const Value *LHS = II->getArgOperand(i: 0); |
3374 | const Value *RHS = II->getArgOperand(i: 1); |
3375 | |
3376 | // Canonicalize immediate to the RHS. |
3377 | if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS) && II->isCommutative()) |
3378 | std::swap(a&: LHS, b&: RHS); |
3379 | |
3380 | // Simplify multiplies. |
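     |   // x * 2 overflows exactly when x + x does, so the add's overflow condition
     |   // can be used for the multiply as well.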
3381 | Intrinsic::ID IID = II->getIntrinsicID(); |
3382 | switch (IID) { |
3383 | default: |
3384 | break; |
3385 | case Intrinsic::smul_with_overflow: |
3386 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) |
3387 | if (C->getValue() == 2) |
3388 | IID = Intrinsic::sadd_with_overflow; |
3389 | break; |
3390 | case Intrinsic::umul_with_overflow: |
3391 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) |
3392 | if (C->getValue() == 2) |
3393 | IID = Intrinsic::uadd_with_overflow; |
3394 | break; |
3395 | } |
3396 | |
3397 | AArch64CC::CondCode TmpCC; |
3398 | switch (IID) { |
3399 | default: |
3400 | return false; |
3401 | case Intrinsic::sadd_with_overflow: |
3402 | case Intrinsic::ssub_with_overflow: |
3403 | TmpCC = AArch64CC::VS; |
3404 | break; |
3405 | case Intrinsic::uadd_with_overflow: |
3406 | TmpCC = AArch64CC::HS; |
3407 | break; |
3408 | case Intrinsic::usub_with_overflow: |
3409 | TmpCC = AArch64CC::LO; |
3410 | break; |
3411 | case Intrinsic::smul_with_overflow: |
3412 | case Intrinsic::umul_with_overflow: |
3413 | TmpCC = AArch64CC::NE; |
3414 | break; |
3415 | } |
3416 | |
3417 | // Check if both instructions are in the same basic block. |
3418 | if (!isValueAvailable(V: II)) |
3419 | return false; |
3420 | |
3421 |   // Make sure nothing is in the way between the intrinsic and this instruction.
3422 | BasicBlock::const_iterator Start(I); |
3423 | BasicBlock::const_iterator End(II); |
3424 | for (auto Itr = std::prev(x: Start); Itr != End; --Itr) { |
3425 | // We only expect extractvalue instructions between the intrinsic and the |
3426 | // instruction to be selected. |
3427 | if (!isa<ExtractValueInst>(Val: Itr)) |
3428 | return false; |
3429 | |
3430 | // Check that the extractvalue operand comes from the intrinsic. |
3431 | const auto *EVI = cast<ExtractValueInst>(Val&: Itr); |
3432 | if (EVI->getAggregateOperand() != II) |
3433 | return false; |
3434 | } |
3435 | |
3436 | CC = TmpCC; |
3437 | return true; |
3438 | } |
3439 | |
3440 | bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { |
3441 | // FIXME: Handle more intrinsics. |
3442 | switch (II->getIntrinsicID()) { |
3443 | default: return false; |
3444 | case Intrinsic::frameaddress: { |
3445 | MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); |
3446 | MFI.setFrameAddressIsTaken(true); |
3447 | |
3448 | const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); |
3449 | Register FramePtr = RegInfo->getFrameRegister(MF: *(FuncInfo.MF)); |
3450 | Register SrcReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass); |
3451 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3452 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: SrcReg).addReg(RegNo: FramePtr); |
3453 | // Recursively load frame address |
3454 | // ldr x0, [fp] |
3455 | // ldr x0, [x0] |
3456 | // ldr x0, [x0] |
3457 | // ... |
3458 | unsigned DestReg; |
3459 | unsigned Depth = cast<ConstantInt>(Val: II->getOperand(i_nocapture: 0))->getZExtValue(); |
3460 | while (Depth--) { |
3461 | DestReg = fastEmitInst_ri(MachineInstOpcode: AArch64::LDRXui, RC: &AArch64::GPR64RegClass, |
3462 | Op0: SrcReg, Imm: 0); |
3463 | assert(DestReg && "Unexpected LDR instruction emission failure." ); |
3464 | SrcReg = DestReg; |
3465 | } |
3466 | |
3467 | updateValueMap(I: II, Reg: SrcReg); |
3468 | return true; |
3469 | } |
3470 | case Intrinsic::sponentry: { |
3471 | MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); |
3472 | |
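     |     // llvm.sponentry returns the stack pointer as it was on entry to the
     |     // function; model it as the address of a fixed object at SP offset 0.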
3473 | // SP = FP + Fixed Object + 16 |
3474 | int FI = MFI.CreateFixedObject(Size: 4, SPOffset: 0, IsImmutable: false); |
3475 | Register ResultReg = createResultReg(RC: &AArch64::GPR64spRegClass); |
3476 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3477 | MCID: TII.get(Opcode: AArch64::ADDXri), DestReg: ResultReg) |
3478 | .addFrameIndex(Idx: FI) |
3479 | .addImm(Val: 0) |
3480 | .addImm(Val: 0); |
3481 | |
3482 | updateValueMap(I: II, Reg: ResultReg); |
3483 | return true; |
3484 | } |
3485 | case Intrinsic::memcpy: |
3486 | case Intrinsic::memmove: { |
3487 | const auto *MTI = cast<MemTransferInst>(Val: II); |
3488 | // Don't handle volatile. |
3489 | if (MTI->isVolatile()) |
3490 | return false; |
3491 | |
3492 |     // Disable inlining for memmove before calls to computeAddress. Otherwise,
3493 | // we would emit dead code because we don't currently handle memmoves. |
3494 | bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); |
3495 | if (isa<ConstantInt>(Val: MTI->getLength()) && IsMemCpy) { |
3496 | // Small memcpy's are common enough that we want to do them without a call |
3497 | // if possible. |
3498 | uint64_t Len = cast<ConstantInt>(Val: MTI->getLength())->getZExtValue(); |
3499 | MaybeAlign Alignment; |
3500 | if (MTI->getDestAlign() || MTI->getSourceAlign()) |
3501 | Alignment = std::min(a: MTI->getDestAlign().valueOrOne(), |
3502 | b: MTI->getSourceAlign().valueOrOne()); |
3503 | if (isMemCpySmall(Len, Alignment)) { |
3504 | Address Dest, Src; |
3505 | if (!computeAddress(Obj: MTI->getRawDest(), Addr&: Dest) || |
3506 | !computeAddress(Obj: MTI->getRawSource(), Addr&: Src)) |
3507 | return false; |
3508 | if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) |
3509 | return true; |
3510 | } |
3511 | } |
3512 | |
3513 | if (!MTI->getLength()->getType()->isIntegerTy(Bitwidth: 64)) |
3514 | return false; |
3515 | |
3516 | if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) |
3517 | // Fast instruction selection doesn't support the special |
3518 | // address spaces. |
3519 | return false; |
3520 | |
3521 | const char *IntrMemName = isa<MemCpyInst>(Val: II) ? "memcpy" : "memmove" ; |
3522 | return lowerCallTo(CI: II, SymName: IntrMemName, NumArgs: II->arg_size() - 1); |
3523 | } |
3524 | case Intrinsic::memset: { |
3525 | const MemSetInst *MSI = cast<MemSetInst>(Val: II); |
3526 | // Don't handle volatile. |
3527 | if (MSI->isVolatile()) |
3528 | return false; |
3529 | |
3530 | if (!MSI->getLength()->getType()->isIntegerTy(Bitwidth: 64)) |
3531 | return false; |
3532 | |
3533 | if (MSI->getDestAddressSpace() > 255) |
3534 | // Fast instruction selection doesn't support the special |
3535 | // address spaces. |
3536 | return false; |
3537 | |
3538 | return lowerCallTo(CI: II, SymName: "memset" , NumArgs: II->arg_size() - 1); |
3539 | } |
3540 | case Intrinsic::sin: |
3541 | case Intrinsic::cos: |
3542 | case Intrinsic::tan: |
3543 | case Intrinsic::pow: { |
3544 | MVT RetVT; |
3545 | if (!isTypeLegal(Ty: II->getType(), VT&: RetVT)) |
3546 | return false; |
3547 | |
3548 | if (RetVT != MVT::f32 && RetVT != MVT::f64) |
3549 | return false; |
3550 | |
3551 | static const RTLIB::Libcall LibCallTable[4][2] = { |
3552 | {RTLIB::SIN_F32, RTLIB::SIN_F64}, |
3553 | {RTLIB::COS_F32, RTLIB::COS_F64}, |
3554 | {RTLIB::TAN_F32, RTLIB::TAN_F64}, |
3555 | {RTLIB::POW_F32, RTLIB::POW_F64}}; |
3556 | RTLIB::Libcall LC; |
3557 | bool Is64Bit = RetVT == MVT::f64; |
3558 | switch (II->getIntrinsicID()) { |
3559 | default: |
3560 | llvm_unreachable("Unexpected intrinsic." ); |
3561 | case Intrinsic::sin: |
3562 | LC = LibCallTable[0][Is64Bit]; |
3563 | break; |
3564 | case Intrinsic::cos: |
3565 | LC = LibCallTable[1][Is64Bit]; |
3566 | break; |
3567 | case Intrinsic::tan: |
3568 | LC = LibCallTable[2][Is64Bit]; |
3569 | break; |
3570 | case Intrinsic::pow: |
3571 | LC = LibCallTable[3][Is64Bit]; |
3572 | break; |
3573 | } |
3574 | |
3575 | ArgListTy Args; |
3576 | Args.reserve(n: II->arg_size()); |
3577 | |
3578 | // Populate the argument list. |
3579 | for (auto &Arg : II->args()) { |
3580 | ArgListEntry Entry; |
3581 | Entry.Val = Arg; |
3582 | Entry.Ty = Arg->getType(); |
3583 | Args.push_back(x: Entry); |
3584 | } |
3585 | |
3586 | CallLoweringInfo CLI; |
3587 | MCContext &Ctx = MF->getContext(); |
3588 | CLI.setCallee(DL, Ctx, CC: TLI.getLibcallCallingConv(Call: LC), ResultTy: II->getType(), |
3589 | Target: TLI.getLibcallName(Call: LC), ArgsList: std::move(Args)); |
3590 | if (!lowerCallTo(CLI)) |
3591 | return false; |
3592 | updateValueMap(I: II, Reg: CLI.ResultReg); |
3593 | return true; |
3594 | } |
3595 | case Intrinsic::fabs: { |
3596 | MVT VT; |
3597 | if (!isTypeLegal(Ty: II->getType(), VT)) |
3598 | return false; |
3599 | |
3600 | unsigned Opc; |
3601 | switch (VT.SimpleTy) { |
3602 | default: |
3603 | return false; |
3604 | case MVT::f32: |
3605 | Opc = AArch64::FABSSr; |
3606 | break; |
3607 | case MVT::f64: |
3608 | Opc = AArch64::FABSDr; |
3609 | break; |
3610 | } |
3611 | Register SrcReg = getRegForValue(V: II->getOperand(i_nocapture: 0)); |
3612 | if (!SrcReg) |
3613 | return false; |
3614 | Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT)); |
3615 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg) |
3616 | .addReg(RegNo: SrcReg); |
3617 | updateValueMap(I: II, Reg: ResultReg); |
3618 | return true; |
3619 | } |
3620 | case Intrinsic::trap: |
3621 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::BRK)) |
3622 | .addImm(Val: 1); |
3623 | return true; |
3624 | case Intrinsic::debugtrap: |
3625 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::BRK)) |
3626 | .addImm(Val: 0xF000); |
3627 | return true; |
3628 | |
3629 | case Intrinsic::sqrt: { |
3630 | Type *RetTy = II->getCalledFunction()->getReturnType(); |
3631 | |
3632 | MVT VT; |
3633 | if (!isTypeLegal(Ty: RetTy, VT)) |
3634 | return false; |
3635 | |
3636 | Register Op0Reg = getRegForValue(V: II->getOperand(i_nocapture: 0)); |
3637 | if (!Op0Reg) |
3638 | return false; |
3639 | |
3640 | unsigned ResultReg = fastEmit_r(VT, RetVT: VT, Opcode: ISD::FSQRT, Op0: Op0Reg); |
3641 | if (!ResultReg) |
3642 | return false; |
3643 | |
3644 | updateValueMap(I: II, Reg: ResultReg); |
3645 | return true; |
3646 | } |
3647 | case Intrinsic::sadd_with_overflow: |
3648 | case Intrinsic::uadd_with_overflow: |
3649 | case Intrinsic::ssub_with_overflow: |
3650 | case Intrinsic::usub_with_overflow: |
3651 | case Intrinsic::smul_with_overflow: |
3652 | case Intrinsic::umul_with_overflow: { |
3653 | // This implements the basic lowering of the xalu with overflow intrinsics. |
3654 | const Function *Callee = II->getCalledFunction(); |
3655 | auto *Ty = cast<StructType>(Val: Callee->getReturnType()); |
3656 | Type *RetTy = Ty->getTypeAtIndex(N: 0U); |
3657 | |
3658 | MVT VT; |
3659 | if (!isTypeLegal(Ty: RetTy, VT)) |
3660 | return false; |
3661 | |
3662 | if (VT != MVT::i32 && VT != MVT::i64) |
3663 | return false; |
3664 | |
3665 | const Value *LHS = II->getArgOperand(i: 0); |
3666 | const Value *RHS = II->getArgOperand(i: 1); |
3667 | // Canonicalize immediate to the RHS. |
3668 | if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS) && II->isCommutative()) |
3669 | std::swap(a&: LHS, b&: RHS); |
3670 | |
3671 | // Simplify multiplies. |
3672 | Intrinsic::ID IID = II->getIntrinsicID(); |
3673 | switch (IID) { |
3674 | default: |
3675 | break; |
3676 | case Intrinsic::smul_with_overflow: |
3677 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) |
3678 | if (C->getValue() == 2) { |
3679 | IID = Intrinsic::sadd_with_overflow; |
3680 | RHS = LHS; |
3681 | } |
3682 | break; |
3683 | case Intrinsic::umul_with_overflow: |
3684 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) |
3685 | if (C->getValue() == 2) { |
3686 | IID = Intrinsic::uadd_with_overflow; |
3687 | RHS = LHS; |
3688 | } |
3689 | break; |
3690 | } |
3691 | |
3692 | unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; |
3693 | AArch64CC::CondCode CC = AArch64CC::Invalid; |
3694 | switch (IID) { |
3695 | default: llvm_unreachable("Unexpected intrinsic!" ); |
3696 | case Intrinsic::sadd_with_overflow: |
3697 | ResultReg1 = emitAdd(RetVT: VT, LHS, RHS, /*SetFlags=*/true); |
3698 | CC = AArch64CC::VS; |
3699 | break; |
3700 | case Intrinsic::uadd_with_overflow: |
3701 | ResultReg1 = emitAdd(RetVT: VT, LHS, RHS, /*SetFlags=*/true); |
3702 | CC = AArch64CC::HS; |
3703 | break; |
3704 | case Intrinsic::ssub_with_overflow: |
3705 | ResultReg1 = emitSub(RetVT: VT, LHS, RHS, /*SetFlags=*/true); |
3706 | CC = AArch64CC::VS; |
3707 | break; |
3708 | case Intrinsic::usub_with_overflow: |
3709 | ResultReg1 = emitSub(RetVT: VT, LHS, RHS, /*SetFlags=*/true); |
3710 | CC = AArch64CC::LO; |
3711 | break; |
3712 | case Intrinsic::smul_with_overflow: { |
3713 | CC = AArch64CC::NE; |
3714 | Register LHSReg = getRegForValue(V: LHS); |
3715 | if (!LHSReg) |
3716 | return false; |
3717 | |
3718 | Register RHSReg = getRegForValue(V: RHS); |
3719 | if (!RHSReg) |
3720 | return false; |
3721 | |
3722 | if (VT == MVT::i32) { |
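     |         // Compute the full 32x32->64 product, then compare it against its own
     |         // sign-extended low half; a mismatch means the product overflowed i32.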
3723 | MulReg = emitSMULL_rr(RetVT: MVT::i64, Op0: LHSReg, Op1: RHSReg); |
3724 | Register MulSubReg = |
3725 | fastEmitInst_extractsubreg(RetVT: VT, Op0: MulReg, Idx: AArch64::sub_32); |
3726 | // cmp xreg, wreg, sxtw |
3727 | emitAddSub_rx(/*UseAdd=*/false, RetVT: MVT::i64, LHSReg: MulReg, RHSReg: MulSubReg, |
3728 | ExtType: AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true, |
3729 | /*WantResult=*/false); |
3730 | MulReg = MulSubReg; |
3731 | } else { |
3732 | assert(VT == MVT::i64 && "Unexpected value type." ); |
3733 | // LHSReg and RHSReg cannot be killed by this Mul, since they are |
3734 | // reused in the next instruction. |
3735 | MulReg = emitMul_rr(RetVT: VT, Op0: LHSReg, Op1: RHSReg); |
3736 | unsigned SMULHReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISD::MULHS, Op0: LHSReg, Op1: RHSReg); |
3737 | emitSubs_rs(RetVT: VT, LHSReg: SMULHReg, RHSReg: MulReg, ShiftType: AArch64_AM::ASR, ShiftImm: 63, |
3738 | /*WantResult=*/false); |
3739 | } |
3740 | break; |
3741 | } |
3742 | case Intrinsic::umul_with_overflow: { |
3743 | CC = AArch64CC::NE; |
3744 | Register LHSReg = getRegForValue(V: LHS); |
3745 | if (!LHSReg) |
3746 | return false; |
3747 | |
3748 | Register RHSReg = getRegForValue(V: RHS); |
3749 | if (!RHSReg) |
3750 | return false; |
3751 | |
3752 | if (VT == MVT::i32) { |
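     |         // Compute the full 32x32->64 product; any bit set in the upper 32 bits
     |         // indicates unsigned overflow.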
3753 | MulReg = emitUMULL_rr(RetVT: MVT::i64, Op0: LHSReg, Op1: RHSReg); |
3754 | // tst xreg, #0xffffffff00000000 |
3755 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3756 | MCID: TII.get(Opcode: AArch64::ANDSXri), DestReg: AArch64::XZR) |
3757 | .addReg(RegNo: MulReg) |
3758 | .addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: 0xFFFFFFFF00000000, regSize: 64)); |
3759 | MulReg = fastEmitInst_extractsubreg(RetVT: VT, Op0: MulReg, Idx: AArch64::sub_32); |
3760 | } else { |
3761 | assert(VT == MVT::i64 && "Unexpected value type." ); |
3762 | // LHSReg and RHSReg cannot be killed by this Mul, since they are |
3763 | // reused in the next instruction. |
3764 | MulReg = emitMul_rr(RetVT: VT, Op0: LHSReg, Op1: RHSReg); |
3765 | unsigned UMULHReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISD::MULHU, Op0: LHSReg, Op1: RHSReg); |
3766 | emitSubs_rr(RetVT: VT, LHSReg: AArch64::XZR, RHSReg: UMULHReg, /*WantResult=*/false); |
3767 | } |
3768 | break; |
3769 | } |
3770 | } |
3771 | |
3772 | if (MulReg) { |
3773 | ResultReg1 = createResultReg(RC: TLI.getRegClassFor(VT)); |
3774 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3775 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg1).addReg(RegNo: MulReg); |
3776 | } |
3777 | |
3778 | if (!ResultReg1) |
3779 | return false; |
3780 | |
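     |     // Materialize the overflow bit: CSINC with the inverted condition yields 1
     |     // when the overflow condition holds and 0 otherwise.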
3781 | ResultReg2 = fastEmitInst_rri(MachineInstOpcode: AArch64::CSINCWr, RC: &AArch64::GPR32RegClass, |
3782 | Op0: AArch64::WZR, Op1: AArch64::WZR, |
3783 | Imm: getInvertedCondCode(Code: CC)); |
3784 | (void)ResultReg2; |
3785 | assert((ResultReg1 + 1) == ResultReg2 && |
3786 | "Nonconsecutive result registers." ); |
3787 | updateValueMap(I: II, Reg: ResultReg1, NumRegs: 2); |
3788 | return true; |
3789 | } |
3790 | case Intrinsic::aarch64_crc32b: |
3791 | case Intrinsic::aarch64_crc32h: |
3792 | case Intrinsic::aarch64_crc32w: |
3793 | case Intrinsic::aarch64_crc32x: |
3794 | case Intrinsic::aarch64_crc32cb: |
3795 | case Intrinsic::aarch64_crc32ch: |
3796 | case Intrinsic::aarch64_crc32cw: |
3797 | case Intrinsic::aarch64_crc32cx: { |
3798 | if (!Subtarget->hasCRC()) |
3799 | return false; |
3800 | |
3801 | unsigned Opc; |
3802 | switch (II->getIntrinsicID()) { |
3803 | default: |
3804 | llvm_unreachable("Unexpected intrinsic!" ); |
3805 | case Intrinsic::aarch64_crc32b: |
3806 | Opc = AArch64::CRC32Brr; |
3807 | break; |
3808 | case Intrinsic::aarch64_crc32h: |
3809 | Opc = AArch64::CRC32Hrr; |
3810 | break; |
3811 | case Intrinsic::aarch64_crc32w: |
3812 | Opc = AArch64::CRC32Wrr; |
3813 | break; |
3814 | case Intrinsic::aarch64_crc32x: |
3815 | Opc = AArch64::CRC32Xrr; |
3816 | break; |
3817 | case Intrinsic::aarch64_crc32cb: |
3818 | Opc = AArch64::CRC32CBrr; |
3819 | break; |
3820 | case Intrinsic::aarch64_crc32ch: |
3821 | Opc = AArch64::CRC32CHrr; |
3822 | break; |
3823 | case Intrinsic::aarch64_crc32cw: |
3824 | Opc = AArch64::CRC32CWrr; |
3825 | break; |
3826 | case Intrinsic::aarch64_crc32cx: |
3827 | Opc = AArch64::CRC32CXrr; |
3828 | break; |
3829 | } |
3830 | |
3831 | Register LHSReg = getRegForValue(V: II->getArgOperand(i: 0)); |
3832 | Register RHSReg = getRegForValue(V: II->getArgOperand(i: 1)); |
3833 | if (!LHSReg || !RHSReg) |
3834 | return false; |
3835 | |
3836 | Register ResultReg = |
3837 | fastEmitInst_rr(MachineInstOpcode: Opc, RC: &AArch64::GPR32RegClass, Op0: LHSReg, Op1: RHSReg); |
3838 | updateValueMap(I: II, Reg: ResultReg); |
3839 | return true; |
3840 | } |
3841 | } |
3842 | return false; |
3843 | } |
3844 | |
3845 | bool AArch64FastISel::selectRet(const Instruction *I) { |
3846 | const ReturnInst *Ret = cast<ReturnInst>(Val: I); |
3847 | const Function &F = *I->getParent()->getParent(); |
3848 | |
3849 | if (!FuncInfo.CanLowerReturn) |
3850 | return false; |
3851 | |
3852 | if (F.isVarArg()) |
3853 | return false; |
3854 | |
3855 | if (TLI.supportSwiftError() && |
3856 | F.getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError)) |
3857 | return false; |
3858 | |
3859 | if (TLI.supportSplitCSR(MF: FuncInfo.MF)) |
3860 | return false; |
3861 | |
3862 | // Build a list of return value registers. |
3863 | SmallVector<unsigned, 4> RetRegs; |
3864 | |
3865 | if (Ret->getNumOperands() > 0) { |
3866 | CallingConv::ID CC = F.getCallingConv(); |
3867 | SmallVector<ISD::OutputArg, 4> Outs; |
3868 | GetReturnInfo(CC, ReturnType: F.getReturnType(), attr: F.getAttributes(), Outs, TLI, DL); |
3869 | |
3870 |     // Analyze the operands of the return, assigning a location to each one.
3871 | SmallVector<CCValAssign, 16> ValLocs; |
3872 | CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); |
3873 | CCInfo.AnalyzeReturn(Outs, Fn: RetCC_AArch64_AAPCS); |
3874 | |
3875 | // Only handle a single return value for now. |
3876 | if (ValLocs.size() != 1) |
3877 | return false; |
3878 | |
3879 | CCValAssign &VA = ValLocs[0]; |
3880 | const Value *RV = Ret->getOperand(i_nocapture: 0); |
3881 | |
3882 |     // Only handle plain (Full) and bitcast (BCvt) assignments for now.
3883 | if ((VA.getLocInfo() != CCValAssign::Full) && |
3884 | (VA.getLocInfo() != CCValAssign::BCvt)) |
3885 | return false; |
3886 | |
3887 | // Only handle register returns for now. |
3888 | if (!VA.isRegLoc()) |
3889 | return false; |
3890 | |
3891 | Register Reg = getRegForValue(V: RV); |
3892 | if (Reg == 0) |
3893 | return false; |
3894 | |
3895 | unsigned SrcReg = Reg + VA.getValNo(); |
3896 | Register DestReg = VA.getLocReg(); |
3897 | // Avoid a cross-class copy. This is very unlikely. |
3898 | if (!MRI.getRegClass(Reg: SrcReg)->contains(Reg: DestReg)) |
3899 | return false; |
3900 | |
3901 | EVT RVEVT = TLI.getValueType(DL, Ty: RV->getType()); |
3902 | if (!RVEVT.isSimple()) |
3903 | return false; |
3904 | |
3905 | // Vectors (of > 1 lane) in big endian need tricky handling. |
3906 | if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() && |
3907 | !Subtarget->isLittleEndian()) |
3908 | return false; |
3909 | |
3910 | MVT RVVT = RVEVT.getSimpleVT(); |
3911 | if (RVVT == MVT::f128) |
3912 | return false; |
3913 | |
3914 | MVT DestVT = VA.getValVT(); |
3915 | // Special handling for extended integers. |
3916 | if (RVVT != DestVT) { |
3917 | if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) |
3918 | return false; |
3919 | |
3920 | if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) |
3921 | return false; |
3922 | |
3923 | bool IsZExt = Outs[0].Flags.isZExt(); |
3924 | SrcReg = emitIntExt(SrcVT: RVVT, SrcReg, DestVT, isZExt: IsZExt); |
3925 | if (SrcReg == 0) |
3926 | return false; |
3927 | } |
3928 | |
3929 |     // The "callee" (i.e. the value producer) zero-extends pointers at the
3930 |     // function boundary.
3931 | if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy()) |
3932 | SrcReg = emitAnd_ri(RetVT: MVT::i64, LHSReg: SrcReg, Imm: 0xffffffff); |
3933 | |
3934 | // Make the copy. |
3935 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3936 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg).addReg(RegNo: SrcReg); |
3937 | |
3938 | // Add register to return instruction. |
3939 | RetRegs.push_back(Elt: VA.getLocReg()); |
3940 | } |
3941 | |
3942 | MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3943 | MCID: TII.get(Opcode: AArch64::RET_ReallyLR)); |
3944 | for (unsigned RetReg : RetRegs) |
3945 | MIB.addReg(RegNo: RetReg, flags: RegState::Implicit); |
3946 | return true; |
3947 | } |
3948 | |
3949 | bool AArch64FastISel::selectTrunc(const Instruction *I) { |
3950 | Type *DestTy = I->getType(); |
3951 | Value *Op = I->getOperand(i: 0); |
3952 | Type *SrcTy = Op->getType(); |
3953 | |
3954 | EVT SrcEVT = TLI.getValueType(DL, Ty: SrcTy, AllowUnknown: true); |
3955 | EVT DestEVT = TLI.getValueType(DL, Ty: DestTy, AllowUnknown: true); |
3956 | if (!SrcEVT.isSimple()) |
3957 | return false; |
3958 | if (!DestEVT.isSimple()) |
3959 | return false; |
3960 | |
3961 | MVT SrcVT = SrcEVT.getSimpleVT(); |
3962 | MVT DestVT = DestEVT.getSimpleVT(); |
3963 | |
3964 | if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && |
3965 | SrcVT != MVT::i8) |
3966 | return false; |
3967 | if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && |
3968 | DestVT != MVT::i1) |
3969 | return false; |
3970 | |
3971 | Register SrcReg = getRegForValue(V: Op); |
3972 | if (!SrcReg) |
3973 | return false; |
3974 | |
3975 | // If we're truncating from i64 to a smaller non-legal type then generate an |
3976 | // AND. Otherwise, we know the high bits are undefined and a truncate only |
3977 |   // generates a COPY. We cannot also mark the source register as the result
3978 | // register, because this can incorrectly transfer the kill flag onto the |
3979 | // source register. |
3980 | unsigned ResultReg; |
3981 | if (SrcVT == MVT::i64) { |
3982 | uint64_t Mask = 0; |
3983 | switch (DestVT.SimpleTy) { |
3984 | default: |
3985 | // Trunc i64 to i32 is handled by the target-independent fast-isel. |
3986 | return false; |
3987 | case MVT::i1: |
3988 | Mask = 0x1; |
3989 | break; |
3990 | case MVT::i8: |
3991 | Mask = 0xff; |
3992 | break; |
3993 | case MVT::i16: |
3994 | Mask = 0xffff; |
3995 | break; |
3996 | } |
3997 |     // Issue an extract_subreg to get the lower 32 bits.
3998 | Register Reg32 = fastEmitInst_extractsubreg(RetVT: MVT::i32, Op0: SrcReg, |
3999 | Idx: AArch64::sub_32); |
4000 | // Create the AND instruction which performs the actual truncation. |
4001 | ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: Reg32, Imm: Mask); |
4002 | assert(ResultReg && "Unexpected AND instruction emission failure." ); |
4003 | } else { |
4004 | ResultReg = createResultReg(RC: &AArch64::GPR32RegClass); |
4005 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4006 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg) |
4007 | .addReg(RegNo: SrcReg); |
4008 | } |
4009 | |
4010 | updateValueMap(I, Reg: ResultReg); |
4011 | return true; |
4012 | } |
4013 | |
4014 | unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { |
4015 | assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || |
4016 | DestVT == MVT::i64) && |
4017 | "Unexpected value type." ); |
4018 | // Handle i8 and i16 as i32. |
4019 | if (DestVT == MVT::i8 || DestVT == MVT::i16) |
4020 | DestVT = MVT::i32; |
4021 | |
4022 | if (IsZExt) { |
4023 | unsigned ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: SrcReg, Imm: 1); |
4024 | assert(ResultReg && "Unexpected AND instruction emission failure." ); |
4025 | if (DestVT == MVT::i64) { |
4026 |       // We're zero-extending i1 to i64. The "ANDWri Wd, Ws, #1" above implicitly
4027 |       // clears the upper 32 bits, so emit a SUBREG_TO_REG to widen Wd to Xd.
4028 | Register Reg64 = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass); |
4029 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4030 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: Reg64) |
4031 | .addImm(Val: 0) |
4032 | .addReg(RegNo: ResultReg) |
4033 | .addImm(Val: AArch64::sub_32); |
4034 | ResultReg = Reg64; |
4035 | } |
4036 | return ResultReg; |
4037 | } else { |
4038 | if (DestVT == MVT::i64) { |
4039 |       // FIXME: Sign-extending i1 to i64 is not implemented yet.
4040 | return 0; |
4041 | } |
4042 | return fastEmitInst_rii(MachineInstOpcode: AArch64::SBFMWri, RC: &AArch64::GPR32RegClass, Op0: SrcReg, |
4043 | Imm1: 0, Imm2: 0); |
4044 | } |
4045 | } |
4046 | |
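     | // MUL is an alias of MADD with the zero register as the addend, so the multiply
     | // is emitted as "madd Wd, Wn, Wm, wzr" (or the X-register form).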
4047 | unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) { |
4048 | unsigned Opc, ZReg; |
4049 | switch (RetVT.SimpleTy) { |
4050 | default: return 0; |
4051 | case MVT::i8: |
4052 | case MVT::i16: |
4053 | case MVT::i32: |
4054 | RetVT = MVT::i32; |
4055 | Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; |
4056 | case MVT::i64: |
4057 | Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; |
4058 | } |
4059 | |
4060 | const TargetRegisterClass *RC = |
4061 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4062 | return fastEmitInst_rrr(MachineInstOpcode: Opc, RC, Op0, Op1, Op2: ZReg); |
4063 | } |
4064 | |
4065 | unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { |
4066 | if (RetVT != MVT::i64) |
4067 | return 0; |
4068 | |
4069 | return fastEmitInst_rrr(MachineInstOpcode: AArch64::SMADDLrrr, RC: &AArch64::GPR64RegClass, |
4070 | Op0, Op1, Op2: AArch64::XZR); |
4071 | } |
4072 | |
4073 | unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { |
4074 | if (RetVT != MVT::i64) |
4075 | return 0; |
4076 | |
4077 | return fastEmitInst_rrr(MachineInstOpcode: AArch64::UMADDLrrr, RC: &AArch64::GPR64RegClass, |
4078 | Op0, Op1, Op2: AArch64::XZR); |
4079 | } |
4080 | |
4081 | unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, |
4082 | unsigned Op1Reg) { |
4083 | unsigned Opc = 0; |
4084 | bool NeedTrunc = false; |
4085 | uint64_t Mask = 0; |
4086 | switch (RetVT.SimpleTy) { |
4087 | default: return 0; |
4088 | case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; |
4089 | case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; |
4090 | case MVT::i32: Opc = AArch64::LSLVWr; break; |
4091 | case MVT::i64: Opc = AArch64::LSLVXr; break; |
4092 | } |
4093 | |
4094 | const TargetRegisterClass *RC = |
4095 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
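     |   // i8/i16 shifts are performed as 32-bit shifts: mask the undefined high bits
     |   // of the shift amount first and mask the result back to the narrow width.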
4096 | if (NeedTrunc) |
4097 | Op1Reg = emitAnd_ri(RetVT: MVT::i32, LHSReg: Op1Reg, Imm: Mask); |
4098 | |
4099 | Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg); |
4100 | if (NeedTrunc) |
4101 | ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask); |
4102 | return ResultReg; |
4103 | } |
4104 | |
4105 | unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, |
4106 | uint64_t Shift, bool IsZExt) { |
4107 | assert(RetVT.SimpleTy >= SrcVT.SimpleTy && |
4108 | "Unexpected source/return type pair." ); |
4109 | assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || |
4110 | SrcVT == MVT::i32 || SrcVT == MVT::i64) && |
4111 | "Unexpected source value type." ); |
4112 | assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || |
4113 | RetVT == MVT::i64) && "Unexpected return value type." ); |
4114 | |
4115 | bool Is64Bit = (RetVT == MVT::i64); |
4116 | unsigned RegSize = Is64Bit ? 64 : 32; |
4117 | unsigned DstBits = RetVT.getSizeInBits(); |
4118 | unsigned SrcBits = SrcVT.getSizeInBits(); |
4119 | const TargetRegisterClass *RC = |
4120 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4121 | |
4122 | // Just emit a copy for "zero" shifts. |
4123 | if (Shift == 0) { |
4124 | if (RetVT == SrcVT) { |
4125 | Register ResultReg = createResultReg(RC); |
4126 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4127 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg) |
4128 | .addReg(RegNo: Op0); |
4129 | return ResultReg; |
4130 | } else |
4131 | return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt); |
4132 | } |
4133 | |
4134 | // Don't deal with undefined shifts. |
4135 | if (Shift >= DstBits) |
4136 | return 0; |
4137 | |
4138 | // For immediate shifts we can fold the zero-/sign-extension into the shift. |
4139 | // {S|U}BFM Wd, Wn, #r, #s |
4140 | // Wd<32+s-r,32-r> = Wn<s:0> when r > s |
4141 | |
4142 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4143 | // %2 = shl i16 %1, 4 |
4144 | // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 |
4145 | // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext |
4146 | // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext |
4147 | // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext |
4148 | |
4149 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4150 | // %2 = shl i16 %1, 8 |
4151 | // Wd<32+7-24,32-24> = Wn<7:0> |
4152 | // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext |
4153 | // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext |
4154 | // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext |
4155 | |
4156 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4157 | // %2 = shl i16 %1, 12 |
4158 | // Wd<32+3-20,32-20> = Wn<3:0> |
4159 | // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext |
4160 | // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext |
4161 | // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext |
4162 | |
4163 | unsigned ImmR = RegSize - Shift; |
4164 | // Limit the width to the length of the source type. |
4165 | unsigned ImmS = std::min<unsigned>(a: SrcBits - 1, b: DstBits - 1 - Shift); |
4166 | static const unsigned OpcTable[2][2] = { |
4167 | {AArch64::SBFMWri, AArch64::SBFMXri}, |
4168 | {AArch64::UBFMWri, AArch64::UBFMXri} |
4169 | }; |
4170 | unsigned Opc = OpcTable[IsZExt][Is64Bit]; |
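     |   // When the result is i64 but the source is 32 bits or narrower, widen the
     |   // operand to an X register with SUBREG_TO_REG before the 64-bit [SU]BFM.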
4171 | if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { |
4172 | Register TmpReg = MRI.createVirtualRegister(RegClass: RC); |
4173 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4174 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: TmpReg) |
4175 | .addImm(Val: 0) |
4176 | .addReg(RegNo: Op0) |
4177 | .addImm(Val: AArch64::sub_32); |
4178 | Op0 = TmpReg; |
4179 | } |
4180 | return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS); |
4181 | } |
4182 | |
4183 | unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, |
4184 | unsigned Op1Reg) { |
4185 | unsigned Opc = 0; |
4186 | bool NeedTrunc = false; |
4187 | uint64_t Mask = 0; |
4188 | switch (RetVT.SimpleTy) { |
4189 | default: return 0; |
4190 | case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; |
4191 | case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; |
4192 | case MVT::i32: Opc = AArch64::LSRVWr; break; |
4193 | case MVT::i64: Opc = AArch64::LSRVXr; break; |
4194 | } |
4195 | |
4196 | const TargetRegisterClass *RC = |
4197 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4198 | if (NeedTrunc) { |
4199 | Op0Reg = emitAnd_ri(RetVT: MVT::i32, LHSReg: Op0Reg, Imm: Mask); |
4200 | Op1Reg = emitAnd_ri(RetVT: MVT::i32, LHSReg: Op1Reg, Imm: Mask); |
4201 | } |
4202 | Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg); |
4203 | if (NeedTrunc) |
4204 | ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask); |
4205 | return ResultReg; |
4206 | } |
4207 | |
4208 | unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, |
4209 | uint64_t Shift, bool IsZExt) { |
4210 | assert(RetVT.SimpleTy >= SrcVT.SimpleTy && |
4211 | "Unexpected source/return type pair." ); |
4212 | assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || |
4213 | SrcVT == MVT::i32 || SrcVT == MVT::i64) && |
4214 | "Unexpected source value type." ); |
4215 | assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || |
4216 | RetVT == MVT::i64) && "Unexpected return value type." ); |
4217 | |
4218 | bool Is64Bit = (RetVT == MVT::i64); |
4219 | unsigned RegSize = Is64Bit ? 64 : 32; |
4220 | unsigned DstBits = RetVT.getSizeInBits(); |
4221 | unsigned SrcBits = SrcVT.getSizeInBits(); |
4222 | const TargetRegisterClass *RC = |
4223 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4224 | |
4225 | // Just emit a copy for "zero" shifts. |
4226 | if (Shift == 0) { |
4227 | if (RetVT == SrcVT) { |
4228 | Register ResultReg = createResultReg(RC); |
4229 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4230 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg) |
4231 | .addReg(RegNo: Op0); |
4232 | return ResultReg; |
4233 | } else |
4234 | return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt); |
4235 | } |
4236 | |
4237 | // Don't deal with undefined shifts. |
4238 | if (Shift >= DstBits) |
4239 | return 0; |
4240 | |
4241 | // For immediate shifts we can fold the zero-/sign-extension into the shift. |
4242 | // {S|U}BFM Wd, Wn, #r, #s |
4243 | // Wd<s-r:0> = Wn<s:r> when r <= s |
4244 | |
4245 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4246 | // %2 = lshr i16 %1, 4 |
4247 | // Wd<7-4:0> = Wn<7:4> |
4248 | // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext |
4249 | // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext |
4250 | // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext |
4251 | |
4252 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4253 | // %2 = lshr i16 %1, 8 |
4254 | // Wd<7-7,0> = Wn<7:7> |
4255 | // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext |
4256 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
4257 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
4258 | |
4259 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4260 | // %2 = lshr i16 %1, 12 |
4261 | // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 |
4262 | // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext |
4263 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
4264 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
4265 | |
4266 | if (Shift >= SrcBits && IsZExt) |
4267 | return materializeInt(CI: ConstantInt::get(Context&: *Context, V: APInt(RegSize, 0)), VT: RetVT); |
4268 | |
4269 | // It is not possible to fold a sign-extend into the LShr instruction. In this |
4270 | // case emit a sign-extend. |
4271 | if (!IsZExt) { |
4272 | Op0 = emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt); |
4273 | if (!Op0) |
4274 | return 0; |
4275 | SrcVT = RetVT; |
4276 | SrcBits = SrcVT.getSizeInBits(); |
4277 | IsZExt = true; |
4278 | } |
4279 | |
4280 | unsigned ImmR = std::min<unsigned>(a: SrcBits - 1, b: Shift); |
4281 | unsigned ImmS = SrcBits - 1; |
4282 | static const unsigned OpcTable[2][2] = { |
4283 | {AArch64::SBFMWri, AArch64::SBFMXri}, |
4284 | {AArch64::UBFMWri, AArch64::UBFMXri} |
4285 | }; |
4286 | unsigned Opc = OpcTable[IsZExt][Is64Bit]; |
4287 | if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { |
4288 | Register TmpReg = MRI.createVirtualRegister(RegClass: RC); |
4289 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4290 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: TmpReg) |
4291 | .addImm(Val: 0) |
4292 | .addReg(RegNo: Op0) |
4293 | .addImm(Val: AArch64::sub_32); |
4294 | Op0 = TmpReg; |
4295 | } |
4296 | return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS); |
4297 | } |
4298 | |
4299 | unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, |
4300 | unsigned Op1Reg) { |
4301 | unsigned Opc = 0; |
4302 | bool NeedTrunc = false; |
4303 | uint64_t Mask = 0; |
4304 | switch (RetVT.SimpleTy) { |
4305 | default: return 0; |
4306 | case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; |
4307 | case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; |
4308 | case MVT::i32: Opc = AArch64::ASRVWr; break; |
4309 | case MVT::i64: Opc = AArch64::ASRVXr; break; |
4310 | } |
4311 | |
4312 | const TargetRegisterClass *RC = |
4313 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4314 | if (NeedTrunc) { |
4315 | Op0Reg = emitIntExt(SrcVT: RetVT, SrcReg: Op0Reg, DestVT: MVT::i32, /*isZExt=*/false); |
4316 | Op1Reg = emitAnd_ri(RetVT: MVT::i32, LHSReg: Op1Reg, Imm: Mask); |
4317 | } |
4318 | Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg); |
4319 | if (NeedTrunc) |
4320 | ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask); |
4321 | return ResultReg; |
4322 | } |
4323 | |
4324 | unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, |
4325 | uint64_t Shift, bool IsZExt) { |
4326 | assert(RetVT.SimpleTy >= SrcVT.SimpleTy && |
4327 | "Unexpected source/return type pair." ); |
4328 | assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || |
4329 | SrcVT == MVT::i32 || SrcVT == MVT::i64) && |
4330 | "Unexpected source value type." ); |
4331 | assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || |
4332 | RetVT == MVT::i64) && "Unexpected return value type." ); |
4333 | |
4334 | bool Is64Bit = (RetVT == MVT::i64); |
4335 | unsigned RegSize = Is64Bit ? 64 : 32; |
4336 | unsigned DstBits = RetVT.getSizeInBits(); |
4337 | unsigned SrcBits = SrcVT.getSizeInBits(); |
4338 | const TargetRegisterClass *RC = |
4339 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4340 | |
4341 | // Just emit a copy for "zero" shifts. |
4342 | if (Shift == 0) { |
4343 | if (RetVT == SrcVT) { |
4344 | Register ResultReg = createResultReg(RC); |
4345 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4346 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg) |
4347 | .addReg(RegNo: Op0); |
4348 | return ResultReg; |
4349 | } else |
4350 | return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt); |
4351 | } |
4352 | |
4353 | // Don't deal with undefined shifts. |
4354 | if (Shift >= DstBits) |
4355 | return 0; |
4356 | |
4357 | // For immediate shifts we can fold the zero-/sign-extension into the shift. |
4358 | // {S|U}BFM Wd, Wn, #r, #s |
4359 | // Wd<s-r:0> = Wn<s:r> when r <= s |
4360 | |
4361 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4362 | // %2 = ashr i16 %1, 4 |
4363 | // Wd<7-4:0> = Wn<7:4> |
4364 | // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext |
4365 | // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext |
4366 | // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext |
4367 | |
4368 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4369 | // %2 = ashr i16 %1, 8 |
4370 | // Wd<7-7,0> = Wn<7:7> |
4371 | // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext |
4372 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
4373 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
4374 | |
4375 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4376 | // %2 = ashr i16 %1, 12 |
4377 | // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 |
4378 | // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext |
4379 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
4380 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
4381 | |
4382 | if (Shift >= SrcBits && IsZExt) |
4383 | return materializeInt(CI: ConstantInt::get(Context&: *Context, V: APInt(RegSize, 0)), VT: RetVT); |
4384 | |
4385 | unsigned ImmR = std::min<unsigned>(a: SrcBits - 1, b: Shift); |
4386 | unsigned ImmS = SrcBits - 1; |
4387 | static const unsigned OpcTable[2][2] = { |
4388 | {AArch64::SBFMWri, AArch64::SBFMXri}, |
4389 | {AArch64::UBFMWri, AArch64::UBFMXri} |
4390 | }; |
4391 | unsigned Opc = OpcTable[IsZExt][Is64Bit]; |
4392 | if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { |
4393 | Register TmpReg = MRI.createVirtualRegister(RegClass: RC); |
4394 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4395 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: TmpReg) |
4396 | .addImm(Val: 0) |
4397 | .addReg(RegNo: Op0) |
4398 | .addImm(Val: AArch64::sub_32); |
4399 | Op0 = TmpReg; |
4400 | } |
4401 | return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS); |
4402 | } |
4403 | |
4404 | unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, |
4405 | bool IsZExt) { |
4406 | assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?" ); |
4407 | |
4408 | // FastISel does not have the plumbing to deal with extensions where the SrcVT
4409 | // or DestVT is an unusual type, so check that both are types we can handle
4410 | // (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT); otherwise bail
4411 | // out to SelectionDAG.
4412 | if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && |
4413 | (DestVT != MVT::i32) && (DestVT != MVT::i64)) || |
4414 | ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && |
4415 | (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) |
4416 | return 0; |
4417 | |
4418 | unsigned Opc; |
4419 | unsigned Imm = 0; |
4420 | |
4421 | switch (SrcVT.SimpleTy) { |
4422 | default: |
4423 | return 0; |
4424 | case MVT::i1: |
4425 | return emiti1Ext(SrcReg, DestVT, IsZExt); |
4426 | case MVT::i8: |
4427 | if (DestVT == MVT::i64) |
4428 | Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; |
4429 | else |
4430 | Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; |
4431 | Imm = 7; |
4432 | break; |
4433 | case MVT::i16: |
4434 | if (DestVT == MVT::i64) |
4435 | Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; |
4436 | else |
4437 | Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; |
4438 | Imm = 15; |
4439 | break; |
4440 | case MVT::i32: |
4441 | assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?" ); |
4442 | Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; |
4443 | Imm = 31; |
4444 | break; |
4445 | } |
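// The opcode/immediate pair computed above encodes a plain extend; e.g. a
// zext of i8 becomes UBFMWri Wd, Wn, #0, #7 (the UXTB alias) and a sext of
// i32 to i64 becomes SBFMXri Xd, Xn, #0, #31 (the SXTW alias).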
4446 | |
4447 | // Handle i8 and i16 as i32. |
4448 | if (DestVT == MVT::i8 || DestVT == MVT::i16) |
4449 | DestVT = MVT::i32; |
4450 | else if (DestVT == MVT::i64) { |
4451 | Register Src64 = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass); |
4452 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4453 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: Src64) |
4454 | .addImm(Val: 0) |
4455 | .addReg(RegNo: SrcReg) |
4456 | .addImm(Val: AArch64::sub_32); |
4457 | SrcReg = Src64; |
4458 | } |
4459 | |
4460 | const TargetRegisterClass *RC = |
4461 | (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4462 | return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0: SrcReg, Imm1: 0, Imm2: Imm); |
4463 | } |
4464 | |
4465 | static bool isZExtLoad(const MachineInstr *LI) { |
4466 | switch (LI->getOpcode()) { |
4467 | default: |
4468 | return false; |
4469 | case AArch64::LDURBBi: |
4470 | case AArch64::LDURHHi: |
4471 | case AArch64::LDURWi: |
4472 | case AArch64::LDRBBui: |
4473 | case AArch64::LDRHHui: |
4474 | case AArch64::LDRWui: |
4475 | case AArch64::LDRBBroX: |
4476 | case AArch64::LDRHHroX: |
4477 | case AArch64::LDRWroX: |
4478 | case AArch64::LDRBBroW: |
4479 | case AArch64::LDRHHroW: |
4480 | case AArch64::LDRWroW: |
4481 | return true; |
4482 | } |
4483 | } |
4484 | |
4485 | static bool isSExtLoad(const MachineInstr *LI) { |
4486 | switch (LI->getOpcode()) { |
4487 | default: |
4488 | return false; |
4489 | case AArch64::LDURSBWi: |
4490 | case AArch64::LDURSHWi: |
4491 | case AArch64::LDURSBXi: |
4492 | case AArch64::LDURSHXi: |
4493 | case AArch64::LDURSWi: |
4494 | case AArch64::LDRSBWui: |
4495 | case AArch64::LDRSHWui: |
4496 | case AArch64::LDRSBXui: |
4497 | case AArch64::LDRSHXui: |
4498 | case AArch64::LDRSWui: |
4499 | case AArch64::LDRSBWroX: |
4500 | case AArch64::LDRSHWroX: |
4501 | case AArch64::LDRSBXroX: |
4502 | case AArch64::LDRSHXroX: |
4503 | case AArch64::LDRSWroX: |
4504 | case AArch64::LDRSBWroW: |
4505 | case AArch64::LDRSHWroW: |
4506 | case AArch64::LDRSBXroW: |
4507 | case AArch64::LDRSHXroW: |
4508 | case AArch64::LDRSWroW: |
4509 | return true; |
4510 | } |
4511 | } |
4512 | |
4513 | bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, |
4514 | MVT SrcVT) { |
4515 | const auto *LI = dyn_cast<LoadInst>(Val: I->getOperand(i: 0)); |
4516 | if (!LI || !LI->hasOneUse()) |
4517 | return false; |
4518 | |
4519 | // Check if the load instruction has already been selected. |
4520 | Register Reg = lookUpRegForValue(V: LI); |
4521 | if (!Reg) |
4522 | return false; |
4523 | |
4524 | MachineInstr *MI = MRI.getUniqueVRegDef(Reg); |
4525 | if (!MI) |
4526 | return false; |
4527 | |
4528 | // Check if the correct load instruction has been emitted - SelectionDAG might |
4529 | // have emitted a zero-extending load, but we need a sign-extending load. |
4530 | bool IsZExt = isa<ZExtInst>(Val: I); |
4531 | const auto *LoadMI = MI; |
4532 | if (LoadMI->getOpcode() == TargetOpcode::COPY && |
4533 | LoadMI->getOperand(i: 1).getSubReg() == AArch64::sub_32) { |
4534 | Register LoadReg = MI->getOperand(i: 1).getReg(); |
4535 | LoadMI = MRI.getUniqueVRegDef(Reg: LoadReg); |
4536 | assert(LoadMI && "Expected valid instruction" ); |
4537 | } |
4538 | if (!(IsZExt && isZExtLoad(LI: LoadMI)) && !(!IsZExt && isSExtLoad(LI: LoadMI))) |
4539 | return false; |
4540 | |
4541 | // Nothing to be done. |
4542 | if (RetVT != MVT::i64 || SrcVT > MVT::i32) { |
4543 | updateValueMap(I, Reg); |
4544 | return true; |
4545 | } |
4546 | |
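// At this point the load already produces a correctly extended value in a
// 32-bit register and only the widening to i64 is missing. For zext it is
// enough to wrap the result in SUBREG_TO_REG, since the upper 32 bits are
// already zero; for sext the underlying load is a 64-bit sign-extending load,
// so the COPY to sub_32 is removed and its 64-bit source register is reused.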
4547 | if (IsZExt) { |
4548 | Register Reg64 = createResultReg(RC: &AArch64::GPR64RegClass); |
4549 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4550 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: Reg64) |
4551 | .addImm(Val: 0) |
4552 | .addReg(RegNo: Reg, flags: getKillRegState(B: true)) |
4553 | .addImm(Val: AArch64::sub_32); |
4554 | Reg = Reg64; |
4555 | } else { |
4556 | assert((MI->getOpcode() == TargetOpcode::COPY && |
4557 | MI->getOperand(1).getSubReg() == AArch64::sub_32) && |
4558 | "Expected copy instruction" ); |
4559 | Reg = MI->getOperand(i: 1).getReg(); |
4560 | MachineBasicBlock::iterator I(MI); |
4561 | removeDeadCode(I, E: std::next(x: I)); |
4562 | } |
4563 | updateValueMap(I, Reg); |
4564 | return true; |
4565 | } |
4566 | |
4567 | bool AArch64FastISel::selectIntExt(const Instruction *I) { |
4568 | assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && |
4569 | "Unexpected integer extend instruction." ); |
4570 | MVT RetVT; |
4571 | MVT SrcVT; |
4572 | if (!isTypeSupported(Ty: I->getType(), VT&: RetVT)) |
4573 | return false; |
4574 | |
4575 | if (!isTypeSupported(Ty: I->getOperand(i: 0)->getType(), VT&: SrcVT)) |
4576 | return false; |
4577 | |
4578 | // Try to optimize already sign-/zero-extended values from load instructions. |
4579 | if (optimizeIntExtLoad(I, RetVT, SrcVT)) |
4580 | return true; |
4581 | |
4582 | Register SrcReg = getRegForValue(V: I->getOperand(i: 0)); |
4583 | if (!SrcReg) |
4584 | return false; |
4585 | |
4586 | // Try to optimize already sign-/zero-extended values from function arguments. |
4587 | bool IsZExt = isa<ZExtInst>(Val: I); |
4588 | if (const auto *Arg = dyn_cast<Argument>(Val: I->getOperand(i: 0))) { |
4589 | if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { |
4590 | if (RetVT == MVT::i64 && SrcVT != MVT::i64) { |
4591 | Register ResultReg = createResultReg(RC: &AArch64::GPR64RegClass); |
4592 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4593 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: ResultReg) |
4594 | .addImm(Val: 0) |
4595 | .addReg(RegNo: SrcReg) |
4596 | .addImm(Val: AArch64::sub_32); |
4597 | SrcReg = ResultReg; |
4598 | } |
4599 | |
4600 | updateValueMap(I, Reg: SrcReg); |
4601 | return true; |
4602 | } |
4603 | } |
4604 | |
4605 | unsigned ResultReg = emitIntExt(SrcVT, SrcReg, DestVT: RetVT, IsZExt); |
4606 | if (!ResultReg) |
4607 | return false; |
4608 | |
4609 | updateValueMap(I, Reg: ResultReg); |
4610 | return true; |
4611 | } |
4612 | |
4613 | bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { |
4614 | EVT DestEVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true); |
4615 | if (!DestEVT.isSimple()) |
4616 | return false; |
4617 | |
4618 | MVT DestVT = DestEVT.getSimpleVT(); |
4619 | if (DestVT != MVT::i64 && DestVT != MVT::i32) |
4620 | return false; |
4621 | |
4622 | unsigned DivOpc; |
4623 | bool Is64bit = (DestVT == MVT::i64); |
4624 | switch (ISDOpcode) { |
4625 | default: |
4626 | return false; |
4627 | case ISD::SREM: |
4628 | DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr; |
4629 | break; |
4630 | case ISD::UREM: |
4631 | DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; |
4632 | break; |
4633 | } |
4634 | unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; |
4635 | Register Src0Reg = getRegForValue(V: I->getOperand(i: 0)); |
4636 | if (!Src0Reg) |
4637 | return false; |
4638 | |
4639 | Register Src1Reg = getRegForValue(V: I->getOperand(i: 1)); |
4640 | if (!Src1Reg) |
4641 | return false; |
4642 | |
4643 | const TargetRegisterClass *RC = |
4644 | (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4645 | Register QuotReg = fastEmitInst_rr(MachineInstOpcode: DivOpc, RC, Op0: Src0Reg, Op1: Src1Reg); |
4646 | assert(QuotReg && "Unexpected DIV instruction emission failure." ); |
4647 | // The remainder is computed as numerator - (quotient * denominator) using the |
4648 | // MSUB instruction. |
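// For illustration (register names are placeholders): srem i32 %n, %d becomes
// %q = SDIVWr %n, %d followed by %r = MSUBWrrr %q, %d, %n, i.e.
// %r = %n - %q * %d.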
4649 | Register ResultReg = fastEmitInst_rrr(MachineInstOpcode: MSubOpc, RC, Op0: QuotReg, Op1: Src1Reg, Op2: Src0Reg); |
4650 | updateValueMap(I, Reg: ResultReg); |
4651 | return true; |
4652 | } |
4653 | |
4654 | bool AArch64FastISel::selectMul(const Instruction *I) { |
4655 | MVT VT; |
4656 | if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true)) |
4657 | return false; |
4658 | |
4659 | if (VT.isVector()) |
4660 | return selectBinaryOp(I, ISDOpcode: ISD::MUL); |
4661 | |
4662 | const Value *Src0 = I->getOperand(i: 0); |
4663 | const Value *Src1 = I->getOperand(i: 1); |
4664 | if (const auto *C = dyn_cast<ConstantInt>(Val: Src0)) |
4665 | if (C->getValue().isPowerOf2()) |
4666 | std::swap(a&: Src0, b&: Src1); |
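// Canonicalize a power-of-2 constant onto the RHS so the strength reduction
// below only has to inspect Src1; e.g. mul i32 8, %x is handled the same way
// as mul i32 %x, 8 and becomes a left shift by 3.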
4667 | |
4668 | // Try to simplify to a shift instruction. |
4669 | if (const auto *C = dyn_cast<ConstantInt>(Val: Src1)) |
4670 | if (C->getValue().isPowerOf2()) { |
4671 | uint64_t ShiftVal = C->getValue().logBase2(); |
4672 | MVT SrcVT = VT; |
4673 | bool IsZExt = true; |
4674 | if (const auto *ZExt = dyn_cast<ZExtInst>(Val: Src0)) { |
4675 | if (!isIntExtFree(I: ZExt)) { |
4676 | MVT VT; |
4677 | if (isValueAvailable(V: ZExt) && isTypeSupported(Ty: ZExt->getSrcTy(), VT)) { |
4678 | SrcVT = VT; |
4679 | IsZExt = true; |
4680 | Src0 = ZExt->getOperand(i_nocapture: 0); |
4681 | } |
4682 | } |
4683 | } else if (const auto *SExt = dyn_cast<SExtInst>(Val: Src0)) { |
4684 | if (!isIntExtFree(I: SExt)) { |
4685 | MVT VT; |
4686 | if (isValueAvailable(V: SExt) && isTypeSupported(Ty: SExt->getSrcTy(), VT)) { |
4687 | SrcVT = VT; |
4688 | IsZExt = false; |
4689 | Src0 = SExt->getOperand(i_nocapture: 0); |
4690 | } |
4691 | } |
4692 | } |
4693 | |
4694 | Register Src0Reg = getRegForValue(V: Src0); |
4695 | if (!Src0Reg) |
4696 | return false; |
4697 | |
4698 | unsigned ResultReg = |
4699 | emitLSL_ri(RetVT: VT, SrcVT, Op0: Src0Reg, Shift: ShiftVal, IsZExt); |
4700 | |
4701 | if (ResultReg) { |
4702 | updateValueMap(I, Reg: ResultReg); |
4703 | return true; |
4704 | } |
4705 | } |
4706 | |
4707 | Register Src0Reg = getRegForValue(V: I->getOperand(i: 0)); |
4708 | if (!Src0Reg) |
4709 | return false; |
4710 | |
4711 | Register Src1Reg = getRegForValue(V: I->getOperand(i: 1)); |
4712 | if (!Src1Reg) |
4713 | return false; |
4714 | |
4715 | unsigned ResultReg = emitMul_rr(RetVT: VT, Op0: Src0Reg, Op1: Src1Reg); |
4716 | |
4717 | if (!ResultReg) |
4718 | return false; |
4719 | |
4720 | updateValueMap(I, Reg: ResultReg); |
4721 | return true; |
4722 | } |
4723 | |
4724 | bool AArch64FastISel::selectShift(const Instruction *I) { |
4725 | MVT RetVT; |
4726 | if (!isTypeSupported(Ty: I->getType(), VT&: RetVT, /*IsVectorAllowed=*/true)) |
4727 | return false; |
4728 | |
4729 | if (RetVT.isVector()) |
4730 | return selectOperator(I, Opcode: I->getOpcode()); |
4731 | |
4732 | if (const auto *C = dyn_cast<ConstantInt>(Val: I->getOperand(i: 1))) { |
4733 | unsigned ResultReg = 0; |
4734 | uint64_t ShiftVal = C->getZExtValue(); |
4735 | MVT SrcVT = RetVT; |
4736 | bool IsZExt = I->getOpcode() != Instruction::AShr; |
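// Shl and LShr treat the source as zero-extended, while AShr needs the
// original sign bits and therefore defaults to a sign-extended source; both
// defaults may be overridden below when the operand is itself a free
// zext/sext that can be folded into the shift.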
4737 | const Value *Op0 = I->getOperand(i: 0); |
4738 | if (const auto *ZExt = dyn_cast<ZExtInst>(Val: Op0)) { |
4739 | if (!isIntExtFree(I: ZExt)) { |
4740 | MVT TmpVT; |
4741 | if (isValueAvailable(V: ZExt) && isTypeSupported(Ty: ZExt->getSrcTy(), VT&: TmpVT)) { |
4742 | SrcVT = TmpVT; |
4743 | IsZExt = true; |
4744 | Op0 = ZExt->getOperand(i_nocapture: 0); |
4745 | } |
4746 | } |
4747 | } else if (const auto *SExt = dyn_cast<SExtInst>(Val: Op0)) { |
4748 | if (!isIntExtFree(I: SExt)) { |
4749 | MVT TmpVT; |
4750 | if (isValueAvailable(V: SExt) && isTypeSupported(Ty: SExt->getSrcTy(), VT&: TmpVT)) { |
4751 | SrcVT = TmpVT; |
4752 | IsZExt = false; |
4753 | Op0 = SExt->getOperand(i_nocapture: 0); |
4754 | } |
4755 | } |
4756 | } |
4757 | |
4758 | Register Op0Reg = getRegForValue(V: Op0); |
4759 | if (!Op0Reg) |
4760 | return false; |
4761 | |
4762 | switch (I->getOpcode()) { |
4763 | default: llvm_unreachable("Unexpected instruction." ); |
4764 | case Instruction::Shl: |
4765 | ResultReg = emitLSL_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt); |
4766 | break; |
4767 | case Instruction::AShr: |
4768 | ResultReg = emitASR_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt); |
4769 | break; |
4770 | case Instruction::LShr: |
4771 | ResultReg = emitLSR_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt); |
4772 | break; |
4773 | } |
4774 | if (!ResultReg) |
4775 | return false; |
4776 | |
4777 | updateValueMap(I, Reg: ResultReg); |
4778 | return true; |
4779 | } |
4780 | |
4781 | Register Op0Reg = getRegForValue(V: I->getOperand(i: 0)); |
4782 | if (!Op0Reg) |
4783 | return false; |
4784 | |
4785 | Register Op1Reg = getRegForValue(V: I->getOperand(i: 1)); |
4786 | if (!Op1Reg) |
4787 | return false; |
4788 | |
4789 | unsigned ResultReg = 0; |
4790 | switch (I->getOpcode()) { |
4791 | default: llvm_unreachable("Unexpected instruction." ); |
4792 | case Instruction::Shl: |
4793 | ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg); |
4794 | break; |
4795 | case Instruction::AShr: |
4796 | ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg); |
4797 | break; |
4798 | case Instruction::LShr: |
4799 | ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg); |
4800 | break; |
4801 | } |
4802 | |
4803 | if (!ResultReg) |
4804 | return false; |
4805 | |
4806 | updateValueMap(I, Reg: ResultReg); |
4807 | return true; |
4808 | } |
4809 | |
4810 | bool AArch64FastISel::selectBitCast(const Instruction *I) { |
4811 | MVT RetVT, SrcVT; |
4812 | |
4813 | if (!isTypeLegal(Ty: I->getOperand(i: 0)->getType(), VT&: SrcVT)) |
4814 | return false; |
4815 | if (!isTypeLegal(Ty: I->getType(), VT&: RetVT)) |
4816 | return false; |
4817 | |
4818 | unsigned Opc; |
4819 | if (RetVT == MVT::f32 && SrcVT == MVT::i32) |
4820 | Opc = AArch64::FMOVWSr; |
4821 | else if (RetVT == MVT::f64 && SrcVT == MVT::i64) |
4822 | Opc = AArch64::FMOVXDr; |
4823 | else if (RetVT == MVT::i32 && SrcVT == MVT::f32) |
4824 | Opc = AArch64::FMOVSWr; |
4825 | else if (RetVT == MVT::i64 && SrcVT == MVT::f64) |
4826 | Opc = AArch64::FMOVDXr; |
4827 | else |
4828 | return false; |
4829 | |
4830 | const TargetRegisterClass *RC = nullptr; |
4831 | switch (RetVT.SimpleTy) { |
4832 | default: llvm_unreachable("Unexpected value type." ); |
4833 | case MVT::i32: RC = &AArch64::GPR32RegClass; break; |
4834 | case MVT::i64: RC = &AArch64::GPR64RegClass; break; |
4835 | case MVT::f32: RC = &AArch64::FPR32RegClass; break; |
4836 | case MVT::f64: RC = &AArch64::FPR64RegClass; break; |
4837 | } |
4838 | Register Op0Reg = getRegForValue(V: I->getOperand(i: 0)); |
4839 | if (!Op0Reg) |
4840 | return false; |
4841 | |
4842 | Register ResultReg = fastEmitInst_r(MachineInstOpcode: Opc, RC, Op0: Op0Reg); |
4843 | if (!ResultReg) |
4844 | return false; |
4845 | |
4846 | updateValueMap(I, Reg: ResultReg); |
4847 | return true; |
4848 | } |
4849 | |
4850 | bool AArch64FastISel::selectFRem(const Instruction *I) { |
4851 | MVT RetVT; |
4852 | if (!isTypeLegal(Ty: I->getType(), VT&: RetVT)) |
4853 | return false; |
4854 | |
4855 | RTLIB::Libcall LC; |
4856 | switch (RetVT.SimpleTy) { |
4857 | default: |
4858 | return false; |
4859 | case MVT::f32: |
4860 | LC = RTLIB::REM_F32; |
4861 | break; |
4862 | case MVT::f64: |
4863 | LC = RTLIB::REM_F64; |
4864 | break; |
4865 | } |
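// The libcall targets are normally fmodf/fmod; the frem is simply lowered to
// an ordinary call that passes both operands through and returns the result.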
4866 | |
4867 | ArgListTy Args; |
4868 | Args.reserve(n: I->getNumOperands()); |
4869 | |
4870 | // Populate the argument list. |
4871 | for (auto &Arg : I->operands()) { |
4872 | ArgListEntry Entry; |
4873 | Entry.Val = Arg; |
4874 | Entry.Ty = Arg->getType(); |
4875 | Args.push_back(x: Entry); |
4876 | } |
4877 | |
4878 | CallLoweringInfo CLI; |
4879 | MCContext &Ctx = MF->getContext(); |
4880 | CLI.setCallee(DL, Ctx, CC: TLI.getLibcallCallingConv(Call: LC), ResultTy: I->getType(), |
4881 | Target: TLI.getLibcallName(Call: LC), ArgsList: std::move(Args)); |
4882 | if (!lowerCallTo(CLI)) |
4883 | return false; |
4884 | updateValueMap(I, Reg: CLI.ResultReg); |
4885 | return true; |
4886 | } |
4887 | |
4888 | bool AArch64FastISel::selectSDiv(const Instruction *I) { |
4889 | MVT VT; |
4890 | if (!isTypeLegal(Ty: I->getType(), VT)) |
4891 | return false; |
4892 | |
4893 | if (!isa<ConstantInt>(Val: I->getOperand(i: 1))) |
4894 | return selectBinaryOp(I, ISDOpcode: ISD::SDIV); |
4895 | |
4896 | const APInt &C = cast<ConstantInt>(Val: I->getOperand(i: 1))->getValue(); |
4897 | if ((VT != MVT::i32 && VT != MVT::i64) || !C || |
4898 | !(C.isPowerOf2() || C.isNegatedPowerOf2())) |
4899 | return selectBinaryOp(I, ISDOpcode: ISD::SDIV); |
4900 | |
4901 | unsigned Lg2 = C.countr_zero(); |
4902 | Register Src0Reg = getRegForValue(V: I->getOperand(i: 0)); |
4903 | if (!Src0Reg) |
4904 | return false; |
4905 | |
4906 | if (cast<BinaryOperator>(Val: I)->isExact()) { |
4907 | unsigned ResultReg = emitASR_ri(RetVT: VT, SrcVT: VT, Op0: Src0Reg, Shift: Lg2); |
4908 | if (!ResultReg) |
4909 | return false; |
4910 | updateValueMap(I, Reg: ResultReg); |
4911 | return true; |
4912 | } |
4913 | |
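// Inexact case: round towards zero by adding (2^k - 1) to negative dividends
// before shifting. Rough sketch for sdiv i32 %x, 8 (names are illustrative):
//   %a = ADD %x, #7 ; CMP %x, #0 ; %s = CSEL %a, %x, lt ; %r = ASR %s, #3
// and for a negative divisor such as -8 the shifted value is additionally
// negated via a subtraction from the zero register.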
4914 | int64_t Pow2MinusOne = (1ULL << Lg2) - 1; |
4915 | unsigned AddReg = emitAdd_ri_(VT, Op0: Src0Reg, Imm: Pow2MinusOne); |
4916 | if (!AddReg) |
4917 | return false; |
4918 | |
4919 | // (Src0 < 0) ? Pow2 - 1 : 0; |
4920 | if (!emitICmp_ri(RetVT: VT, LHSReg: Src0Reg, Imm: 0)) |
4921 | return false; |
4922 | |
4923 | unsigned SelectOpc; |
4924 | const TargetRegisterClass *RC; |
4925 | if (VT == MVT::i64) { |
4926 | SelectOpc = AArch64::CSELXr; |
4927 | RC = &AArch64::GPR64RegClass; |
4928 | } else { |
4929 | SelectOpc = AArch64::CSELWr; |
4930 | RC = &AArch64::GPR32RegClass; |
4931 | } |
4932 | Register SelectReg = fastEmitInst_rri(MachineInstOpcode: SelectOpc, RC, Op0: AddReg, Op1: Src0Reg, |
4933 | Imm: AArch64CC::LT); |
4934 | if (!SelectReg) |
4935 | return false; |
4936 | |
4937 | // Divide by Pow2 --> ashr. If we're dividing by a negative value, we must
4938 | // also negate the result.
4939 | unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
4940 | unsigned ResultReg; |
4941 | if (C.isNegative()) |
4942 | ResultReg = emitAddSub_rs(/*UseAdd=*/false, RetVT: VT, LHSReg: ZeroReg, RHSReg: SelectReg, |
4943 | ShiftType: AArch64_AM::ASR, ShiftImm: Lg2); |
4944 | else |
4945 | ResultReg = emitASR_ri(RetVT: VT, SrcVT: VT, Op0: SelectReg, Shift: Lg2); |
4946 | |
4947 | if (!ResultReg) |
4948 | return false; |
4949 | |
4950 | updateValueMap(I, Reg: ResultReg); |
4951 | return true; |
4952 | } |
4953 | |
4954 | /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We |
4955 | /// have to duplicate it for AArch64, because otherwise we would fail during the |
4956 | /// sign-extend emission. |
4957 | unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) { |
4958 | Register IdxN = getRegForValue(V: Idx); |
4959 | if (IdxN == 0) |
4960 | // Unhandled operand. Halt "fast" selection and bail. |
4961 | return 0; |
4962 | |
4963 | // If the index is smaller or larger than intptr_t, truncate or extend it. |
4964 | MVT PtrVT = TLI.getPointerTy(DL); |
4965 | EVT IdxVT = EVT::getEVT(Ty: Idx->getType(), /*HandleUnknown=*/false); |
4966 | if (IdxVT.bitsLT(VT: PtrVT)) { |
4967 | IdxN = emitIntExt(SrcVT: IdxVT.getSimpleVT(), SrcReg: IdxN, DestVT: PtrVT, /*isZExt=*/IsZExt: false); |
4968 | } else if (IdxVT.bitsGT(VT: PtrVT)) |
4969 | llvm_unreachable("AArch64 FastISel doesn't support types larger than i64" ); |
4970 | return IdxN; |
4971 | } |
4972 | |
4973 | /// This is mostly a copy of the existing FastISel GEP code, but we have to |
4974 | /// duplicate it for AArch64, because otherwise we would bail out even for |
4975 | /// simple cases. This is because the standard fastEmit functions don't cover |
4976 | /// MUL at all and ADD is lowered very inefficiently.
4977 | bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { |
4978 | if (Subtarget->isTargetILP32()) |
4979 | return false; |
4980 | |
4981 | Register N = getRegForValue(V: I->getOperand(i: 0)); |
4982 | if (!N) |
4983 | return false; |
4984 | |
4985 | // Keep a running tab of the total offset to coalesce multiple N = N + Offset |
4986 | // into a single N = N + TotalOffset. |
4987 | uint64_t TotalOffs = 0; |
4988 | MVT VT = TLI.getPointerTy(DL); |
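// Sketch of the lowering for a variable index (register names are
// illustrative): getelementptr i32, ptr %p, i64 %i becomes
//   %idx = sign-extend of %i if needed, %c = materialized constant 4,
//   %off = MUL %idx, %c, %p1 = ADD %p, %off,
// while constant indices are folded into TotalOffs and added in one go.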
4989 | for (gep_type_iterator GTI = gep_type_begin(GEP: I), E = gep_type_end(GEP: I); |
4990 | GTI != E; ++GTI) { |
4991 | const Value *Idx = GTI.getOperand(); |
4992 | if (auto *StTy = GTI.getStructTypeOrNull()) { |
4993 | unsigned Field = cast<ConstantInt>(Val: Idx)->getZExtValue(); |
4994 | // N = N + Offset |
4995 | if (Field) |
4996 | TotalOffs += DL.getStructLayout(Ty: StTy)->getElementOffset(Idx: Field); |
4997 | } else { |
4998 | // If this is a constant subscript, handle it quickly. |
4999 | if (const auto *CI = dyn_cast<ConstantInt>(Val: Idx)) { |
5000 | if (CI->isZero()) |
5001 | continue; |
5002 | // N = N + Offset |
5003 | TotalOffs += GTI.getSequentialElementStride(DL) * |
5004 | cast<ConstantInt>(Val: CI)->getSExtValue(); |
5005 | continue; |
5006 | } |
5007 | if (TotalOffs) { |
5008 | N = emitAdd_ri_(VT, Op0: N, Imm: TotalOffs); |
5009 | if (!N) |
5010 | return false; |
5011 | TotalOffs = 0; |
5012 | } |
5013 | |
5014 | // N = N + Idx * ElementSize; |
5015 | uint64_t ElementSize = GTI.getSequentialElementStride(DL); |
5016 | unsigned IdxN = getRegForGEPIndex(Idx); |
5017 | if (!IdxN) |
5018 | return false; |
5019 | |
5020 | if (ElementSize != 1) { |
5021 | unsigned C = fastEmit_i(VT, RetVT: VT, Opcode: ISD::Constant, imm0: ElementSize); |
5022 | if (!C) |
5023 | return false; |
5024 | IdxN = emitMul_rr(RetVT: VT, Op0: IdxN, Op1: C); |
5025 | if (!IdxN) |
5026 | return false; |
5027 | } |
5028 | N = fastEmit_rr(VT, RetVT: VT, Opcode: ISD::ADD, Op0: N, Op1: IdxN); |
5029 | if (!N) |
5030 | return false; |
5031 | } |
5032 | } |
5033 | if (TotalOffs) { |
5034 | N = emitAdd_ri_(VT, Op0: N, Imm: TotalOffs); |
5035 | if (!N) |
5036 | return false; |
5037 | } |
5038 | updateValueMap(I, Reg: N); |
5039 | return true; |
5040 | } |
5041 | |
5042 | bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) { |
5043 | assert(TM.getOptLevel() == CodeGenOptLevel::None && |
5044 | "cmpxchg survived AtomicExpand at optlevel > -O0" ); |
5045 | |
5046 | auto *RetPairTy = cast<StructType>(Val: I->getType()); |
5047 | Type *RetTy = RetPairTy->getTypeAtIndex(N: 0U); |
5048 | assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) && |
5049 | "cmpxchg has a non-i1 status result" ); |
5050 | |
5051 | MVT VT; |
5052 | if (!isTypeLegal(Ty: RetTy, VT)) |
5053 | return false; |
5054 | |
5055 | const TargetRegisterClass *ResRC; |
5056 | unsigned Opc, CmpOpc; |
5057 | // This only supports i32/i64, because i8/i16 aren't legal, and the generic |
5058 | // extractvalue selection doesn't support that. |
5059 | if (VT == MVT::i32) { |
5060 | Opc = AArch64::CMP_SWAP_32; |
5061 | CmpOpc = AArch64::SUBSWrs; |
5062 | ResRC = &AArch64::GPR32RegClass; |
5063 | } else if (VT == MVT::i64) { |
5064 | Opc = AArch64::CMP_SWAP_64; |
5065 | CmpOpc = AArch64::SUBSXrs; |
5066 | ResRC = &AArch64::GPR64RegClass; |
5067 | } else { |
5068 | return false; |
5069 | } |
5070 | |
5071 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
5072 | |
5073 | const Register AddrReg = constrainOperandRegClass( |
5074 | II, Op: getRegForValue(V: I->getPointerOperand()), OpNum: II.getNumDefs()); |
5075 | const Register DesiredReg = constrainOperandRegClass( |
5076 | II, Op: getRegForValue(V: I->getCompareOperand()), OpNum: II.getNumDefs() + 1); |
5077 | const Register NewReg = constrainOperandRegClass( |
5078 | II, Op: getRegForValue(V: I->getNewValOperand()), OpNum: II.getNumDefs() + 2); |
5079 | |
5080 | const Register ResultReg1 = createResultReg(RC: ResRC); |
5081 | const Register ResultReg2 = createResultReg(RC: &AArch64::GPR32RegClass); |
5082 | const Register ScratchReg = createResultReg(RC: &AArch64::GPR32RegClass); |
5083 | |
5084 | // FIXME: MachineMemOperand doesn't support cmpxchg yet. |
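// The sequence is: the CMP_SWAP_* pseudo (typically expanded later into an
// exclusive load/store loop) yields the loaded value, SUBS compares it with
// the expected value, and CSINC WZR, WZR, NE materializes the i1 success
// flag, which is 1 only when the comparison was equal.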
5085 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II) |
5086 | .addDef(RegNo: ResultReg1) |
5087 | .addDef(RegNo: ScratchReg) |
5088 | .addUse(RegNo: AddrReg) |
5089 | .addUse(RegNo: DesiredReg) |
5090 | .addUse(RegNo: NewReg); |
5091 | |
5092 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: CmpOpc)) |
5093 | .addDef(RegNo: VT == MVT::i32 ? AArch64::WZR : AArch64::XZR) |
5094 | .addUse(RegNo: ResultReg1) |
5095 | .addUse(RegNo: DesiredReg) |
5096 | .addImm(Val: 0); |
5097 | |
5098 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::CSINCWr)) |
5099 | .addDef(RegNo: ResultReg2) |
5100 | .addUse(RegNo: AArch64::WZR) |
5101 | .addUse(RegNo: AArch64::WZR) |
5102 | .addImm(Val: AArch64CC::NE); |
5103 | |
5104 | assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers." ); |
5105 | updateValueMap(I, Reg: ResultReg1, NumRegs: 2); |
5106 | return true; |
5107 | } |
5108 | |
5109 | bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { |
5110 | if (TLI.fallBackToDAGISel(Inst: *I)) |
5111 | return false; |
5112 | switch (I->getOpcode()) { |
5113 | default: |
5114 | break; |
5115 | case Instruction::Add: |
5116 | case Instruction::Sub: |
5117 | return selectAddSub(I); |
5118 | case Instruction::Mul: |
5119 | return selectMul(I); |
5120 | case Instruction::SDiv: |
5121 | return selectSDiv(I); |
5122 | case Instruction::SRem: |
5123 | if (!selectBinaryOp(I, ISDOpcode: ISD::SREM)) |
5124 | return selectRem(I, ISDOpcode: ISD::SREM); |
5125 | return true; |
5126 | case Instruction::URem: |
5127 | if (!selectBinaryOp(I, ISDOpcode: ISD::UREM)) |
5128 | return selectRem(I, ISDOpcode: ISD::UREM); |
5129 | return true; |
5130 | case Instruction::Shl: |
5131 | case Instruction::LShr: |
5132 | case Instruction::AShr: |
5133 | return selectShift(I); |
5134 | case Instruction::And: |
5135 | case Instruction::Or: |
5136 | case Instruction::Xor: |
5137 | return selectLogicalOp(I); |
5138 | case Instruction::Br: |
5139 | return selectBranch(I); |
5140 | case Instruction::IndirectBr: |
5141 | return selectIndirectBr(I); |
5142 | case Instruction::BitCast: |
5143 | if (!FastISel::selectBitCast(I)) |
5144 | return selectBitCast(I); |
5145 | return true; |
5146 | case Instruction::FPToSI: |
5147 | if (!selectCast(I, Opcode: ISD::FP_TO_SINT)) |
5148 | return selectFPToInt(I, /*Signed=*/true); |
5149 | return true; |
5150 | case Instruction::FPToUI: |
5151 | return selectFPToInt(I, /*Signed=*/false); |
5152 | case Instruction::ZExt: |
5153 | case Instruction::SExt: |
5154 | return selectIntExt(I); |
5155 | case Instruction::Trunc: |
5156 | if (!selectCast(I, Opcode: ISD::TRUNCATE)) |
5157 | return selectTrunc(I); |
5158 | return true; |
5159 | case Instruction::FPExt: |
5160 | return selectFPExt(I); |
5161 | case Instruction::FPTrunc: |
5162 | return selectFPTrunc(I); |
5163 | case Instruction::SIToFP: |
5164 | if (!selectCast(I, Opcode: ISD::SINT_TO_FP)) |
5165 | return selectIntToFP(I, /*Signed=*/true); |
5166 | return true; |
5167 | case Instruction::UIToFP: |
5168 | return selectIntToFP(I, /*Signed=*/false); |
5169 | case Instruction::Load: |
5170 | return selectLoad(I); |
5171 | case Instruction::Store: |
5172 | return selectStore(I); |
5173 | case Instruction::FCmp: |
5174 | case Instruction::ICmp: |
5175 | return selectCmp(I); |
5176 | case Instruction::Select: |
5177 | return selectSelect(I); |
5178 | case Instruction::Ret: |
5179 | return selectRet(I); |
5180 | case Instruction::FRem: |
5181 | return selectFRem(I); |
5182 | case Instruction::GetElementPtr: |
5183 | return selectGetElementPtr(I); |
5184 | case Instruction::AtomicCmpXchg: |
5185 | return selectAtomicCmpXchg(I: cast<AtomicCmpXchgInst>(Val: I)); |
5186 | } |
5187 | |
5188 | // Fall back to target-independent instruction selection.
5189 | return selectOperator(I, Opcode: I->getOpcode()); |
5190 | } |
5191 | |
5192 | FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, |
5193 | const TargetLibraryInfo *LibInfo) { |
5194 | |
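// Functions with SME ZA/ZT0 state or a streaming(-compatible) interface need
// lazy-save and streaming-mode bookkeeping that FastISel does not implement,
// so decline here and let instruction selection fall back to SelectionDAG.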
5195 | SMEAttrs CallerAttrs(*FuncInfo.Fn); |
5196 | if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() || |
5197 | CallerAttrs.hasStreamingInterfaceOrBody() || |
5198 | CallerAttrs.hasStreamingCompatibleInterface()) |
5199 | return nullptr; |
5200 | return new AArch64FastISel(FuncInfo, LibInfo); |
5201 | } |
5202 | |