//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the AArch64-specific support for the FastISel class. Some |
10 | // of the target-specific code is generated by tablegen in the file |
11 | // AArch64GenFastISel.inc, which is #included here. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "AArch64.h" |
16 | #include "AArch64CallingConvention.h" |
17 | #include "AArch64MachineFunctionInfo.h" |
18 | #include "AArch64RegisterInfo.h" |
19 | #include "AArch64Subtarget.h" |
20 | #include "MCTargetDesc/AArch64AddressingModes.h" |
21 | #include "Utils/AArch64BaseInfo.h" |
22 | #include "Utils/AArch64SMEAttributes.h" |
23 | #include "llvm/ADT/APFloat.h" |
24 | #include "llvm/ADT/APInt.h" |
25 | #include "llvm/ADT/DenseMap.h" |
26 | #include "llvm/ADT/SmallVector.h" |
27 | #include "llvm/Analysis/BranchProbabilityInfo.h" |
28 | #include "llvm/CodeGen/CallingConvLower.h" |
29 | #include "llvm/CodeGen/FastISel.h" |
30 | #include "llvm/CodeGen/FunctionLoweringInfo.h" |
31 | #include "llvm/CodeGen/ISDOpcodes.h" |
32 | #include "llvm/CodeGen/MachineBasicBlock.h" |
33 | #include "llvm/CodeGen/MachineConstantPool.h" |
34 | #include "llvm/CodeGen/MachineFrameInfo.h" |
35 | #include "llvm/CodeGen/MachineInstr.h" |
36 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
37 | #include "llvm/CodeGen/MachineMemOperand.h" |
38 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
39 | #include "llvm/CodeGen/ValueTypes.h" |
40 | #include "llvm/CodeGenTypes/MachineValueType.h" |
41 | #include "llvm/IR/Argument.h" |
42 | #include "llvm/IR/Attributes.h" |
43 | #include "llvm/IR/BasicBlock.h" |
44 | #include "llvm/IR/CallingConv.h" |
45 | #include "llvm/IR/Constant.h" |
46 | #include "llvm/IR/Constants.h" |
47 | #include "llvm/IR/DataLayout.h" |
48 | #include "llvm/IR/DerivedTypes.h" |
49 | #include "llvm/IR/Function.h" |
50 | #include "llvm/IR/GetElementPtrTypeIterator.h" |
51 | #include "llvm/IR/GlobalValue.h" |
52 | #include "llvm/IR/InstrTypes.h" |
53 | #include "llvm/IR/Instruction.h" |
54 | #include "llvm/IR/Instructions.h" |
55 | #include "llvm/IR/IntrinsicInst.h" |
56 | #include "llvm/IR/Intrinsics.h" |
57 | #include "llvm/IR/IntrinsicsAArch64.h" |
58 | #include "llvm/IR/Module.h" |
59 | #include "llvm/IR/Operator.h" |
60 | #include "llvm/IR/Type.h" |
61 | #include "llvm/IR/User.h" |
62 | #include "llvm/IR/Value.h" |
63 | #include "llvm/MC/MCInstrDesc.h" |
64 | #include "llvm/MC/MCSymbol.h" |
65 | #include "llvm/Support/AtomicOrdering.h" |
66 | #include "llvm/Support/Casting.h" |
67 | #include "llvm/Support/CodeGen.h" |
68 | #include "llvm/Support/Compiler.h" |
69 | #include "llvm/Support/ErrorHandling.h" |
70 | #include "llvm/Support/MathExtras.h" |
71 | #include <algorithm> |
72 | #include <cassert> |
73 | #include <cstdint> |
74 | #include <iterator> |
75 | #include <utility> |
76 | |
77 | using namespace llvm; |
78 | |
79 | namespace { |
80 | |
81 | class AArch64FastISel final : public FastISel { |
82 | class Address { |
83 | public: |
84 | using BaseKind = enum { |
85 | RegBase, |
86 | FrameIndexBase |
87 | }; |
88 | |
89 | private: |
90 | BaseKind Kind = RegBase; |
91 | AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend; |
92 | union { |
93 | unsigned Reg; |
94 | int FI; |
95 | } Base; |
96 | Register OffsetReg; |
97 | unsigned Shift = 0; |
98 | int64_t Offset = 0; |
99 | const GlobalValue *GV = nullptr; |
100 | |
101 | public: |
102 | Address() { Base.Reg = 0; } |
103 | |
104 | void setKind(BaseKind K) { Kind = K; } |
105 | BaseKind getKind() const { return Kind; } |
106 | void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; } |
107 | AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; } |
108 | bool isRegBase() const { return Kind == RegBase; } |
109 | bool isFIBase() const { return Kind == FrameIndexBase; } |
110 | |
111 | void setReg(Register Reg) { |
112 | assert(isRegBase() && "Invalid base register access!" ); |
113 | Base.Reg = Reg.id(); |
114 | } |
115 | |
116 | Register getReg() const { |
117 | assert(isRegBase() && "Invalid base register access!" ); |
118 | return Base.Reg; |
119 | } |
120 | |
121 | void setOffsetReg(Register Reg) { OffsetReg = Reg; } |
122 | |
123 | Register getOffsetReg() const { return OffsetReg; } |
124 | |
125 | void setFI(unsigned FI) { |
126 | assert(isFIBase() && "Invalid base frame index access!" ); |
127 | Base.FI = FI; |
128 | } |
129 | |
130 | unsigned getFI() const { |
131 | assert(isFIBase() && "Invalid base frame index access!" ); |
132 | return Base.FI; |
133 | } |
134 | |
135 | void setOffset(int64_t O) { Offset = O; } |
136 | int64_t getOffset() { return Offset; } |
137 | void setShift(unsigned S) { Shift = S; } |
138 | unsigned getShift() { return Shift; } |
139 | |
140 | void setGlobalValue(const GlobalValue *G) { GV = G; } |
141 | const GlobalValue *getGlobalValue() { return GV; } |
142 | }; |
143 | |
144 | /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can |
145 | /// make the right decision when generating code for different targets. |
146 | const AArch64Subtarget *Subtarget; |
147 | LLVMContext *Context; |
148 | |
149 | bool fastLowerArguments() override; |
150 | bool fastLowerCall(CallLoweringInfo &CLI) override; |
151 | bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; |
152 | |
153 | private: |
154 | // Selection routines. |
155 | bool selectAddSub(const Instruction *I); |
156 | bool selectLogicalOp(const Instruction *I); |
157 | bool selectLoad(const Instruction *I); |
158 | bool selectStore(const Instruction *I); |
159 | bool selectBranch(const Instruction *I); |
160 | bool selectIndirectBr(const Instruction *I); |
161 | bool selectCmp(const Instruction *I); |
162 | bool selectSelect(const Instruction *I); |
163 | bool selectFPExt(const Instruction *I); |
164 | bool selectFPTrunc(const Instruction *I); |
165 | bool selectFPToInt(const Instruction *I, bool Signed); |
166 | bool selectIntToFP(const Instruction *I, bool Signed); |
167 | bool selectRem(const Instruction *I, unsigned ISDOpcode); |
168 | bool selectRet(const Instruction *I); |
169 | bool selectTrunc(const Instruction *I); |
170 | bool selectIntExt(const Instruction *I); |
171 | bool selectMul(const Instruction *I); |
172 | bool selectShift(const Instruction *I); |
173 | bool selectBitCast(const Instruction *I); |
174 | bool selectFRem(const Instruction *I); |
175 | bool selectSDiv(const Instruction *I); |
176 | bool selectGetElementPtr(const Instruction *I); |
177 | bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I); |
178 | |
179 | // Utility helper routines. |
180 | bool isTypeLegal(Type *Ty, MVT &VT); |
181 | bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false); |
182 | bool isValueAvailable(const Value *V) const; |
183 | bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr); |
184 | bool computeCallAddress(const Value *V, Address &Addr); |
185 | bool simplifyAddress(Address &Addr, MVT VT); |
186 | void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB, |
187 | MachineMemOperand::Flags Flags, |
188 | unsigned ScaleFactor, MachineMemOperand *MMO); |
189 | bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment); |
190 | bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, |
191 | MaybeAlign Alignment); |
192 | bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I, |
193 | const Value *Cond); |
194 | bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT); |
195 | bool optimizeSelect(const SelectInst *SI); |
196 | Register getRegForGEPIndex(const Value *Idx); |
197 | |
198 | // Emit helper routines. |
199 | Register emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, |
200 | const Value *RHS, bool SetFlags = false, |
201 | bool WantResult = true, bool IsZExt = false); |
202 | Register emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg, |
203 | Register RHSReg, bool SetFlags = false, |
204 | bool WantResult = true); |
205 | Register emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg, uint64_t Imm, |
206 | bool SetFlags = false, bool WantResult = true); |
207 | Register emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg, |
208 | Register RHSReg, AArch64_AM::ShiftExtendType ShiftType, |
209 | uint64_t ShiftImm, bool SetFlags = false, |
210 | bool WantResult = true); |
211 | Register emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg, |
212 | Register RHSReg, AArch64_AM::ShiftExtendType ExtType, |
213 | uint64_t ShiftImm, bool SetFlags = false, |
214 | bool WantResult = true); |
215 | |
216 | // Emit functions. |
217 | bool emitCompareAndBranch(const BranchInst *BI); |
218 | bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt); |
219 | bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt); |
220 | bool emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm); |
221 | bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS); |
222 | Register emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true, |
223 | MachineMemOperand *MMO = nullptr); |
224 | bool emitStore(MVT VT, Register SrcReg, Address Addr, |
225 | MachineMemOperand *MMO = nullptr); |
226 | bool emitStoreRelease(MVT VT, Register SrcReg, Register AddrReg, |
227 | MachineMemOperand *MMO = nullptr); |
228 | Register emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, bool isZExt); |
229 | Register emiti1Ext(Register SrcReg, MVT DestVT, bool isZExt); |
230 | Register emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, |
231 | bool SetFlags = false, bool WantResult = true, |
232 | bool IsZExt = false); |
233 | Register emitAdd_ri_(MVT VT, Register Op0, int64_t Imm); |
234 | Register emitSub(MVT RetVT, const Value *LHS, const Value *RHS, |
235 | bool SetFlags = false, bool WantResult = true, |
236 | bool IsZExt = false); |
237 | Register emitSubs_rr(MVT RetVT, Register LHSReg, Register RHSReg, |
238 | bool WantResult = true); |
239 | Register emitSubs_rs(MVT RetVT, Register LHSReg, Register RHSReg, |
240 | AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm, |
241 | bool WantResult = true); |
242 | Register emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, |
243 | const Value *RHS); |
244 | Register emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, Register LHSReg, |
245 | uint64_t Imm); |
246 | Register emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, Register LHSReg, |
247 | Register RHSReg, uint64_t ShiftImm); |
248 | Register emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm); |
249 | Register emitMul_rr(MVT RetVT, Register Op0, Register Op1); |
250 | Register emitSMULL_rr(MVT RetVT, Register Op0, Register Op1); |
251 | Register emitUMULL_rr(MVT RetVT, Register Op0, Register Op1); |
252 | Register emitLSL_rr(MVT RetVT, Register Op0Reg, Register Op1Reg); |
253 | Register emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm, |
254 | bool IsZExt = true); |
255 | Register emitLSR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg); |
256 | Register emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm, |
257 | bool IsZExt = true); |
258 | Register emitASR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg); |
259 | Register emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm, |
260 | bool IsZExt = false); |
261 | |
262 | Register materializeInt(const ConstantInt *CI, MVT VT); |
263 | Register materializeFP(const ConstantFP *CFP, MVT VT); |
264 | Register materializeGV(const GlobalValue *GV); |
265 | |
266 | // Call handling routines. |
267 | private: |
268 | CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const; |
269 | bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs, |
270 | unsigned &NumBytes); |
271 | bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes); |
272 | |
273 | public: |
274 | // Backend specific FastISel code. |
275 | Register fastMaterializeAlloca(const AllocaInst *AI) override; |
276 | Register fastMaterializeConstant(const Constant *C) override; |
277 | Register fastMaterializeFloatZero(const ConstantFP *CF) override; |
278 | |
279 | explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo, |
280 | const TargetLibraryInfo *LibInfo) |
281 | : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { |
282 | Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>(); |
283 | Context = &FuncInfo.Fn->getContext(); |
284 | } |
285 | |
286 | bool fastSelectInstruction(const Instruction *I) override; |
287 | |
288 | #include "AArch64GenFastISel.inc" |
289 | }; |
290 | |
291 | } // end anonymous namespace |
292 | |
293 | /// Check if the sign-/zero-extend will be a noop. |
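/// Rough intuition (not an exhaustive rule): a zext/sext of a single-use load
/// can fold into the load itself (ldrb/ldrh/ldrsb/ldrsh already extend), and
/// an argument carrying a matching zext/sext attribute arrives already
/// extended, so no extra instruction is needed.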
294 | static bool isIntExtFree(const Instruction *I) { |
295 | assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && |
296 | "Unexpected integer extend instruction." ); |
297 | assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() && |
298 | "Unexpected value type." ); |
299 | bool IsZExt = isa<ZExtInst>(Val: I); |
300 | |
301 | if (const auto *LI = dyn_cast<LoadInst>(Val: I->getOperand(i: 0))) |
302 | if (LI->hasOneUse()) |
303 | return true; |
304 | |
305 | if (const auto *Arg = dyn_cast<Argument>(Val: I->getOperand(i: 0))) |
306 | if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) |
307 | return true; |
308 | |
309 | return false; |
310 | } |
311 | |
312 | /// Determine the implicit scale factor that is applied by a memory |
313 | /// operation for a given value type. |
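/// The scale is the access size in bytes; the scaled unsigned-immediate
/// load/store forms (LDRWui, STRXui, ...) implicitly multiply their 12-bit
/// offset field by this value.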
314 | static unsigned getImplicitScaleFactor(MVT VT) { |
315 | switch (VT.SimpleTy) { |
316 | default: |
317 | return 0; // invalid |
318 | case MVT::i1: // fall-through |
319 | case MVT::i8: |
320 | return 1; |
321 | case MVT::i16: |
322 | return 2; |
323 | case MVT::i32: // fall-through |
324 | case MVT::f32: |
325 | return 4; |
326 | case MVT::i64: // fall-through |
327 | case MVT::f64: |
328 | return 8; |
329 | } |
330 | } |
331 | |
332 | CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { |
333 | if (CC == CallingConv::GHC) |
334 | return CC_AArch64_GHC; |
335 | if (CC == CallingConv::CFGuard_Check) |
336 | return CC_AArch64_Win64_CFGuard_Check; |
337 | if (Subtarget->isTargetDarwin()) |
338 | return CC_AArch64_DarwinPCS; |
339 | if (Subtarget->isTargetWindows()) |
340 | return CC_AArch64_Win64PCS; |
341 | return CC_AArch64_AAPCS; |
342 | } |
343 | |
344 | Register AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { |
345 | assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && |
346 | "Alloca should always return a pointer." ); |
347 | |
  // Don't handle dynamic allocas.
  auto SI = FuncInfo.StaticAllocaMap.find(AI);
  if (SI == FuncInfo.StaticAllocaMap.end())
    return Register();

  Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
          ResultReg)
      .addFrameIndex(SI->second)
      .addImm(0)
      .addImm(0);
  return ResultReg;
364 | } |
365 | |
366 | Register AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) { |
367 | if (VT > MVT::i64) |
368 | return Register(); |
369 | |
370 | if (!CI->isZero()) |
371 | return fastEmit_i(VT, RetVT: VT, Opcode: ISD::Constant, imm0: CI->getZExtValue()); |
372 | |
373 | // Create a copy from the zero register to materialize a "0" value. |
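  // Copying WZR/XZR rather than emitting a move-immediate keeps this a plain
  // COPY, which later passes can often fold away entirely.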
374 | const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass |
375 | : &AArch64::GPR32RegClass; |
376 | unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
377 | Register ResultReg = createResultReg(RC); |
378 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: TargetOpcode::COPY), |
379 | DestReg: ResultReg).addReg(RegNo: ZeroReg, flags: getKillRegState(B: true)); |
380 | return ResultReg; |
381 | } |
382 | |
383 | Register AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) { |
384 | // Positive zero (+0.0) has to be materialized with a fmov from the zero |
385 | // register, because the immediate version of fmov cannot encode zero. |
386 | if (CFP->isNullValue()) |
387 | return fastMaterializeFloatZero(CF: CFP); |
388 | |
389 | if (VT != MVT::f32 && VT != MVT::f64) |
390 | return Register(); |
391 | |
392 | const APFloat Val = CFP->getValueAPF(); |
393 | bool Is64Bit = (VT == MVT::f64); |
  // This checks to see if we can use FMOV instructions to materialize
  // a constant; otherwise we have to materialize via the constant pool.
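  // getFP32Imm/getFP64Imm return -1 when the value does not fit the 8-bit
  // FMOV immediate encoding (sign, 3-bit exponent, 4-bit fraction).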
396 | int Imm = |
397 | Is64Bit ? AArch64_AM::getFP64Imm(FPImm: Val) : AArch64_AM::getFP32Imm(FPImm: Val); |
398 | if (Imm != -1) { |
399 | unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi; |
400 | return fastEmitInst_i(MachineInstOpcode: Opc, RC: TLI.getRegClassFor(VT), Imm); |
401 | } |
402 | |
403 | // For the large code model materialize the FP constant in code. |
404 | if (TM.getCodeModel() == CodeModel::Large) { |
405 | unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm; |
406 | const TargetRegisterClass *RC = Is64Bit ? |
407 | &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
408 | |
409 | Register TmpReg = createResultReg(RC); |
410 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc1), DestReg: TmpReg) |
411 | .addImm(Val: CFP->getValueAPF().bitcastToAPInt().getZExtValue()); |
412 | |
413 | Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT)); |
414 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
415 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg) |
416 | .addReg(RegNo: TmpReg, flags: getKillRegState(B: true)); |
417 | |
418 | return ResultReg; |
419 | } |
420 | |
421 | // Materialize via constant pool. MachineConstantPool wants an explicit |
422 | // alignment. |
423 | Align Alignment = DL.getPrefTypeAlign(Ty: CFP->getType()); |
424 | |
425 | unsigned CPI = MCP.getConstantPoolIndex(C: cast<Constant>(Val: CFP), Alignment); |
426 | Register ADRPReg = createResultReg(RC: &AArch64::GPR64commonRegClass); |
427 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADRP), |
428 | DestReg: ADRPReg).addConstantPoolIndex(Idx: CPI, Offset: 0, TargetFlags: AArch64II::MO_PAGE); |
429 | |
430 | unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui; |
431 | Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT)); |
432 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg) |
433 | .addReg(RegNo: ADRPReg) |
434 | .addConstantPoolIndex(Idx: CPI, Offset: 0, TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
435 | return ResultReg; |
436 | } |
437 | |
438 | Register AArch64FastISel::materializeGV(const GlobalValue *GV) { |
439 | // We can't handle thread-local variables quickly yet. |
440 | if (GV->isThreadLocal()) |
441 | return Register(); |
442 | |
443 | // MachO still uses GOT for large code-model accesses, but ELF requires |
444 | // movz/movk sequences, which FastISel doesn't handle yet. |
445 | if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO()) |
446 | return Register(); |
447 | |
448 | if (FuncInfo.MF->getInfo<AArch64FunctionInfo>()->hasELFSignedGOT()) |
449 | return Register(); |
450 | |
451 | unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); |
452 | |
453 | EVT DestEVT = TLI.getValueType(DL, Ty: GV->getType(), AllowUnknown: true); |
454 | if (!DestEVT.isSimple()) |
455 | return Register(); |
456 | |
457 | Register ADRPReg = createResultReg(RC: &AArch64::GPR64commonRegClass); |
458 | Register ResultReg; |
459 | |
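  // Depending on how the reference is classified, this becomes either a GOT
  // load:
  //   adrp xN, :got:sym      ; ldr xN, [xN, :got_lo12:sym]
  // or a direct address computation:
  //   adrp xN, sym           ; add xN, xN, :lo12:sym
  // (illustrative assembly; the exact relocation specifiers depend on OpFlags
  // and the target object format).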
460 | if (OpFlags & AArch64II::MO_GOT) { |
461 | // ADRP + LDRX |
462 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADRP), |
463 | DestReg: ADRPReg) |
464 | .addGlobalAddress(GV, Offset: 0, TargetFlags: AArch64II::MO_PAGE | OpFlags); |
465 | |
466 | unsigned LdrOpc; |
467 | if (Subtarget->isTargetILP32()) { |
468 | ResultReg = createResultReg(RC: &AArch64::GPR32RegClass); |
469 | LdrOpc = AArch64::LDRWui; |
470 | } else { |
471 | ResultReg = createResultReg(RC: &AArch64::GPR64RegClass); |
472 | LdrOpc = AArch64::LDRXui; |
473 | } |
474 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: LdrOpc), |
475 | DestReg: ResultReg) |
476 | .addReg(RegNo: ADRPReg) |
477 | .addGlobalAddress(GV, Offset: 0, TargetFlags: AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | |
478 | AArch64II::MO_NC | OpFlags); |
479 | if (!Subtarget->isTargetILP32()) |
480 | return ResultReg; |
481 | |
482 | // LDRWui produces a 32-bit register, but pointers in-register are 64-bits |
483 | // so we must extend the result on ILP32. |
484 | Register Result64 = createResultReg(RC: &AArch64::GPR64RegClass); |
485 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
486 | MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG)) |
487 | .addDef(RegNo: Result64) |
488 | .addImm(Val: 0) |
489 | .addReg(RegNo: ResultReg, flags: RegState::Kill) |
490 | .addImm(Val: AArch64::sub_32); |
491 | return Result64; |
492 | } else { |
493 | // ADRP + ADDX |
494 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADRP), |
495 | DestReg: ADRPReg) |
496 | .addGlobalAddress(GV, Offset: 0, TargetFlags: AArch64II::MO_PAGE | OpFlags); |
497 | |
498 | if (OpFlags & AArch64II::MO_TAGGED) { |
499 | // MO_TAGGED on the page indicates a tagged address. Set the tag now. |
500 | // We do so by creating a MOVK that sets bits 48-63 of the register to |
501 | // (global address + 0x100000000 - PC) >> 48. This assumes that we're in |
502 | // the small code model so we can assume a binary size of <= 4GB, which |
503 | // makes the untagged PC relative offset positive. The binary must also be |
504 | // loaded into address range [0, 2^48). Both of these properties need to |
505 | // be ensured at runtime when using tagged addresses. |
506 | // |
507 | // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that |
508 | // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands |
509 | // are not exactly 1:1 with FastISel so we cannot easily abstract this |
510 | // out. At some point, it would be nice to find a way to not have this |
511 | // duplicate code. |
512 | Register DstReg = createResultReg(RC: &AArch64::GPR64commonRegClass); |
513 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::MOVKXi), |
514 | DestReg: DstReg) |
515 | .addReg(RegNo: ADRPReg) |
516 | .addGlobalAddress(GV, /*Offset=*/0x100000000, |
517 | TargetFlags: AArch64II::MO_PREL | AArch64II::MO_G3) |
518 | .addImm(Val: 48); |
519 | ADRPReg = DstReg; |
520 | } |
521 | |
522 | ResultReg = createResultReg(RC: &AArch64::GPR64spRegClass); |
523 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADDXri), |
524 | DestReg: ResultReg) |
525 | .addReg(RegNo: ADRPReg) |
526 | .addGlobalAddress(GV, Offset: 0, |
527 | TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags) |
528 | .addImm(Val: 0); |
529 | } |
530 | return ResultReg; |
531 | } |
532 | |
533 | Register AArch64FastISel::fastMaterializeConstant(const Constant *C) { |
534 | EVT CEVT = TLI.getValueType(DL, Ty: C->getType(), AllowUnknown: true); |
535 | |
536 | // Only handle simple types. |
537 | if (!CEVT.isSimple()) |
538 | return Register(); |
539 | MVT VT = CEVT.getSimpleVT(); |
  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
  // 'null' pointers need somewhat special treatment.
542 | if (isa<ConstantPointerNull>(Val: C)) { |
543 | assert(VT == MVT::i64 && "Expected 64-bit pointers" ); |
544 | return materializeInt(CI: ConstantInt::get(Ty: Type::getInt64Ty(C&: *Context), V: 0), VT); |
545 | } |
546 | |
547 | if (const auto *CI = dyn_cast<ConstantInt>(Val: C)) |
548 | return materializeInt(CI, VT); |
549 | else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Val: C)) |
550 | return materializeFP(CFP, VT); |
551 | else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: C)) |
552 | return materializeGV(GV); |
553 | |
554 | return Register(); |
555 | } |
556 | |
557 | Register AArch64FastISel::fastMaterializeFloatZero(const ConstantFP *CFP) { |
558 | assert(CFP->isNullValue() && |
559 | "Floating-point constant is not a positive zero." ); |
560 | MVT VT; |
561 | if (!isTypeLegal(Ty: CFP->getType(), VT)) |
562 | return Register(); |
563 | |
564 | if (VT != MVT::f32 && VT != MVT::f64) |
565 | return Register(); |
566 | |
567 | bool Is64Bit = (VT == MVT::f64); |
568 | unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
569 | unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr; |
570 | return fastEmitInst_r(MachineInstOpcode: Opc, RC: TLI.getRegClassFor(VT), Op0: ZReg); |
571 | } |
572 | |
573 | /// Check if the multiply is by a power-of-2 constant. |
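/// Such multiplies can be folded into addressing modes or shifted-register
/// operands as an LSL by log2 of the constant (e.g. x * 8 => x, lsl #3).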
574 | static bool isMulPowOf2(const Value *I) { |
575 | if (const auto *MI = dyn_cast<MulOperator>(Val: I)) { |
576 | if (const auto *C = dyn_cast<ConstantInt>(Val: MI->getOperand(i_nocapture: 0))) |
577 | if (C->getValue().isPowerOf2()) |
578 | return true; |
579 | if (const auto *C = dyn_cast<ConstantInt>(Val: MI->getOperand(i_nocapture: 1))) |
580 | if (C->getValue().isPowerOf2()) |
581 | return true; |
582 | } |
583 | return false; |
584 | } |
585 | |
586 | // Computes the address to get to an object. |
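// On success, Addr describes something the AArch64 load/store addressing
// modes can encode: a frame-index or register base, optionally combined with
// a constant offset and/or an (extended and shifted) offset register.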
587 | bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) |
588 | { |
589 | const User *U = nullptr; |
590 | unsigned Opcode = Instruction::UserOp1; |
591 | if (const Instruction *I = dyn_cast<Instruction>(Val: Obj)) { |
592 | // Don't walk into other basic blocks unless the object is an alloca from |
593 | // another block, otherwise it may not have a virtual register assigned. |
594 | if (FuncInfo.StaticAllocaMap.count(Val: static_cast<const AllocaInst *>(Obj)) || |
595 | FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB) { |
596 | Opcode = I->getOpcode(); |
597 | U = I; |
598 | } |
599 | } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Val: Obj)) { |
600 | Opcode = C->getOpcode(); |
601 | U = C; |
602 | } |
603 | |
604 | if (auto *Ty = dyn_cast<PointerType>(Val: Obj->getType())) |
605 | if (Ty->getAddressSpace() > 255) |
606 | // Fast instruction selection doesn't support the special |
607 | // address spaces. |
608 | return false; |
609 | |
610 | switch (Opcode) { |
611 | default: |
612 | break; |
613 | case Instruction::BitCast: |
614 | // Look through bitcasts. |
615 | return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty); |
616 | |
617 | case Instruction::IntToPtr: |
618 | // Look past no-op inttoptrs. |
619 | if (TLI.getValueType(DL, Ty: U->getOperand(i: 0)->getType()) == |
620 | TLI.getPointerTy(DL)) |
621 | return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty); |
622 | break; |
623 | |
624 | case Instruction::PtrToInt: |
625 | // Look past no-op ptrtoints. |
626 | if (TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL)) |
627 | return computeAddress(Obj: U->getOperand(i: 0), Addr, Ty); |
628 | break; |
629 | |
630 | case Instruction::GetElementPtr: { |
631 | Address SavedAddr = Addr; |
632 | uint64_t TmpOffset = Addr.getOffset(); |
633 | |
634 | // Iterate through the GEP folding the constants into offsets where |
635 | // we can. |
636 | for (gep_type_iterator GTI = gep_type_begin(GEP: U), E = gep_type_end(GEP: U); |
637 | GTI != E; ++GTI) { |
638 | const Value *Op = GTI.getOperand(); |
639 | if (StructType *STy = GTI.getStructTypeOrNull()) { |
640 | const StructLayout *SL = DL.getStructLayout(Ty: STy); |
641 | unsigned Idx = cast<ConstantInt>(Val: Op)->getZExtValue(); |
642 | TmpOffset += SL->getElementOffset(Idx); |
643 | } else { |
644 | uint64_t S = GTI.getSequentialElementStride(DL); |
645 | while (true) { |
646 | if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: Op)) { |
647 | // Constant-offset addressing. |
648 | TmpOffset += CI->getSExtValue() * S; |
649 | break; |
650 | } |
651 | if (canFoldAddIntoGEP(GEP: U, Add: Op)) { |
652 | // A compatible add with a constant operand. Fold the constant. |
653 | ConstantInt *CI = |
654 | cast<ConstantInt>(Val: cast<AddOperator>(Val: Op)->getOperand(i_nocapture: 1)); |
655 | TmpOffset += CI->getSExtValue() * S; |
656 | // Iterate on the other operand. |
657 | Op = cast<AddOperator>(Val: Op)->getOperand(i_nocapture: 0); |
658 | continue; |
659 | } |
660 | // Unsupported |
661 | goto unsupported_gep; |
662 | } |
663 | } |
664 | } |
665 | |
666 | // Try to grab the base operand now. |
667 | Addr.setOffset(TmpOffset); |
668 | if (computeAddress(Obj: U->getOperand(i: 0), Addr, Ty)) |
669 | return true; |
670 | |
671 | // We failed, restore everything and try the other options. |
672 | Addr = SavedAddr; |
673 | |
674 | unsupported_gep: |
675 | break; |
676 | } |
677 | case Instruction::Alloca: { |
678 | const AllocaInst *AI = cast<AllocaInst>(Val: Obj); |
679 | DenseMap<const AllocaInst *, int>::iterator SI = |
680 | FuncInfo.StaticAllocaMap.find(Val: AI); |
681 | if (SI != FuncInfo.StaticAllocaMap.end()) { |
682 | Addr.setKind(Address::FrameIndexBase); |
683 | Addr.setFI(SI->second); |
684 | return true; |
685 | } |
686 | break; |
687 | } |
688 | case Instruction::Add: { |
689 | // Adds of constants are common and easy enough. |
690 | const Value *LHS = U->getOperand(i: 0); |
691 | const Value *RHS = U->getOperand(i: 1); |
692 | |
693 | if (isa<ConstantInt>(Val: LHS)) |
694 | std::swap(a&: LHS, b&: RHS); |
695 | |
696 | if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: RHS)) { |
697 | Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); |
698 | return computeAddress(Obj: LHS, Addr, Ty); |
699 | } |
700 | |
701 | Address Backup = Addr; |
702 | if (computeAddress(Obj: LHS, Addr, Ty) && computeAddress(Obj: RHS, Addr, Ty)) |
703 | return true; |
704 | Addr = Backup; |
705 | |
706 | break; |
707 | } |
708 | case Instruction::Sub: { |
709 | // Subs of constants are common and easy enough. |
710 | const Value *LHS = U->getOperand(i: 0); |
711 | const Value *RHS = U->getOperand(i: 1); |
712 | |
713 | if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val: RHS)) { |
714 | Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); |
715 | return computeAddress(Obj: LHS, Addr, Ty); |
716 | } |
717 | break; |
718 | } |
719 | case Instruction::Shl: { |
720 | if (Addr.getOffsetReg()) |
721 | break; |
722 | |
723 | const auto *CI = dyn_cast<ConstantInt>(Val: U->getOperand(i: 1)); |
724 | if (!CI) |
725 | break; |
726 | |
727 | unsigned Val = CI->getZExtValue(); |
728 | if (Val < 1 || Val > 3) |
729 | break; |
730 | |
731 | uint64_t NumBytes = 0; |
732 | if (Ty && Ty->isSized()) { |
733 | uint64_t NumBits = DL.getTypeSizeInBits(Ty); |
734 | NumBytes = NumBits / 8; |
735 | if (!isPowerOf2_64(Value: NumBits)) |
736 | NumBytes = 0; |
737 | } |
738 | |
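    // Register-offset addressing only allows a shift of either 0 or log2 of
    // the access size, so the fold is only done when the shift amount matches
    // the type being accessed.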
739 | if (NumBytes != (1ULL << Val)) |
740 | break; |
741 | |
742 | Addr.setShift(Val); |
743 | Addr.setExtendType(AArch64_AM::LSL); |
744 | |
745 | const Value *Src = U->getOperand(i: 0); |
746 | if (const auto *I = dyn_cast<Instruction>(Val: Src)) { |
747 | if (FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB) { |
748 | // Fold the zext or sext when it won't become a noop. |
749 | if (const auto *ZE = dyn_cast<ZExtInst>(Val: I)) { |
750 | if (!isIntExtFree(I: ZE) && |
751 | ZE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
752 | Addr.setExtendType(AArch64_AM::UXTW); |
753 | Src = ZE->getOperand(i_nocapture: 0); |
754 | } |
755 | } else if (const auto *SE = dyn_cast<SExtInst>(Val: I)) { |
756 | if (!isIntExtFree(I: SE) && |
757 | SE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
758 | Addr.setExtendType(AArch64_AM::SXTW); |
759 | Src = SE->getOperand(i_nocapture: 0); |
760 | } |
761 | } |
762 | } |
763 | } |
764 | |
765 | if (const auto *AI = dyn_cast<BinaryOperator>(Val: Src)) |
766 | if (AI->getOpcode() == Instruction::And) { |
767 | const Value *LHS = AI->getOperand(i_nocapture: 0); |
768 | const Value *RHS = AI->getOperand(i_nocapture: 1); |
769 | |
770 | if (const auto *C = dyn_cast<ConstantInt>(Val: LHS)) |
771 | if (C->getValue() == 0xffffffff) |
772 | std::swap(a&: LHS, b&: RHS); |
773 | |
774 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) |
775 | if (C->getValue() == 0xffffffff) { |
776 | Addr.setExtendType(AArch64_AM::UXTW); |
777 | Register Reg = getRegForValue(V: LHS); |
778 | if (!Reg) |
779 | return false; |
780 | Reg = fastEmitInst_extractsubreg(RetVT: MVT::i32, Op0: Reg, Idx: AArch64::sub_32); |
781 | Addr.setOffsetReg(Reg); |
782 | return true; |
783 | } |
784 | } |
785 | |
786 | Register Reg = getRegForValue(V: Src); |
787 | if (!Reg) |
788 | return false; |
789 | Addr.setOffsetReg(Reg); |
790 | return true; |
791 | } |
792 | case Instruction::Mul: { |
793 | if (Addr.getOffsetReg()) |
794 | break; |
795 | |
796 | if (!isMulPowOf2(I: U)) |
797 | break; |
798 | |
799 | const Value *LHS = U->getOperand(i: 0); |
800 | const Value *RHS = U->getOperand(i: 1); |
801 | |
802 | // Canonicalize power-of-2 value to the RHS. |
803 | if (const auto *C = dyn_cast<ConstantInt>(Val: LHS)) |
804 | if (C->getValue().isPowerOf2()) |
805 | std::swap(a&: LHS, b&: RHS); |
806 | |
    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
808 | const auto *C = cast<ConstantInt>(Val: RHS); |
809 | unsigned Val = C->getValue().logBase2(); |
810 | if (Val < 1 || Val > 3) |
811 | break; |
812 | |
813 | uint64_t NumBytes = 0; |
814 | if (Ty && Ty->isSized()) { |
815 | uint64_t NumBits = DL.getTypeSizeInBits(Ty); |
816 | NumBytes = NumBits / 8; |
817 | if (!isPowerOf2_64(Value: NumBits)) |
818 | NumBytes = 0; |
819 | } |
820 | |
821 | if (NumBytes != (1ULL << Val)) |
822 | break; |
823 | |
824 | Addr.setShift(Val); |
825 | Addr.setExtendType(AArch64_AM::LSL); |
826 | |
827 | const Value *Src = LHS; |
828 | if (const auto *I = dyn_cast<Instruction>(Val: Src)) { |
829 | if (FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB) { |
830 | // Fold the zext or sext when it won't become a noop. |
831 | if (const auto *ZE = dyn_cast<ZExtInst>(Val: I)) { |
832 | if (!isIntExtFree(I: ZE) && |
833 | ZE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
834 | Addr.setExtendType(AArch64_AM::UXTW); |
835 | Src = ZE->getOperand(i_nocapture: 0); |
836 | } |
837 | } else if (const auto *SE = dyn_cast<SExtInst>(Val: I)) { |
838 | if (!isIntExtFree(I: SE) && |
839 | SE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
840 | Addr.setExtendType(AArch64_AM::SXTW); |
841 | Src = SE->getOperand(i_nocapture: 0); |
842 | } |
843 | } |
844 | } |
845 | } |
846 | |
847 | Register Reg = getRegForValue(V: Src); |
848 | if (!Reg) |
849 | return false; |
850 | Addr.setOffsetReg(Reg); |
851 | return true; |
852 | } |
853 | case Instruction::And: { |
854 | if (Addr.getOffsetReg()) |
855 | break; |
856 | |
857 | if (!Ty || DL.getTypeSizeInBits(Ty) != 8) |
858 | break; |
859 | |
860 | const Value *LHS = U->getOperand(i: 0); |
861 | const Value *RHS = U->getOperand(i: 1); |
862 | |
863 | if (const auto *C = dyn_cast<ConstantInt>(Val: LHS)) |
864 | if (C->getValue() == 0xffffffff) |
865 | std::swap(a&: LHS, b&: RHS); |
866 | |
867 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) |
868 | if (C->getValue() == 0xffffffff) { |
869 | Addr.setShift(0); |
870 | Addr.setExtendType(AArch64_AM::LSL); |
871 | Addr.setExtendType(AArch64_AM::UXTW); |
872 | |
873 | Register Reg = getRegForValue(V: LHS); |
874 | if (!Reg) |
875 | return false; |
876 | Reg = fastEmitInst_extractsubreg(RetVT: MVT::i32, Op0: Reg, Idx: AArch64::sub_32); |
877 | Addr.setOffsetReg(Reg); |
878 | return true; |
879 | } |
880 | break; |
881 | } |
882 | case Instruction::SExt: |
883 | case Instruction::ZExt: { |
884 | if (!Addr.getReg() || Addr.getOffsetReg()) |
885 | break; |
886 | |
887 | const Value *Src = nullptr; |
888 | // Fold the zext or sext when it won't become a noop. |
889 | if (const auto *ZE = dyn_cast<ZExtInst>(Val: U)) { |
890 | if (!isIntExtFree(I: ZE) && ZE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
891 | Addr.setExtendType(AArch64_AM::UXTW); |
892 | Src = ZE->getOperand(i_nocapture: 0); |
893 | } |
894 | } else if (const auto *SE = dyn_cast<SExtInst>(Val: U)) { |
895 | if (!isIntExtFree(I: SE) && SE->getOperand(i_nocapture: 0)->getType()->isIntegerTy(Bitwidth: 32)) { |
896 | Addr.setExtendType(AArch64_AM::SXTW); |
897 | Src = SE->getOperand(i_nocapture: 0); |
898 | } |
899 | } |
900 | |
901 | if (!Src) |
902 | break; |
903 | |
904 | Addr.setShift(0); |
905 | Register Reg = getRegForValue(V: Src); |
906 | if (!Reg) |
907 | return false; |
908 | Addr.setOffsetReg(Reg); |
909 | return true; |
910 | } |
911 | } // end switch |
912 | |
913 | if (Addr.isRegBase() && !Addr.getReg()) { |
914 | Register Reg = getRegForValue(V: Obj); |
915 | if (!Reg) |
916 | return false; |
917 | Addr.setReg(Reg); |
918 | return true; |
919 | } |
920 | |
921 | if (!Addr.getOffsetReg()) { |
922 | Register Reg = getRegForValue(V: Obj); |
923 | if (!Reg) |
924 | return false; |
925 | Addr.setOffsetReg(Reg); |
926 | return true; |
927 | } |
928 | |
929 | return false; |
930 | } |
931 | |
932 | bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { |
933 | const User *U = nullptr; |
934 | unsigned Opcode = Instruction::UserOp1; |
935 | bool InMBB = true; |
936 | |
937 | if (const auto *I = dyn_cast<Instruction>(Val: V)) { |
938 | Opcode = I->getOpcode(); |
939 | U = I; |
940 | InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); |
941 | } else if (const auto *C = dyn_cast<ConstantExpr>(Val: V)) { |
942 | Opcode = C->getOpcode(); |
943 | U = C; |
944 | } |
945 | |
946 | switch (Opcode) { |
947 | default: break; |
948 | case Instruction::BitCast: |
949 | // Look past bitcasts if its operand is in the same BB. |
950 | if (InMBB) |
951 | return computeCallAddress(V: U->getOperand(i: 0), Addr); |
952 | break; |
953 | case Instruction::IntToPtr: |
954 | // Look past no-op inttoptrs if its operand is in the same BB. |
955 | if (InMBB && |
956 | TLI.getValueType(DL, Ty: U->getOperand(i: 0)->getType()) == |
957 | TLI.getPointerTy(DL)) |
958 | return computeCallAddress(V: U->getOperand(i: 0), Addr); |
959 | break; |
960 | case Instruction::PtrToInt: |
961 | // Look past no-op ptrtoints if its operand is in the same BB. |
962 | if (InMBB && TLI.getValueType(DL, Ty: U->getType()) == TLI.getPointerTy(DL)) |
963 | return computeCallAddress(V: U->getOperand(i: 0), Addr); |
964 | break; |
965 | } |
966 | |
967 | if (const GlobalValue *GV = dyn_cast<GlobalValue>(Val: V)) { |
968 | Addr.setGlobalValue(GV); |
969 | return true; |
970 | } |
971 | |
972 | // If all else fails, try to materialize the value in a register. |
973 | if (!Addr.getGlobalValue()) { |
974 | Addr.setReg(getRegForValue(V)); |
975 | return Addr.getReg().isValid(); |
976 | } |
977 | |
978 | return false; |
979 | } |
980 | |
981 | bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { |
982 | EVT evt = TLI.getValueType(DL, Ty, AllowUnknown: true); |
983 | |
984 | if (Subtarget->isTargetILP32() && Ty->isPointerTy()) |
985 | return false; |
986 | |
987 | // Only handle simple types. |
988 | if (evt == MVT::Other || !evt.isSimple()) |
989 | return false; |
990 | VT = evt.getSimpleVT(); |
991 | |
992 | // This is a legal type, but it's not something we handle in fast-isel. |
993 | if (VT == MVT::f128) |
994 | return false; |
995 | |
996 | // Handle all other legal types, i.e. a register that will directly hold this |
997 | // value. |
998 | return TLI.isTypeLegal(VT); |
999 | } |
1000 | |
1001 | /// Determine if the value type is supported by FastISel. |
1002 | /// |
1003 | /// FastISel for AArch64 can handle more value types than are legal. This adds |
/// simple value types such as i1, i8, and i16.
1005 | bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) { |
1006 | if (Ty->isVectorTy() && !IsVectorAllowed) |
1007 | return false; |
1008 | |
1009 | if (isTypeLegal(Ty, VT)) |
1010 | return true; |
1011 | |
  // If this is a type that can be sign- or zero-extended to a basic operation,
  // go ahead and accept it now.
1014 | if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) |
1015 | return true; |
1016 | |
1017 | return false; |
1018 | } |
1019 | |
1020 | bool AArch64FastISel::isValueAvailable(const Value *V) const { |
1021 | if (!isa<Instruction>(Val: V)) |
1022 | return true; |
1023 | |
1024 | const auto *I = cast<Instruction>(Val: V); |
1025 | return FuncInfo.getMBB(BB: I->getParent()) == FuncInfo.MBB; |
1026 | } |
1027 | |
1028 | bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { |
1029 | if (Subtarget->isTargetILP32()) |
1030 | return false; |
1031 | |
1032 | unsigned ScaleFactor = getImplicitScaleFactor(VT); |
1033 | if (!ScaleFactor) |
1034 | return false; |
1035 | |
1036 | bool ImmediateOffsetNeedsLowering = false; |
1037 | bool RegisterOffsetNeedsLowering = false; |
1038 | int64_t Offset = Addr.getOffset(); |
1039 | if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(x: Offset)) |
1040 | ImmediateOffsetNeedsLowering = true; |
1041 | else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) && |
1042 | !isUInt<12>(x: Offset / ScaleFactor)) |
1043 | ImmediateOffsetNeedsLowering = true; |
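  // The two encodable forms are a signed 9-bit unscaled offset (the LDUR/STUR
  // family) and an unsigned 12-bit offset scaled by the access size; anything
  // else is lowered into a separate add below.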
1044 | |
1045 | // Cannot encode an offset register and an immediate offset in the same |
1046 | // instruction. Fold the immediate offset into the load/store instruction and |
1047 | // emit an additional add to take care of the offset register. |
1048 | if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg()) |
1049 | RegisterOffsetNeedsLowering = true; |
1050 | |
1051 | // Cannot encode zero register as base. |
1052 | if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg()) |
1053 | RegisterOffsetNeedsLowering = true; |
1054 | |
1055 | // If this is a stack pointer and the offset needs to be simplified then put |
1056 | // the alloca address into a register, set the base type back to register and |
1057 | // continue. This should almost never happen. |
1058 | if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase()) |
1059 | { |
1060 | Register ResultReg = createResultReg(RC: &AArch64::GPR64spRegClass); |
1061 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADDXri), |
1062 | DestReg: ResultReg) |
1063 | .addFrameIndex(Idx: Addr.getFI()) |
1064 | .addImm(Val: 0) |
1065 | .addImm(Val: 0); |
1066 | Addr.setKind(Address::RegBase); |
1067 | Addr.setReg(ResultReg); |
1068 | } |
1069 | |
1070 | if (RegisterOffsetNeedsLowering) { |
1071 | Register ResultReg; |
1072 | if (Addr.getReg()) { |
1073 | if (Addr.getExtendType() == AArch64_AM::SXTW || |
1074 | Addr.getExtendType() == AArch64_AM::UXTW ) |
1075 | ResultReg = emitAddSub_rx(/*UseAdd=*/true, RetVT: MVT::i64, LHSReg: Addr.getReg(), |
1076 | RHSReg: Addr.getOffsetReg(), ExtType: Addr.getExtendType(), |
1077 | ShiftImm: Addr.getShift()); |
1078 | else |
1079 | ResultReg = emitAddSub_rs(/*UseAdd=*/true, RetVT: MVT::i64, LHSReg: Addr.getReg(), |
1080 | RHSReg: Addr.getOffsetReg(), ShiftType: AArch64_AM::LSL, |
1081 | ShiftImm: Addr.getShift()); |
1082 | } else { |
1083 | if (Addr.getExtendType() == AArch64_AM::UXTW) |
1084 | ResultReg = emitLSL_ri(RetVT: MVT::i64, SrcVT: MVT::i32, Op0Reg: Addr.getOffsetReg(), |
1085 | Imm: Addr.getShift(), /*IsZExt=*/true); |
1086 | else if (Addr.getExtendType() == AArch64_AM::SXTW) |
1087 | ResultReg = emitLSL_ri(RetVT: MVT::i64, SrcVT: MVT::i32, Op0Reg: Addr.getOffsetReg(), |
1088 | Imm: Addr.getShift(), /*IsZExt=*/false); |
1089 | else |
1090 | ResultReg = emitLSL_ri(RetVT: MVT::i64, SrcVT: MVT::i64, Op0Reg: Addr.getOffsetReg(), |
1091 | Imm: Addr.getShift()); |
1092 | } |
1093 | if (!ResultReg) |
1094 | return false; |
1095 | |
1096 | Addr.setReg(ResultReg); |
1097 | Addr.setOffsetReg(0); |
1098 | Addr.setShift(0); |
1099 | Addr.setExtendType(AArch64_AM::InvalidShiftExtend); |
1100 | } |
1101 | |
  // Since the offset is too large for the load/store instruction, get the
  // reg+offset into a register.
1104 | if (ImmediateOffsetNeedsLowering) { |
1105 | Register ResultReg; |
1106 | if (Addr.getReg()) |
1107 | // Try to fold the immediate into the add instruction. |
1108 | ResultReg = emitAdd_ri_(VT: MVT::i64, Op0: Addr.getReg(), Imm: Offset); |
1109 | else |
1110 | ResultReg = fastEmit_i(VT: MVT::i64, RetVT: MVT::i64, Opcode: ISD::Constant, imm0: Offset); |
1111 | |
1112 | if (!ResultReg) |
1113 | return false; |
1114 | Addr.setReg(ResultReg); |
1115 | Addr.setOffset(0); |
1116 | } |
1117 | return true; |
1118 | } |
1119 | |
1120 | void AArch64FastISel::addLoadStoreOperands(Address &Addr, |
1121 | const MachineInstrBuilder &MIB, |
1122 | MachineMemOperand::Flags Flags, |
1123 | unsigned ScaleFactor, |
1124 | MachineMemOperand *MMO) { |
1125 | int64_t Offset = Addr.getOffset() / ScaleFactor; |
1126 | // Frame base works a bit differently. Handle it separately. |
1127 | if (Addr.isFIBase()) { |
1128 | int FI = Addr.getFI(); |
1129 | // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size |
1130 | // and alignment should be based on the VT. |
1131 | MMO = FuncInfo.MF->getMachineMemOperand( |
1132 | PtrInfo: MachinePointerInfo::getFixedStack(MF&: *FuncInfo.MF, FI, Offset), F: Flags, |
1133 | Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI)); |
1134 | // Now add the rest of the operands. |
1135 | MIB.addFrameIndex(Idx: FI).addImm(Val: Offset); |
1136 | } else { |
1137 | assert(Addr.isRegBase() && "Unexpected address kind." ); |
1138 | const MCInstrDesc &II = MIB->getDesc(); |
1139 | unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0; |
1140 | Addr.setReg( |
1141 | constrainOperandRegClass(II, Op: Addr.getReg(), OpNum: II.getNumDefs()+Idx)); |
1142 | Addr.setOffsetReg( |
1143 | constrainOperandRegClass(II, Op: Addr.getOffsetReg(), OpNum: II.getNumDefs()+Idx+1)); |
1144 | if (Addr.getOffsetReg()) { |
1145 | assert(Addr.getOffset() == 0 && "Unexpected offset" ); |
1146 | bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW || |
1147 | Addr.getExtendType() == AArch64_AM::SXTX; |
1148 | MIB.addReg(RegNo: Addr.getReg()); |
1149 | MIB.addReg(RegNo: Addr.getOffsetReg()); |
1150 | MIB.addImm(Val: IsSigned); |
1151 | MIB.addImm(Val: Addr.getShift() != 0); |
1152 | } else |
1153 | MIB.addReg(RegNo: Addr.getReg()).addImm(Val: Offset); |
1154 | } |
1155 | |
1156 | if (MMO) |
1157 | MIB.addMemOperand(MMO); |
1158 | } |
1159 | |
1160 | Register AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, |
1161 | const Value *RHS, bool SetFlags, |
1162 | bool WantResult, bool IsZExt) { |
1163 | AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend; |
1164 | bool NeedExtend = false; |
1165 | switch (RetVT.SimpleTy) { |
1166 | default: |
1167 | return Register(); |
1168 | case MVT::i1: |
1169 | NeedExtend = true; |
1170 | break; |
1171 | case MVT::i8: |
1172 | NeedExtend = true; |
1173 | ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB; |
1174 | break; |
1175 | case MVT::i16: |
1176 | NeedExtend = true; |
1177 | ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH; |
1178 | break; |
1179 | case MVT::i32: // fall-through |
1180 | case MVT::i64: |
1181 | break; |
1182 | } |
1183 | MVT SrcVT = RetVT; |
1184 | RetVT.SimpleTy = std::max(a: RetVT.SimpleTy, b: MVT::i32); |
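  // Arithmetic on sub-i32 types is performed in a 32-bit register, with the
  // operands extended as needed below.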
1185 | |
1186 | // Canonicalize immediates to the RHS first. |
1187 | if (UseAdd && isa<Constant>(Val: LHS) && !isa<Constant>(Val: RHS)) |
1188 | std::swap(a&: LHS, b&: RHS); |
1189 | |
1190 | // Canonicalize mul by power of 2 to the RHS. |
1191 | if (UseAdd && LHS->hasOneUse() && isValueAvailable(V: LHS)) |
1192 | if (isMulPowOf2(I: LHS)) |
1193 | std::swap(a&: LHS, b&: RHS); |
1194 | |
1195 | // Canonicalize shift immediate to the RHS. |
1196 | if (UseAdd && LHS->hasOneUse() && isValueAvailable(V: LHS)) |
1197 | if (const auto *SI = dyn_cast<BinaryOperator>(Val: LHS)) |
1198 | if (isa<ConstantInt>(Val: SI->getOperand(i_nocapture: 1))) |
1199 | if (SI->getOpcode() == Instruction::Shl || |
1200 | SI->getOpcode() == Instruction::LShr || |
1201 | SI->getOpcode() == Instruction::AShr ) |
1202 | std::swap(a&: LHS, b&: RHS); |
1203 | |
1204 | Register LHSReg = getRegForValue(V: LHS); |
1205 | if (!LHSReg) |
1206 | return Register(); |
1207 | |
1208 | if (NeedExtend) |
1209 | LHSReg = emitIntExt(SrcVT, SrcReg: LHSReg, DestVT: RetVT, isZExt: IsZExt); |
1210 | |
1211 | Register ResultReg; |
1212 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) { |
1213 | uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue(); |
1214 | if (C->isNegative()) |
1215 | ResultReg = emitAddSub_ri(UseAdd: !UseAdd, RetVT, LHSReg, Imm: -Imm, SetFlags, |
1216 | WantResult); |
1217 | else |
1218 | ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags, |
1219 | WantResult); |
1220 | } else if (const auto *C = dyn_cast<Constant>(Val: RHS)) |
1221 | if (C->isNullValue()) |
1222 | ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm: 0, SetFlags, WantResult); |
1223 | |
1224 | if (ResultReg) |
1225 | return ResultReg; |
1226 | |
1227 | // Only extend the RHS within the instruction if there is a valid extend type. |
1228 | if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && |
1229 | isValueAvailable(V: RHS)) { |
1230 | Register RHSReg = getRegForValue(V: RHS); |
1231 | if (!RHSReg) |
1232 | return Register(); |
1233 | return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtType: ExtendType, ShiftImm: 0, |
1234 | SetFlags, WantResult); |
1235 | } |
1236 | |
1237 | // Check if the mul can be folded into the instruction. |
1238 | if (RHS->hasOneUse() && isValueAvailable(V: RHS)) { |
1239 | if (isMulPowOf2(I: RHS)) { |
1240 | const Value *MulLHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 0); |
1241 | const Value *MulRHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 1); |
1242 | |
1243 | if (const auto *C = dyn_cast<ConstantInt>(Val: MulLHS)) |
1244 | if (C->getValue().isPowerOf2()) |
1245 | std::swap(a&: MulLHS, b&: MulRHS); |
1246 | |
1247 | assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt." ); |
1248 | uint64_t ShiftVal = cast<ConstantInt>(Val: MulRHS)->getValue().logBase2(); |
1249 | Register RHSReg = getRegForValue(V: MulLHS); |
1250 | if (!RHSReg) |
1251 | return Register(); |
1252 | ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType: AArch64_AM::LSL, |
1253 | ShiftImm: ShiftVal, SetFlags, WantResult); |
1254 | if (ResultReg) |
1255 | return ResultReg; |
1256 | } |
1257 | } |
1258 | |
1259 | // Check if the shift can be folded into the instruction. |
1260 | if (RHS->hasOneUse() && isValueAvailable(V: RHS)) { |
1261 | if (const auto *SI = dyn_cast<BinaryOperator>(Val: RHS)) { |
1262 | if (const auto *C = dyn_cast<ConstantInt>(Val: SI->getOperand(i_nocapture: 1))) { |
1263 | AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; |
1264 | switch (SI->getOpcode()) { |
1265 | default: break; |
1266 | case Instruction::Shl: ShiftType = AArch64_AM::LSL; break; |
1267 | case Instruction::LShr: ShiftType = AArch64_AM::LSR; break; |
1268 | case Instruction::AShr: ShiftType = AArch64_AM::ASR; break; |
1269 | } |
1270 | uint64_t ShiftVal = C->getZExtValue(); |
1271 | if (ShiftType != AArch64_AM::InvalidShiftExtend) { |
1272 | Register RHSReg = getRegForValue(V: SI->getOperand(i_nocapture: 0)); |
1273 | if (!RHSReg) |
1274 | return Register(); |
1275 | ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType, |
1276 | ShiftImm: ShiftVal, SetFlags, WantResult); |
1277 | if (ResultReg) |
1278 | return ResultReg; |
1279 | } |
1280 | } |
1281 | } |
1282 | } |
1283 | |
1284 | Register RHSReg = getRegForValue(V: RHS); |
1285 | if (!RHSReg) |
1286 | return Register(); |
1287 | |
1288 | if (NeedExtend) |
1289 | RHSReg = emitIntExt(SrcVT, SrcReg: RHSReg, DestVT: RetVT, isZExt: IsZExt); |
1290 | |
1291 | return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult); |
1292 | } |
1293 | |
1294 | Register AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg, |
1295 | Register RHSReg, bool SetFlags, |
1296 | bool WantResult) { |
1297 | assert(LHSReg && RHSReg && "Invalid register number." ); |
1298 | |
1299 | if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP || |
1300 | RHSReg == AArch64::SP || RHSReg == AArch64::WSP) |
1301 | return Register(); |
1302 | |
1303 | if (RetVT != MVT::i32 && RetVT != MVT::i64) |
1304 | return Register(); |
1305 | |
1306 | static const unsigned OpcTable[2][2][2] = { |
1307 | { { AArch64::SUBWrr, AArch64::SUBXrr }, |
1308 | { AArch64::ADDWrr, AArch64::ADDXrr } }, |
1309 | { { AArch64::SUBSWrr, AArch64::SUBSXrr }, |
1310 | { AArch64::ADDSWrr, AArch64::ADDSXrr } } |
1311 | }; |
1312 | bool Is64Bit = RetVT == MVT::i64; |
1313 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1314 | const TargetRegisterClass *RC = |
1315 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
1316 | Register ResultReg; |
1317 | if (WantResult) |
1318 | ResultReg = createResultReg(RC); |
1319 | else |
1320 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
1321 | |
1322 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
1323 | LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs()); |
1324 | RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1); |
1325 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg) |
1326 | .addReg(RegNo: LHSReg) |
1327 | .addReg(RegNo: RHSReg); |
1328 | return ResultReg; |
1329 | } |
1330 | |
1331 | Register AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg, |
1332 | uint64_t Imm, bool SetFlags, |
1333 | bool WantResult) { |
1334 | assert(LHSReg && "Invalid register number." ); |
1335 | |
1336 | if (RetVT != MVT::i32 && RetVT != MVT::i64) |
1337 | return Register(); |
1338 | |
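  // ADD/SUB (immediate) takes a 12-bit value, optionally shifted left by 12;
  // split the constant into that form or bail out.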
1339 | unsigned ShiftImm; |
1340 | if (isUInt<12>(x: Imm)) |
1341 | ShiftImm = 0; |
1342 | else if ((Imm & 0xfff000) == Imm) { |
1343 | ShiftImm = 12; |
1344 | Imm >>= 12; |
1345 | } else |
1346 | return Register(); |
1347 | |
1348 | static const unsigned OpcTable[2][2][2] = { |
1349 | { { AArch64::SUBWri, AArch64::SUBXri }, |
1350 | { AArch64::ADDWri, AArch64::ADDXri } }, |
1351 | { { AArch64::SUBSWri, AArch64::SUBSXri }, |
1352 | { AArch64::ADDSWri, AArch64::ADDSXri } } |
1353 | }; |
1354 | bool Is64Bit = RetVT == MVT::i64; |
1355 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1356 | const TargetRegisterClass *RC; |
1357 | if (SetFlags) |
1358 | RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
1359 | else |
1360 | RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; |
1361 | Register ResultReg; |
1362 | if (WantResult) |
1363 | ResultReg = createResultReg(RC); |
1364 | else |
1365 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
1366 | |
1367 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
1368 | LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs()); |
1369 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg) |
1370 | .addReg(RegNo: LHSReg) |
1371 | .addImm(Val: Imm) |
1372 | .addImm(Val: getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftImm)); |
1373 | return ResultReg; |
1374 | } |
1375 | |
1376 | Register AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg, |
1377 | Register RHSReg, |
1378 | AArch64_AM::ShiftExtendType ShiftType, |
1379 | uint64_t ShiftImm, bool SetFlags, |
1380 | bool WantResult) { |
1381 | assert(LHSReg && RHSReg && "Invalid register number." ); |
1382 | assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP && |
1383 | RHSReg != AArch64::SP && RHSReg != AArch64::WSP); |
1384 | |
1385 | if (RetVT != MVT::i32 && RetVT != MVT::i64) |
1386 | return Register(); |
1387 | |
1388 | // Don't deal with undefined shifts. |
1389 | if (ShiftImm >= RetVT.getSizeInBits()) |
1390 | return Register(); |
1391 | |
1392 | static const unsigned OpcTable[2][2][2] = { |
1393 | { { AArch64::SUBWrs, AArch64::SUBXrs }, |
1394 | { AArch64::ADDWrs, AArch64::ADDXrs } }, |
1395 | { { AArch64::SUBSWrs, AArch64::SUBSXrs }, |
1396 | { AArch64::ADDSWrs, AArch64::ADDSXrs } } |
1397 | }; |
1398 | bool Is64Bit = RetVT == MVT::i64; |
1399 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1400 | const TargetRegisterClass *RC = |
1401 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
1402 | Register ResultReg; |
1403 | if (WantResult) |
1404 | ResultReg = createResultReg(RC); |
1405 | else |
1406 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
1407 | |
1408 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
1409 | LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs()); |
1410 | RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1); |
1411 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg) |
1412 | .addReg(RegNo: LHSReg) |
1413 | .addReg(RegNo: RHSReg) |
1414 | .addImm(Val: getShifterImm(ST: ShiftType, Imm: ShiftImm)); |
1415 | return ResultReg; |
1416 | } |
1417 | |
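// ADD/SUB (extended register): the RHS is sign-/zero-extended (UXTB, SXTW,
// etc.) and then left-shifted by a small amount. The zero registers are not
// accepted as operands here, and shift amounts of 4 or more are rejected.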
1418 | Register AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg, |
1419 | Register RHSReg, |
1420 | AArch64_AM::ShiftExtendType ExtType, |
1421 | uint64_t ShiftImm, bool SetFlags, |
1422 | bool WantResult) { |
1423 | assert(LHSReg && RHSReg && "Invalid register number." ); |
1424 | assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR && |
1425 | RHSReg != AArch64::XZR && RHSReg != AArch64::WZR); |
1426 | |
1427 | if (RetVT != MVT::i32 && RetVT != MVT::i64) |
1428 | return Register(); |
1429 | |
1430 | if (ShiftImm >= 4) |
1431 | return Register(); |
1432 | |
1433 | static const unsigned OpcTable[2][2][2] = { |
1434 | { { AArch64::SUBWrx, AArch64::SUBXrx }, |
1435 | { AArch64::ADDWrx, AArch64::ADDXrx } }, |
1436 | { { AArch64::SUBSWrx, AArch64::SUBSXrx }, |
1437 | { AArch64::ADDSWrx, AArch64::ADDSXrx } } |
1438 | }; |
1439 | bool Is64Bit = RetVT == MVT::i64; |
1440 | unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; |
1441 | const TargetRegisterClass *RC = nullptr; |
1442 | if (SetFlags) |
1443 | RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
1444 | else |
1445 | RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; |
1446 | Register ResultReg; |
1447 | if (WantResult) |
1448 | ResultReg = createResultReg(RC); |
1449 | else |
1450 | ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; |
1451 | |
1452 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
1453 | LHSReg = constrainOperandRegClass(II, Op: LHSReg, OpNum: II.getNumDefs()); |
1454 | RHSReg = constrainOperandRegClass(II, Op: RHSReg, OpNum: II.getNumDefs() + 1); |
1455 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, DestReg: ResultReg) |
1456 | .addReg(RegNo: LHSReg) |
1457 | .addReg(RegNo: RHSReg) |
1458 | .addImm(Val: getArithExtendImm(ET: ExtType, Imm: ShiftImm)); |
1459 | return ResultReg; |
1460 | } |
1461 | |
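// Integer compares are emitted as a flag-setting subtract (SUBS) whose result
// is discarded; floating-point compares are emitted as FCMP.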
1462 | bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { |
1463 | Type *Ty = LHS->getType(); |
1464 | EVT EVT = TLI.getValueType(DL, Ty, AllowUnknown: true); |
1465 | if (!EVT.isSimple()) |
1466 | return false; |
1467 | MVT VT = EVT.getSimpleVT(); |
1468 | |
1469 | switch (VT.SimpleTy) { |
1470 | default: |
1471 | return false; |
1472 | case MVT::i1: |
1473 | case MVT::i8: |
1474 | case MVT::i16: |
1475 | case MVT::i32: |
1476 | case MVT::i64: |
1477 | return emitICmp(RetVT: VT, LHS, RHS, IsZExt); |
1478 | case MVT::f32: |
1479 | case MVT::f64: |
1480 | return emitFCmp(RetVT: VT, LHS, RHS); |
1481 | } |
1482 | } |
1483 | |
1484 | bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, |
1485 | bool IsZExt) { |
1486 | return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false, |
1487 | IsZExt) |
1488 | .isValid(); |
1489 | } |
1490 | |
1491 | bool AArch64FastISel::emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm) { |
1492 | return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm, |
1493 | /*SetFlags=*/true, /*WantResult=*/false) |
1494 | .isValid(); |
1495 | } |
1496 | |
1497 | bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) { |
1498 | if (RetVT != MVT::f32 && RetVT != MVT::f64) |
1499 | return false; |
1500 | |
1501 | // Check to see if the 2nd operand is a constant that we can encode directly |
1502 | // in the compare. |
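// Only a positive zero qualifies, since FCMP only has a "compare with #0.0"
// immediate form.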
1503 | bool UseImm = false; |
1504 | if (const auto *CFP = dyn_cast<ConstantFP>(Val: RHS)) |
1505 | if (CFP->isZero() && !CFP->isNegative()) |
1506 | UseImm = true; |
1507 | |
1508 | Register LHSReg = getRegForValue(V: LHS); |
1509 | if (!LHSReg) |
1510 | return false; |
1511 | |
1512 | if (UseImm) { |
1513 | unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri; |
1514 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc)) |
1515 | .addReg(RegNo: LHSReg); |
1516 | return true; |
1517 | } |
1518 | |
1519 | Register RHSReg = getRegForValue(V: RHS); |
1520 | if (!RHSReg) |
1521 | return false; |
1522 | |
1523 | unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr; |
1524 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc)) |
1525 | .addReg(RegNo: LHSReg) |
1526 | .addReg(RegNo: RHSReg); |
1527 | return true; |
1528 | } |
1529 | |
1530 | Register AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, |
1531 | bool SetFlags, bool WantResult, bool IsZExt) { |
1532 | return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult, |
1533 | IsZExt); |
1534 | } |
1535 | |
1536 | /// This method is a wrapper to simplify add emission. |
1537 | /// |
1538 | /// First try to emit an add with an immediate operand using emitAddSub_ri. If |
1539 | /// that fails, then try to materialize the immediate into a register and use |
1540 | /// emitAddSub_rr instead. |
1541 | Register AArch64FastISel::emitAdd_ri_(MVT VT, Register Op0, int64_t Imm) { |
1542 | Register ResultReg; |
1543 | if (Imm < 0) |
1544 | ResultReg = emitAddSub_ri(UseAdd: false, RetVT: VT, LHSReg: Op0, Imm: -Imm); |
1545 | else |
1546 | ResultReg = emitAddSub_ri(UseAdd: true, RetVT: VT, LHSReg: Op0, Imm); |
1547 | |
1548 | if (ResultReg) |
1549 | return ResultReg; |
1550 | |
1551 | Register CReg = fastEmit_i(VT, RetVT: VT, Opcode: ISD::Constant, imm0: Imm); |
1552 | if (!CReg) |
1553 | return Register(); |
1554 | |
1555 | ResultReg = emitAddSub_rr(UseAdd: true, RetVT: VT, LHSReg: Op0, RHSReg: CReg); |
1556 | return ResultReg; |
1557 | } |
1558 | |
1559 | Register AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS, |
1560 | bool SetFlags, bool WantResult, bool IsZExt) { |
1561 | return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult, |
1562 | IsZExt); |
1563 | } |
1564 | |
1565 | Register AArch64FastISel::emitSubs_rr(MVT RetVT, Register LHSReg, |
1566 | Register RHSReg, bool WantResult) { |
1567 | return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, |
1568 | /*SetFlags=*/true, WantResult); |
1569 | } |
1570 | |
1571 | Register AArch64FastISel::emitSubs_rs(MVT RetVT, Register LHSReg, |
1572 | Register RHSReg, |
1573 | AArch64_AM::ShiftExtendType ShiftType, |
1574 | uint64_t ShiftImm, bool WantResult) { |
1575 | return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType, |
1576 | ShiftImm, /*SetFlags=*/true, WantResult); |
1577 | } |
1578 | |
1579 | Register AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, |
1580 | const Value *LHS, const Value *RHS) { |
1581 | // Canonicalize immediates to the RHS first. |
1582 | if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS)) |
1583 | std::swap(a&: LHS, b&: RHS); |
1584 | |
1585 | // Canonicalize mul by power-of-2 to the RHS. |
1586 | if (LHS->hasOneUse() && isValueAvailable(V: LHS)) |
1587 | if (isMulPowOf2(I: LHS)) |
1588 | std::swap(a&: LHS, b&: RHS); |
1589 | |
1590 | // Canonicalize shift immediate to the RHS. |
1591 | if (LHS->hasOneUse() && isValueAvailable(V: LHS)) |
1592 | if (const auto *SI = dyn_cast<ShlOperator>(Val: LHS)) |
1593 | if (isa<ConstantInt>(Val: SI->getOperand(i_nocapture: 1))) |
1594 | std::swap(a&: LHS, b&: RHS); |
1595 | |
1596 | Register LHSReg = getRegForValue(V: LHS); |
1597 | if (!LHSReg) |
1598 | return Register(); |
1599 | |
1600 | Register ResultReg; |
1601 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) { |
1602 | uint64_t Imm = C->getZExtValue(); |
1603 | ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm); |
1604 | } |
1605 | if (ResultReg) |
1606 | return ResultReg; |
1607 | |
1608 | // Check if the mul can be folded into the instruction. |
1609 | if (RHS->hasOneUse() && isValueAvailable(V: RHS)) { |
1610 | if (isMulPowOf2(I: RHS)) { |
1611 | const Value *MulLHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 0); |
1612 | const Value *MulRHS = cast<MulOperator>(Val: RHS)->getOperand(i_nocapture: 1); |
1613 | |
1614 | if (const auto *C = dyn_cast<ConstantInt>(Val: MulLHS)) |
1615 | if (C->getValue().isPowerOf2()) |
1616 | std::swap(a&: MulLHS, b&: MulRHS); |
1617 | |
1618 | assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt." ); |
1619 | uint64_t ShiftVal = cast<ConstantInt>(Val: MulRHS)->getValue().logBase2(); |
1620 | |
1621 | Register RHSReg = getRegForValue(V: MulLHS); |
1622 | if (!RHSReg) |
1623 | return Register(); |
1624 | ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftImm: ShiftVal); |
1625 | if (ResultReg) |
1626 | return ResultReg; |
1627 | } |
1628 | } |
1629 | |
1630 | // Check if the shift can be folded into the instruction. |
1631 | if (RHS->hasOneUse() && isValueAvailable(V: RHS)) { |
1632 | if (const auto *SI = dyn_cast<ShlOperator>(Val: RHS)) |
1633 | if (const auto *C = dyn_cast<ConstantInt>(Val: SI->getOperand(i_nocapture: 1))) { |
1634 | uint64_t ShiftVal = C->getZExtValue(); |
1635 | Register RHSReg = getRegForValue(V: SI->getOperand(i_nocapture: 0)); |
1636 | if (!RHSReg) |
1637 | return Register(); |
1638 | ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftImm: ShiftVal); |
1639 | if (ResultReg) |
1640 | return ResultReg; |
1641 | } |
1642 | } |
1643 | |
1644 | Register RHSReg = getRegForValue(V: RHS); |
1645 | if (!RHSReg) |
1646 | return Register(); |
1647 | |
1648 | MVT VT = std::max(a: MVT::i32, b: RetVT.SimpleTy); |
1649 | ResultReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISDOpc, Op0: LHSReg, Op1: RHSReg); |
1650 | if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { |
1651 | uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; |
1652 | ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask); |
1653 | } |
1654 | return ResultReg; |
1655 | } |
1656 | |
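// The logical-immediate instructions (ANDri/ORRri/EORri) only accept bitmask
// immediates, so the value is validated and encoded via the AArch64_AM
// helpers before building the instruction.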
1657 | Register AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, |
1658 | Register LHSReg, uint64_t Imm) { |
1659 | static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), |
1660 | "ISD nodes are not consecutive!" ); |
1661 | static const unsigned OpcTable[3][2] = { |
1662 | { AArch64::ANDWri, AArch64::ANDXri }, |
1663 | { AArch64::ORRWri, AArch64::ORRXri }, |
1664 | { AArch64::EORWri, AArch64::EORXri } |
1665 | }; |
1666 | const TargetRegisterClass *RC; |
1667 | unsigned Opc; |
1668 | unsigned RegSize; |
1669 | switch (RetVT.SimpleTy) { |
1670 | default: |
1671 | return Register(); |
1672 | case MVT::i1: |
1673 | case MVT::i8: |
1674 | case MVT::i16: |
1675 | case MVT::i32: { |
1676 | unsigned Idx = ISDOpc - ISD::AND; |
1677 | Opc = OpcTable[Idx][0]; |
1678 | RC = &AArch64::GPR32spRegClass; |
1679 | RegSize = 32; |
1680 | break; |
1681 | } |
1682 | case MVT::i64: |
1683 | Opc = OpcTable[ISDOpc - ISD::AND][1]; |
1684 | RC = &AArch64::GPR64spRegClass; |
1685 | RegSize = 64; |
1686 | break; |
1687 | } |
1688 | |
1689 | if (!AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize)) |
1690 | return Register(); |
1691 | |
1692 | Register ResultReg = |
1693 | fastEmitInst_ri(MachineInstOpcode: Opc, RC, Op0: LHSReg, |
1694 | Imm: AArch64_AM::encodeLogicalImmediate(imm: Imm, regSize: RegSize)); |
1695 | if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) { |
1696 | uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; |
1697 | ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask); |
1698 | } |
1699 | return ResultReg; |
1700 | } |
1701 | |
1702 | Register AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, |
1703 | Register LHSReg, Register RHSReg, |
1704 | uint64_t ShiftImm) { |
1705 | static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), |
1706 | "ISD nodes are not consecutive!" ); |
1707 | static const unsigned OpcTable[3][2] = { |
1708 | { AArch64::ANDWrs, AArch64::ANDXrs }, |
1709 | { AArch64::ORRWrs, AArch64::ORRXrs }, |
1710 | { AArch64::EORWrs, AArch64::EORXrs } |
1711 | }; |
1712 | |
1713 | // Don't deal with undefined shifts. |
1714 | if (ShiftImm >= RetVT.getSizeInBits()) |
1715 | return Register(); |
1716 | |
1717 | const TargetRegisterClass *RC; |
1718 | unsigned Opc; |
1719 | switch (RetVT.SimpleTy) { |
1720 | default: |
1721 | return Register(); |
1722 | case MVT::i1: |
1723 | case MVT::i8: |
1724 | case MVT::i16: |
1725 | case MVT::i32: |
1726 | Opc = OpcTable[ISDOpc - ISD::AND][0]; |
1727 | RC = &AArch64::GPR32RegClass; |
1728 | break; |
1729 | case MVT::i64: |
1730 | Opc = OpcTable[ISDOpc - ISD::AND][1]; |
1731 | RC = &AArch64::GPR64RegClass; |
1732 | break; |
1733 | } |
1734 | Register ResultReg = |
1735 | fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: LHSReg, Op1: RHSReg, |
1736 | Imm: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftImm)); |
1737 | if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { |
1738 | uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; |
1739 | ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask); |
1740 | } |
1741 | return ResultReg; |
1742 | } |
1743 | |
1744 | Register AArch64FastISel::emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm) { |
1745 | return emitLogicalOp_ri(ISDOpc: ISD::AND, RetVT, LHSReg, Imm); |
1746 | } |
1747 | |
1748 | Register AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, |
1749 | bool WantZExt, MachineMemOperand *MMO) { |
1750 | if (!TLI.allowsMisalignedMemoryAccesses(VT)) |
1751 | return Register(); |
1752 | |
1753 | // Simplify this down to something we can handle. |
1754 | if (!simplifyAddress(Addr, VT)) |
1755 | return Register(); |
1756 | |
1757 | unsigned ScaleFactor = getImplicitScaleFactor(VT); |
1758 | if (!ScaleFactor) |
1759 | llvm_unreachable("Unexpected value type." ); |
1760 | |
1761 | // Negative offsets require unscaled, 9-bit, signed immediate offsets. |
1762 | // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. |
1763 | bool UseScaled = true; |
1764 | if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { |
1765 | UseScaled = false; |
1766 | ScaleFactor = 1; |
1767 | } |
1768 | |
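// Load opcode table indexed by [WantZExt][2 * AddrForm + IsRet64Bit][Size].
// The address forms are: unscaled (LDUR*), scaled unsigned-offset (LDR*ui),
// register-offset with an X index (roX), and register-offset with an
// extended W index (roW).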
1769 | static const unsigned GPOpcTable[2][8][4] = { |
1770 | // Sign-extend. |
1771 | { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, |
1772 | AArch64::LDURXi }, |
1773 | { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, |
1774 | AArch64::LDURXi }, |
1775 | { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, |
1776 | AArch64::LDRXui }, |
1777 | { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, |
1778 | AArch64::LDRXui }, |
1779 | { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, |
1780 | AArch64::LDRXroX }, |
1781 | { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, |
1782 | AArch64::LDRXroX }, |
1783 | { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, |
1784 | AArch64::LDRXroW }, |
1785 | { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, |
1786 | AArch64::LDRXroW } |
1787 | }, |
1788 | // Zero-extend. |
1789 | { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, |
1790 | AArch64::LDURXi }, |
1791 | { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, |
1792 | AArch64::LDURXi }, |
1793 | { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, |
1794 | AArch64::LDRXui }, |
1795 | { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, |
1796 | AArch64::LDRXui }, |
1797 | { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, |
1798 | AArch64::LDRXroX }, |
1799 | { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, |
1800 | AArch64::LDRXroX }, |
1801 | { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, |
1802 | AArch64::LDRXroW }, |
1803 | { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, |
1804 | AArch64::LDRXroW } |
1805 | } |
1806 | }; |
1807 | |
1808 | static const unsigned FPOpcTable[4][2] = { |
1809 | { AArch64::LDURSi, AArch64::LDURDi }, |
1810 | { AArch64::LDRSui, AArch64::LDRDui }, |
1811 | { AArch64::LDRSroX, AArch64::LDRDroX }, |
1812 | { AArch64::LDRSroW, AArch64::LDRDroW } |
1813 | }; |
1814 | |
1815 | unsigned Opc; |
1816 | const TargetRegisterClass *RC; |
1817 | bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && |
1818 | Addr.getOffsetReg(); |
1819 | unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; |
1820 | if (Addr.getExtendType() == AArch64_AM::UXTW || |
1821 | Addr.getExtendType() == AArch64_AM::SXTW) |
1822 | Idx++; |
1823 | |
1824 | bool IsRet64Bit = RetVT == MVT::i64; |
1825 | switch (VT.SimpleTy) { |
1826 | default: |
1827 | llvm_unreachable("Unexpected value type." ); |
1828 | case MVT::i1: // Intentional fall-through. |
1829 | case MVT::i8: |
1830 | Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; |
1831 | RC = (IsRet64Bit && !WantZExt) ? |
1832 | &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; |
1833 | break; |
1834 | case MVT::i16: |
1835 | Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; |
1836 | RC = (IsRet64Bit && !WantZExt) ? |
1837 | &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; |
1838 | break; |
1839 | case MVT::i32: |
1840 | Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; |
1841 | RC = (IsRet64Bit && !WantZExt) ? |
1842 | &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; |
1843 | break; |
1844 | case MVT::i64: |
1845 | Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; |
1846 | RC = &AArch64::GPR64RegClass; |
1847 | break; |
1848 | case MVT::f32: |
1849 | Opc = FPOpcTable[Idx][0]; |
1850 | RC = &AArch64::FPR32RegClass; |
1851 | break; |
1852 | case MVT::f64: |
1853 | Opc = FPOpcTable[Idx][1]; |
1854 | RC = &AArch64::FPR64RegClass; |
1855 | break; |
1856 | } |
1857 | |
1858 | // Create the base instruction, then add the operands. |
1859 | Register ResultReg = createResultReg(RC); |
1860 | MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
1861 | MCID: TII.get(Opcode: Opc), DestReg: ResultReg); |
1862 | addLoadStoreOperands(Addr, MIB, Flags: MachineMemOperand::MOLoad, ScaleFactor, MMO); |
1863 | |
1864 | // Loading an i1 requires special handling. |
1865 | if (VT == MVT::i1) { |
1866 | Register ANDReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: 1); |
1867 | assert(ANDReg && "Unexpected AND instruction emission failure." ); |
1868 | ResultReg = ANDReg; |
1869 | } |
1870 | |
1871 | // For zero-extending loads to 64bit we emit a 32bit load and then convert |
1872 | // the 32bit reg to a 64bit reg. |
1873 | if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { |
1874 | Register Reg64 = createResultReg(RC: &AArch64::GPR64RegClass); |
1875 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
1876 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: Reg64) |
1877 | .addImm(Val: 0) |
1878 | .addReg(RegNo: ResultReg, flags: getKillRegState(B: true)) |
1879 | .addImm(Val: AArch64::sub_32); |
1880 | ResultReg = Reg64; |
1881 | } |
1882 | return ResultReg; |
1883 | } |
1884 | |
1885 | bool AArch64FastISel::selectAddSub(const Instruction *I) { |
1886 | MVT VT; |
1887 | if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true)) |
1888 | return false; |
1889 | |
1890 | if (VT.isVector()) |
1891 | return selectOperator(I, Opcode: I->getOpcode()); |
1892 | |
1893 | Register ResultReg; |
1894 | switch (I->getOpcode()) { |
1895 | default: |
1896 | llvm_unreachable("Unexpected instruction." ); |
1897 | case Instruction::Add: |
1898 | ResultReg = emitAdd(RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1)); |
1899 | break; |
1900 | case Instruction::Sub: |
1901 | ResultReg = emitSub(RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1)); |
1902 | break; |
1903 | } |
1904 | if (!ResultReg) |
1905 | return false; |
1906 | |
1907 | updateValueMap(I, Reg: ResultReg); |
1908 | return true; |
1909 | } |
1910 | |
1911 | bool AArch64FastISel::selectLogicalOp(const Instruction *I) { |
1912 | MVT VT; |
1913 | if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true)) |
1914 | return false; |
1915 | |
1916 | if (VT.isVector()) |
1917 | return selectOperator(I, Opcode: I->getOpcode()); |
1918 | |
1919 | Register ResultReg; |
1920 | switch (I->getOpcode()) { |
1921 | default: |
1922 | llvm_unreachable("Unexpected instruction." ); |
1923 | case Instruction::And: |
1924 | ResultReg = emitLogicalOp(ISDOpc: ISD::AND, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1)); |
1925 | break; |
1926 | case Instruction::Or: |
1927 | ResultReg = emitLogicalOp(ISDOpc: ISD::OR, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1)); |
1928 | break; |
1929 | case Instruction::Xor: |
1930 | ResultReg = emitLogicalOp(ISDOpc: ISD::XOR, RetVT: VT, LHS: I->getOperand(i: 0), RHS: I->getOperand(i: 1)); |
1931 | break; |
1932 | } |
1933 | if (!ResultReg) |
1934 | return false; |
1935 | |
1936 | updateValueMap(I, Reg: ResultReg); |
1937 | return true; |
1938 | } |
1939 | |
1940 | bool AArch64FastISel::selectLoad(const Instruction *I) { |
1941 | MVT VT; |
1942 | // Verify we have a legal type before going any further. Currently, we handle |
1943 | // simple types that will directly fit in a register (i32/f32/i64/f64) or |
1944 | // those that can be sign or zero-extended to a basic operation (i1/i8/i16). |
1945 | if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true) || |
1946 | cast<LoadInst>(Val: I)->isAtomic()) |
1947 | return false; |
1948 | |
1949 | const Value *SV = I->getOperand(i: 0); |
1950 | if (TLI.supportSwiftError()) { |
1951 | // Swifterror values can come from either a function parameter with |
1952 | // swifterror attribute or an alloca with swifterror attribute. |
1953 | if (const Argument *Arg = dyn_cast<Argument>(Val: SV)) { |
1954 | if (Arg->hasSwiftErrorAttr()) |
1955 | return false; |
1956 | } |
1957 | |
1958 | if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: SV)) { |
1959 | if (Alloca->isSwiftError()) |
1960 | return false; |
1961 | } |
1962 | } |
1963 | |
1964 | // See if we can handle this address. |
1965 | Address Addr; |
1966 | if (!computeAddress(Obj: I->getOperand(i: 0), Addr, Ty: I->getType())) |
1967 | return false; |
1968 | |
1969 | // Fold the following sign-/zero-extend into the load instruction. |
1970 | bool WantZExt = true; |
1971 | MVT RetVT = VT; |
1972 | const Value *IntExtVal = nullptr; |
1973 | if (I->hasOneUse()) { |
1974 | if (const auto *ZE = dyn_cast<ZExtInst>(Val: I->use_begin()->getUser())) { |
1975 | if (isTypeSupported(Ty: ZE->getType(), VT&: RetVT)) |
1976 | IntExtVal = ZE; |
1977 | else |
1978 | RetVT = VT; |
1979 | } else if (const auto *SE = dyn_cast<SExtInst>(Val: I->use_begin()->getUser())) { |
1980 | if (isTypeSupported(Ty: SE->getType(), VT&: RetVT)) |
1981 | IntExtVal = SE; |
1982 | else |
1983 | RetVT = VT; |
1984 | WantZExt = false; |
1985 | } |
1986 | } |
1987 | |
1988 | Register ResultReg = |
1989 | emitLoad(VT, RetVT, Addr, WantZExt, MMO: createMachineMemOperandFor(I)); |
1990 | if (!ResultReg) |
1991 | return false; |
1992 | |
1993 | // There are a few different cases we have to handle, because the load or the |
1994 | // sign-/zero-extend might not be selected by FastISel if we fall back to
1995 | // SelectionDAG. There is also an ordering issue when both instructions are in |
1996 | // different basic blocks. |
1997 | // 1.) The load instruction is selected by FastISel, but the integer extend |
1998 | // not. This usually happens when the integer extend is in a different |
1999 | // basic block and SelectionDAG took over for that basic block. |
2000 | // 2.) The load instruction is selected before the integer extend. This only |
2001 | // happens when the integer extend is in a different basic block. |
2002 | // 3.) The load instruction is selected by SelectionDAG and the integer extend |
2003 | // by FastISel. This happens if there are instructions between the load |
2004 | // and the integer extend that couldn't be selected by FastISel. |
2005 | if (IntExtVal) { |
2006 | // The integer extend hasn't been emitted yet. FastISel or SelectionDAG |
2007 | // could select it. Emit a copy to subreg if necessary. FastISel will remove |
2008 | // it when it selects the integer extend. |
2009 | Register Reg = lookUpRegForValue(V: IntExtVal); |
2010 | auto *MI = MRI.getUniqueVRegDef(Reg); |
2011 | if (!MI) { |
2012 | if (RetVT == MVT::i64 && VT <= MVT::i32) { |
2013 | if (WantZExt) { |
2014 | // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). |
2015 | MachineBasicBlock::iterator I(std::prev(x: FuncInfo.InsertPt)); |
2016 | ResultReg = std::prev(x: I)->getOperand(i: 0).getReg(); |
2017 | removeDeadCode(I, E: std::next(x: I)); |
2018 | } else |
2019 | ResultReg = fastEmitInst_extractsubreg(RetVT: MVT::i32, Op0: ResultReg, |
2020 | Idx: AArch64::sub_32); |
2021 | } |
2022 | updateValueMap(I, Reg: ResultReg); |
2023 | return true; |
2024 | } |
2025 | |
2026 | // The integer extend has already been emitted - delete all the instructions |
2027 | // that have been emitted by the integer extend lowering code and use the |
2028 | // result from the load instruction directly. |
2029 | while (MI) { |
2030 | Reg = 0; |
2031 | for (auto &Opnd : MI->uses()) { |
2032 | if (Opnd.isReg()) { |
2033 | Reg = Opnd.getReg(); |
2034 | break; |
2035 | } |
2036 | } |
2037 | MachineBasicBlock::iterator I(MI); |
2038 | removeDeadCode(I, E: std::next(x: I)); |
2039 | MI = nullptr; |
2040 | if (Reg) |
2041 | MI = MRI.getUniqueVRegDef(Reg); |
2042 | } |
2043 | updateValueMap(I: IntExtVal, Reg: ResultReg); |
2044 | return true; |
2045 | } |
2046 | |
2047 | updateValueMap(I, Reg: ResultReg); |
2048 | return true; |
2049 | } |
2050 | |
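// Store-release (STLRB/STLRH/STLRW/STLRX) only accepts a plain base-register
// address, so the caller must already have the address in a single register.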
2051 | bool AArch64FastISel::emitStoreRelease(MVT VT, Register SrcReg, |
2052 | Register AddrReg, |
2053 | MachineMemOperand *MMO) { |
2054 | unsigned Opc; |
2055 | switch (VT.SimpleTy) { |
2056 | default: return false; |
2057 | case MVT::i8: Opc = AArch64::STLRB; break; |
2058 | case MVT::i16: Opc = AArch64::STLRH; break; |
2059 | case MVT::i32: Opc = AArch64::STLRW; break; |
2060 | case MVT::i64: Opc = AArch64::STLRX; break; |
2061 | } |
2062 | |
2063 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
2064 | SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: 0); |
2065 | AddrReg = constrainOperandRegClass(II, Op: AddrReg, OpNum: 1); |
2066 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II) |
2067 | .addReg(RegNo: SrcReg) |
2068 | .addReg(RegNo: AddrReg) |
2069 | .addMemOperand(MMO); |
2070 | return true; |
2071 | } |
2072 | |
2073 | bool AArch64FastISel::emitStore(MVT VT, Register SrcReg, Address Addr, |
2074 | MachineMemOperand *MMO) { |
2075 | if (!TLI.allowsMisalignedMemoryAccesses(VT)) |
2076 | return false; |
2077 | |
2078 | // Simplify this down to something we can handle. |
2079 | if (!simplifyAddress(Addr, VT)) |
2080 | return false; |
2081 | |
2082 | unsigned ScaleFactor = getImplicitScaleFactor(VT); |
2083 | if (!ScaleFactor) |
2084 | llvm_unreachable("Unexpected value type." ); |
2085 | |
2086 | // Negative offsets require unscaled, 9-bit, signed immediate offsets. |
2087 | // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. |
2088 | bool UseScaled = true; |
2089 | if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { |
2090 | UseScaled = false; |
2091 | ScaleFactor = 1; |
2092 | } |
2093 | |
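// Store opcode table indexed by [AddrForm][Type], using the same four address
// forms as the load table above: unscaled, scaled, roX, and roW.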
2094 | static const unsigned OpcTable[4][6] = { |
2095 | { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, |
2096 | AArch64::STURSi, AArch64::STURDi }, |
2097 | { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, |
2098 | AArch64::STRSui, AArch64::STRDui }, |
2099 | { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, |
2100 | AArch64::STRSroX, AArch64::STRDroX }, |
2101 | { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, |
2102 | AArch64::STRSroW, AArch64::STRDroW } |
2103 | }; |
2104 | |
2105 | unsigned Opc; |
2106 | bool VTIsi1 = false; |
2107 | bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && |
2108 | Addr.getOffsetReg(); |
2109 | unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; |
2110 | if (Addr.getExtendType() == AArch64_AM::UXTW || |
2111 | Addr.getExtendType() == AArch64_AM::SXTW) |
2112 | Idx++; |
2113 | |
2114 | switch (VT.SimpleTy) { |
2115 | default: llvm_unreachable("Unexpected value type." ); |
2116 | case MVT::i1: VTIsi1 = true; [[fallthrough]]; |
2117 | case MVT::i8: Opc = OpcTable[Idx][0]; break; |
2118 | case MVT::i16: Opc = OpcTable[Idx][1]; break; |
2119 | case MVT::i32: Opc = OpcTable[Idx][2]; break; |
2120 | case MVT::i64: Opc = OpcTable[Idx][3]; break; |
2121 | case MVT::f32: Opc = OpcTable[Idx][4]; break; |
2122 | case MVT::f64: Opc = OpcTable[Idx][5]; break; |
2123 | } |
2124 | |
2125 | // Storing an i1 requires special handling. |
2126 | if (VTIsi1 && SrcReg != AArch64::WZR) { |
2127 | Register ANDReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: SrcReg, Imm: 1); |
2128 | assert(ANDReg && "Unexpected AND instruction emission failure." ); |
2129 | SrcReg = ANDReg; |
2130 | } |
2131 | // Create the base instruction, then add the operands. |
2132 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
2133 | SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: II.getNumDefs()); |
2134 | MachineInstrBuilder MIB = |
2135 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: SrcReg); |
2136 | addLoadStoreOperands(Addr, MIB, Flags: MachineMemOperand::MOStore, ScaleFactor, MMO); |
2137 | |
2138 | return true; |
2139 | } |
2140 | |
2141 | bool AArch64FastISel::selectStore(const Instruction *I) { |
2142 | MVT VT; |
2143 | const Value *Op0 = I->getOperand(i: 0); |
2144 | // Verify we have a legal type before going any further. Currently, we handle |
2145 | // simple types that will directly fit in a register (i32/f32/i64/f64) or |
2146 | // those that can be sign or zero-extended to a basic operation (i1/i8/i16). |
2147 | if (!isTypeSupported(Ty: Op0->getType(), VT, /*IsVectorAllowed=*/true)) |
2148 | return false; |
2149 | |
2150 | const Value *PtrV = I->getOperand(i: 1); |
2151 | if (TLI.supportSwiftError()) { |
2152 | // Swifterror values can come from either a function parameter with |
2153 | // swifterror attribute or an alloca with swifterror attribute. |
2154 | if (const Argument *Arg = dyn_cast<Argument>(Val: PtrV)) { |
2155 | if (Arg->hasSwiftErrorAttr()) |
2156 | return false; |
2157 | } |
2158 | |
2159 | if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(Val: PtrV)) { |
2160 | if (Alloca->isSwiftError()) |
2161 | return false; |
2162 | } |
2163 | } |
2164 | |
2165 | // Get the value to be stored into a register. Use the zero register directly |
2166 | // when possible to avoid an unnecessary copy and a wasted register. |
2167 | Register SrcReg; |
2168 | if (const auto *CI = dyn_cast<ConstantInt>(Val: Op0)) { |
2169 | if (CI->isZero()) |
2170 | SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
2171 | } else if (const auto *CF = dyn_cast<ConstantFP>(Val: Op0)) { |
2172 | if (CF->isZero() && !CF->isNegative()) { |
2173 | VT = MVT::getIntegerVT(BitWidth: VT.getSizeInBits()); |
2174 | SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
2175 | } |
2176 | } |
2177 | |
2178 | if (!SrcReg) |
2179 | SrcReg = getRegForValue(V: Op0); |
2180 | |
2181 | if (!SrcReg) |
2182 | return false; |
2183 | |
2184 | auto *SI = cast<StoreInst>(Val: I); |
2185 | |
2186 | // Try to emit a STLR for seq_cst/release. |
2187 | if (SI->isAtomic()) { |
2188 | AtomicOrdering Ord = SI->getOrdering(); |
2189 | // The non-atomic instructions are sufficient for relaxed stores. |
2190 | if (isReleaseOrStronger(AO: Ord)) { |
2191 | // The STLR addressing mode only supports a base reg; pass that directly. |
2192 | Register AddrReg = getRegForValue(V: PtrV); |
2193 | if (!AddrReg) |
2194 | return false; |
2195 | return emitStoreRelease(VT, SrcReg, AddrReg, |
2196 | MMO: createMachineMemOperandFor(I)); |
2197 | } |
2198 | } |
2199 | |
2200 | // See if we can handle this address. |
2201 | Address Addr; |
2202 | if (!computeAddress(Obj: PtrV, Addr, Ty: Op0->getType())) |
2203 | return false; |
2204 | |
2205 | if (!emitStore(VT, SrcReg, Addr, MMO: createMachineMemOperandFor(I))) |
2206 | return false; |
2207 | return true; |
2208 | } |
2209 | |
2210 | static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { |
2211 | switch (Pred) { |
2212 | case CmpInst::FCMP_ONE: |
2213 | case CmpInst::FCMP_UEQ: |
2214 | default: |
2215 | // AL is our "false" for now. The other two need more compares. |
2216 | return AArch64CC::AL; |
2217 | case CmpInst::ICMP_EQ: |
2218 | case CmpInst::FCMP_OEQ: |
2219 | return AArch64CC::EQ; |
2220 | case CmpInst::ICMP_SGT: |
2221 | case CmpInst::FCMP_OGT: |
2222 | return AArch64CC::GT; |
2223 | case CmpInst::ICMP_SGE: |
2224 | case CmpInst::FCMP_OGE: |
2225 | return AArch64CC::GE; |
2226 | case CmpInst::ICMP_UGT: |
2227 | case CmpInst::FCMP_UGT: |
2228 | return AArch64CC::HI; |
2229 | case CmpInst::FCMP_OLT: |
2230 | return AArch64CC::MI; |
2231 | case CmpInst::ICMP_ULE: |
2232 | case CmpInst::FCMP_OLE: |
2233 | return AArch64CC::LS; |
2234 | case CmpInst::FCMP_ORD: |
2235 | return AArch64CC::VC; |
2236 | case CmpInst::FCMP_UNO: |
2237 | return AArch64CC::VS; |
2238 | case CmpInst::FCMP_UGE: |
2239 | return AArch64CC::PL; |
2240 | case CmpInst::ICMP_SLT: |
2241 | case CmpInst::FCMP_ULT: |
2242 | return AArch64CC::LT; |
2243 | case CmpInst::ICMP_SLE: |
2244 | case CmpInst::FCMP_ULE: |
2245 | return AArch64CC::LE; |
2246 | case CmpInst::FCMP_UNE: |
2247 | case CmpInst::ICMP_NE: |
2248 | return AArch64CC::NE; |
2249 | case CmpInst::ICMP_UGE: |
2250 | return AArch64CC::HS; |
2251 | case CmpInst::ICMP_ULT: |
2252 | return AArch64CC::LO; |
2253 | } |
2254 | } |
2255 | |
2256 | /// Try to emit a combined compare-and-branch instruction. |
2257 | bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { |
2258 | // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions |
2259 | // will not be produced, as they are conditional branch instructions that do |
2260 | // not set flags. |
2261 | if (FuncInfo.MF->getFunction().hasFnAttribute( |
2262 | Kind: Attribute::SpeculativeLoadHardening)) |
2263 | return false; |
2264 | |
2265 | assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction" ); |
2266 | const CmpInst *CI = cast<CmpInst>(Val: BI->getCondition()); |
2267 | CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
2268 | |
2269 | const Value *LHS = CI->getOperand(i_nocapture: 0); |
2270 | const Value *RHS = CI->getOperand(i_nocapture: 1); |
2271 | |
2272 | MVT VT; |
2273 | if (!isTypeSupported(Ty: LHS->getType(), VT)) |
2274 | return false; |
2275 | |
2276 | unsigned BW = VT.getSizeInBits(); |
2277 | if (BW > 64) |
2278 | return false; |
2279 | |
2280 | MachineBasicBlock *TBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: 0)); |
2281 | MachineBasicBlock *FBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: 1)); |
2282 | |
2283 | // Try to take advantage of fallthrough opportunities. |
2284 | if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) { |
2285 | std::swap(a&: TBB, b&: FBB); |
2286 | Predicate = CmpInst::getInversePredicate(pred: Predicate); |
2287 | } |
2288 | |
2289 | int TestBit = -1; |
2290 | bool IsCmpNE; |
2291 | switch (Predicate) { |
2292 | default: |
2293 | return false; |
2294 | case CmpInst::ICMP_EQ: |
2295 | case CmpInst::ICMP_NE: |
2296 | if (isa<Constant>(Val: LHS) && cast<Constant>(Val: LHS)->isNullValue()) |
2297 | std::swap(a&: LHS, b&: RHS); |
2298 | |
2299 | if (!isa<Constant>(Val: RHS) || !cast<Constant>(Val: RHS)->isNullValue()) |
2300 | return false; |
2301 | |
2302 | if (const auto *AI = dyn_cast<BinaryOperator>(Val: LHS)) |
2303 | if (AI->getOpcode() == Instruction::And && isValueAvailable(V: AI)) { |
2304 | const Value *AndLHS = AI->getOperand(i_nocapture: 0); |
2305 | const Value *AndRHS = AI->getOperand(i_nocapture: 1); |
2306 | |
2307 | if (const auto *C = dyn_cast<ConstantInt>(Val: AndLHS)) |
2308 | if (C->getValue().isPowerOf2()) |
2309 | std::swap(a&: AndLHS, b&: AndRHS); |
2310 | |
2311 | if (const auto *C = dyn_cast<ConstantInt>(Val: AndRHS)) |
2312 | if (C->getValue().isPowerOf2()) { |
2313 | TestBit = C->getValue().logBase2(); |
2314 | LHS = AndLHS; |
2315 | } |
2316 | } |
2317 | |
2318 | if (VT == MVT::i1) |
2319 | TestBit = 0; |
2320 | |
2321 | IsCmpNE = Predicate == CmpInst::ICMP_NE; |
2322 | break; |
2323 | case CmpInst::ICMP_SLT: |
2324 | case CmpInst::ICMP_SGE: |
2325 | if (!isa<Constant>(Val: RHS) || !cast<Constant>(Val: RHS)->isNullValue()) |
2326 | return false; |
2327 | |
2328 | TestBit = BW - 1; |
2329 | IsCmpNE = Predicate == CmpInst::ICMP_SLT; |
2330 | break; |
2331 | case CmpInst::ICMP_SGT: |
2332 | case CmpInst::ICMP_SLE: |
2333 | if (!isa<ConstantInt>(Val: RHS)) |
2334 | return false; |
2335 | |
2336 | if (cast<ConstantInt>(Val: RHS)->getValue() != APInt(BW, -1, true)) |
2337 | return false; |
2338 | |
2339 | TestBit = BW - 1; |
2340 | IsCmpNE = Predicate == CmpInst::ICMP_SLE; |
2341 | break; |
2342 | } // end switch |
2343 | |
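// Opcode table indexed by [IsBitTest][IsCmpNE][Is64Bit]: a compare against
// zero becomes CBZ/CBNZ, while a single-bit test becomes TBZ/TBNZ.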
2344 | static const unsigned OpcTable[2][2][2] = { |
2345 | { {AArch64::CBZW, AArch64::CBZX }, |
2346 | {AArch64::CBNZW, AArch64::CBNZX} }, |
2347 | { {AArch64::TBZW, AArch64::TBZX }, |
2348 | {AArch64::TBNZW, AArch64::TBNZX} } |
2349 | }; |
2350 | |
2351 | bool IsBitTest = TestBit != -1; |
2352 | bool Is64Bit = BW == 64; |
2353 | if (TestBit < 32 && TestBit >= 0) |
2354 | Is64Bit = false; |
2355 | |
2356 | unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; |
2357 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
2358 | |
2359 | Register SrcReg = getRegForValue(V: LHS); |
2360 | if (!SrcReg) |
2361 | return false; |
2362 | |
2363 | if (BW == 64 && !Is64Bit) |
2364 | SrcReg = fastEmitInst_extractsubreg(RetVT: MVT::i32, Op0: SrcReg, Idx: AArch64::sub_32); |
2365 | |
2366 | if ((BW < 32) && !IsBitTest) |
2367 | SrcReg = emitIntExt(SrcVT: VT, SrcReg, DestVT: MVT::i32, /*isZExt=*/true); |
2368 | |
2369 | // Emit the combined compare and branch instruction. |
2370 | SrcReg = constrainOperandRegClass(II, Op: SrcReg, OpNum: II.getNumDefs()); |
2371 | MachineInstrBuilder MIB = |
2372 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc)) |
2373 | .addReg(RegNo: SrcReg); |
2374 | if (IsBitTest) |
2375 | MIB.addImm(Val: TestBit); |
2376 | MIB.addMBB(MBB: TBB); |
2377 | |
2378 | finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB); |
2379 | return true; |
2380 | } |
2381 | |
2382 | bool AArch64FastISel::selectBranch(const Instruction *I) { |
2383 | const BranchInst *BI = cast<BranchInst>(Val: I); |
2384 | if (BI->isUnconditional()) { |
2385 | MachineBasicBlock *MSucc = FuncInfo.getMBB(BB: BI->getSuccessor(i: 0)); |
2386 | fastEmitBranch(MSucc, DbgLoc: BI->getDebugLoc()); |
2387 | return true; |
2388 | } |
2389 | |
2390 | MachineBasicBlock *TBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: 0)); |
2391 | MachineBasicBlock *FBB = FuncInfo.getMBB(BB: BI->getSuccessor(i: 1)); |
2392 | |
2393 | if (const CmpInst *CI = dyn_cast<CmpInst>(Val: BI->getCondition())) { |
2394 | if (CI->hasOneUse() && isValueAvailable(V: CI)) { |
2395 | // Try to optimize or fold the cmp. |
2396 | CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
2397 | switch (Predicate) { |
2398 | default: |
2399 | break; |
2400 | case CmpInst::FCMP_FALSE: |
2401 | fastEmitBranch(MSucc: FBB, DbgLoc: MIMD.getDL()); |
2402 | return true; |
2403 | case CmpInst::FCMP_TRUE: |
2404 | fastEmitBranch(MSucc: TBB, DbgLoc: MIMD.getDL()); |
2405 | return true; |
2406 | } |
2407 | |
2408 | // Try to emit a combined compare-and-branch first. |
2409 | if (emitCompareAndBranch(BI)) |
2410 | return true; |
2411 | |
2412 | // Try to take advantage of fallthrough opportunities. |
2413 | if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) { |
2414 | std::swap(a&: TBB, b&: FBB); |
2415 | Predicate = CmpInst::getInversePredicate(pred: Predicate); |
2416 | } |
2417 | |
2418 | // Emit the cmp. |
2419 | if (!emitCmp(LHS: CI->getOperand(i_nocapture: 0), RHS: CI->getOperand(i_nocapture: 1), IsZExt: CI->isUnsigned())) |
2420 | return false; |
2421 | |
2422 | // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch |
2423 | // instruction. |
2424 | AArch64CC::CondCode CC = getCompareCC(Pred: Predicate); |
2425 | AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2426 | switch (Predicate) { |
2427 | default: |
2428 | break; |
2429 | case CmpInst::FCMP_UEQ: |
2430 | ExtraCC = AArch64CC::EQ; |
2431 | CC = AArch64CC::VS; |
2432 | break; |
2433 | case CmpInst::FCMP_ONE: |
2434 | ExtraCC = AArch64CC::MI; |
2435 | CC = AArch64CC::GT; |
2436 | break; |
2437 | } |
2438 | assert((CC != AArch64CC::AL) && "Unexpected condition code." ); |
2439 | |
2440 | // Emit the extra branch for FCMP_UEQ and FCMP_ONE. |
2441 | if (ExtraCC != AArch64CC::AL) { |
2442 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::Bcc)) |
2443 | .addImm(Val: ExtraCC) |
2444 | .addMBB(MBB: TBB); |
2445 | } |
2446 | |
2447 | // Emit the branch. |
2448 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::Bcc)) |
2449 | .addImm(Val: CC) |
2450 | .addMBB(MBB: TBB); |
2451 | |
2452 | finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB); |
2453 | return true; |
2454 | } |
2455 | } else if (const auto *CI = dyn_cast<ConstantInt>(Val: BI->getCondition())) { |
2456 | uint64_t Imm = CI->getZExtValue(); |
2457 | MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; |
2458 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::B)) |
2459 | .addMBB(MBB: Target); |
2460 | |
2461 | // Obtain the branch probability and add the target to the successor list. |
2462 | if (FuncInfo.BPI) { |
2463 | auto BranchProbability = FuncInfo.BPI->getEdgeProbability( |
2464 | Src: BI->getParent(), Dst: Target->getBasicBlock()); |
2465 | FuncInfo.MBB->addSuccessor(Succ: Target, Prob: BranchProbability); |
2466 | } else |
2467 | FuncInfo.MBB->addSuccessorWithoutProb(Succ: Target); |
2468 | return true; |
2469 | } else { |
2470 | AArch64CC::CondCode CC = AArch64CC::NE; |
2471 | if (foldXALUIntrinsic(CC, I, Cond: BI->getCondition())) { |
2472 | // Fake request the condition; otherwise the intrinsic might be completely
2473 | // optimized away.
2474 | Register CondReg = getRegForValue(V: BI->getCondition()); |
2475 | if (!CondReg) |
2476 | return false; |
2477 | |
2478 | // Emit the branch. |
2479 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::Bcc)) |
2480 | .addImm(Val: CC) |
2481 | .addMBB(MBB: TBB); |
2482 | |
2483 | finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB); |
2484 | return true; |
2485 | } |
2486 | } |
2487 | |
2488 | Register CondReg = getRegForValue(V: BI->getCondition()); |
2489 | if (!CondReg) |
2490 | return false; |
2491 | |
2492 | // i1 conditions come as i32 values, test the lowest bit with tb(n)z. |
2493 | unsigned Opcode = AArch64::TBNZW; |
2494 | if (FuncInfo.MBB->isLayoutSuccessor(MBB: TBB)) { |
2495 | std::swap(a&: TBB, b&: FBB); |
2496 | Opcode = AArch64::TBZW; |
2497 | } |
2498 | |
2499 | const MCInstrDesc &II = TII.get(Opcode); |
2500 | Register ConstrainedCondReg |
2501 | = constrainOperandRegClass(II, Op: CondReg, OpNum: II.getNumDefs()); |
2502 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II) |
2503 | .addReg(RegNo: ConstrainedCondReg) |
2504 | .addImm(Val: 0) |
2505 | .addMBB(MBB: TBB); |
2506 | |
2507 | finishCondBranch(BranchBB: BI->getParent(), TrueMBB: TBB, FalseMBB: FBB); |
2508 | return true; |
2509 | } |
2510 | |
2511 | bool AArch64FastISel::selectIndirectBr(const Instruction *I) { |
2512 | const IndirectBrInst *BI = cast<IndirectBrInst>(Val: I); |
2513 | Register AddrReg = getRegForValue(V: BI->getOperand(i_nocapture: 0)); |
2514 | if (!AddrReg) |
2515 | return false; |
2516 | |
2517 | // Authenticated indirectbr is not implemented yet. |
2518 | if (FuncInfo.MF->getFunction().hasFnAttribute(Kind: "ptrauth-indirect-gotos" )) |
2519 | return false; |
2520 | |
2521 | // Emit the indirect branch. |
2522 | const MCInstrDesc &II = TII.get(Opcode: AArch64::BR); |
2523 | AddrReg = constrainOperandRegClass(II, Op: AddrReg, OpNum: II.getNumDefs()); |
2524 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: AddrReg); |
2525 | |
2526 | // Make sure the CFG is up-to-date. |
2527 | for (const auto *Succ : BI->successors()) |
2528 | FuncInfo.MBB->addSuccessor(Succ: FuncInfo.getMBB(BB: Succ)); |
2529 | |
2530 | return true; |
2531 | } |
2532 | |
2533 | bool AArch64FastISel::selectCmp(const Instruction *I) { |
2534 | const CmpInst *CI = cast<CmpInst>(Val: I); |
2535 | |
2536 | // Vectors of i1 are weird: bail out. |
2537 | if (CI->getType()->isVectorTy()) |
2538 | return false; |
2539 | |
2540 | // Try to optimize or fold the cmp. |
2541 | CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); |
2542 | Register ResultReg; |
2543 | switch (Predicate) { |
2544 | default: |
2545 | break; |
2546 | case CmpInst::FCMP_FALSE: |
2547 | ResultReg = createResultReg(RC: &AArch64::GPR32RegClass); |
2548 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
2549 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg) |
2550 | .addReg(RegNo: AArch64::WZR, flags: getKillRegState(B: true)); |
2551 | break; |
2552 | case CmpInst::FCMP_TRUE: |
2553 | ResultReg = fastEmit_i(VT: MVT::i32, RetVT: MVT::i32, Opcode: ISD::Constant, imm0: 1); |
2554 | break; |
2555 | } |
2556 | |
2557 | if (ResultReg) { |
2558 | updateValueMap(I, Reg: ResultReg); |
2559 | return true; |
2560 | } |
2561 | |
2562 | // Emit the cmp. |
2563 | if (!emitCmp(LHS: CI->getOperand(i_nocapture: 0), RHS: CI->getOperand(i_nocapture: 1), IsZExt: CI->isUnsigned())) |
2564 | return false; |
2565 | |
2566 | ResultReg = createResultReg(RC: &AArch64::GPR32RegClass); |
2567 | |
2568 | // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These |
2569 | // condition codes are inverted, because they are used by CSINC. |
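// CSINC Wd, WZR, WZR, cc materializes 0 when cc holds and 1 otherwise, so
// chaining two of them effectively ORs the two required checks.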
2570 | static unsigned CondCodeTable[2][2] = { |
2571 | { AArch64CC::NE, AArch64CC::VC }, |
2572 | { AArch64CC::PL, AArch64CC::LE } |
2573 | }; |
2574 | unsigned *CondCodes = nullptr; |
2575 | switch (Predicate) { |
2576 | default: |
2577 | break; |
2578 | case CmpInst::FCMP_UEQ: |
2579 | CondCodes = &CondCodeTable[0][0]; |
2580 | break; |
2581 | case CmpInst::FCMP_ONE: |
2582 | CondCodes = &CondCodeTable[1][0]; |
2583 | break; |
2584 | } |
2585 | |
2586 | if (CondCodes) { |
2587 | Register TmpReg1 = createResultReg(RC: &AArch64::GPR32RegClass); |
2588 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::CSINCWr), |
2589 | DestReg: TmpReg1) |
2590 | .addReg(RegNo: AArch64::WZR, flags: getKillRegState(B: true)) |
2591 | .addReg(RegNo: AArch64::WZR, flags: getKillRegState(B: true)) |
2592 | .addImm(Val: CondCodes[0]); |
2593 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::CSINCWr), |
2594 | DestReg: ResultReg) |
2595 | .addReg(RegNo: TmpReg1, flags: getKillRegState(B: true)) |
2596 | .addReg(RegNo: AArch64::WZR, flags: getKillRegState(B: true)) |
2597 | .addImm(Val: CondCodes[1]); |
2598 | |
2599 | updateValueMap(I, Reg: ResultReg); |
2600 | return true; |
2601 | } |
2602 | |
2603 | // Now set a register based on the comparison. |
2604 | AArch64CC::CondCode CC = getCompareCC(Pred: Predicate); |
2605 | assert((CC != AArch64CC::AL) && "Unexpected condition code." ); |
2606 | AArch64CC::CondCode invertedCC = getInvertedCondCode(Code: CC); |
2607 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::CSINCWr), |
2608 | DestReg: ResultReg) |
2609 | .addReg(RegNo: AArch64::WZR, flags: getKillRegState(B: true)) |
2610 | .addReg(RegNo: AArch64::WZR, flags: getKillRegState(B: true)) |
2611 | .addImm(Val: invertedCC); |
2612 | |
2613 | updateValueMap(I, Reg: ResultReg); |
2614 | return true; |
2615 | } |
2616 | |
2617 | /// Optimize selects of i1 if one of the operands has a 'true' or 'false' |
2618 | /// value. |
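/// The folds are: select(c, 1, f) -> c | f, select(c, 0, f) -> ~c & f,
/// select(c, t, 1) -> ~c | t, and select(c, t, 0) -> c & t.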
2619 | bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { |
2620 | if (!SI->getType()->isIntegerTy(Bitwidth: 1)) |
2621 | return false; |
2622 | |
2623 | const Value *Src1Val, *Src2Val; |
2624 | unsigned Opc = 0; |
2625 | bool NeedExtraOp = false;
2626 | if (auto *CI = dyn_cast<ConstantInt>(Val: SI->getTrueValue())) { |
2627 | if (CI->isOne()) { |
2628 | Src1Val = SI->getCondition(); |
2629 | Src2Val = SI->getFalseValue(); |
2630 | Opc = AArch64::ORRWrr; |
2631 | } else { |
2632 | assert(CI->isZero()); |
2633 | Src1Val = SI->getFalseValue(); |
2634 | Src2Val = SI->getCondition(); |
2635 | Opc = AArch64::BICWrr; |
2636 | } |
2637 | } else if (auto *CI = dyn_cast<ConstantInt>(Val: SI->getFalseValue())) { |
2638 | if (CI->isOne()) { |
2639 | Src1Val = SI->getCondition(); |
2640 | Src2Val = SI->getTrueValue(); |
2641 | Opc = AArch64::ORRWrr; |
2642 | NeedExtraOp = true; |
2643 | } else { |
2644 | assert(CI->isZero()); |
2645 | Src1Val = SI->getCondition(); |
2646 | Src2Val = SI->getTrueValue(); |
2647 | Opc = AArch64::ANDWrr; |
2648 | } |
2649 | } |
2650 | |
2651 | if (!Opc) |
2652 | return false; |
2653 | |
2654 | Register Src1Reg = getRegForValue(V: Src1Val); |
2655 | if (!Src1Reg) |
2656 | return false; |
2657 | |
2658 | Register Src2Reg = getRegForValue(V: Src2Val); |
2659 | if (!Src2Reg) |
2660 | return false; |
2661 | |
2662 | if (NeedExtraOp) |
2663 | Src1Reg = emitLogicalOp_ri(ISDOpc: ISD::XOR, RetVT: MVT::i32, LHSReg: Src1Reg, Imm: 1); |
2664 | |
2665 | Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC: &AArch64::GPR32RegClass, Op0: Src1Reg, |
2666 | Op1: Src2Reg); |
2667 | updateValueMap(I: SI, Reg: ResultReg); |
2668 | return true; |
2669 | } |
2670 | |
2671 | bool AArch64FastISel::selectSelect(const Instruction *I) { |
2672 | assert(isa<SelectInst>(I) && "Expected a select instruction." ); |
2673 | MVT VT; |
2674 | if (!isTypeSupported(Ty: I->getType(), VT)) |
2675 | return false; |
2676 | |
2677 | unsigned Opc; |
2678 | const TargetRegisterClass *RC; |
2679 | switch (VT.SimpleTy) { |
2680 | default: |
2681 | return false; |
2682 | case MVT::i1: |
2683 | case MVT::i8: |
2684 | case MVT::i16: |
2685 | case MVT::i32: |
2686 | Opc = AArch64::CSELWr; |
2687 | RC = &AArch64::GPR32RegClass; |
2688 | break; |
2689 | case MVT::i64: |
2690 | Opc = AArch64::CSELXr; |
2691 | RC = &AArch64::GPR64RegClass; |
2692 | break; |
2693 | case MVT::f32: |
2694 | Opc = AArch64::FCSELSrrr; |
2695 | RC = &AArch64::FPR32RegClass; |
2696 | break; |
2697 | case MVT::f64: |
2698 | Opc = AArch64::FCSELDrrr; |
2699 | RC = &AArch64::FPR64RegClass; |
2700 | break; |
2701 | } |
2702 | |
2703 | const SelectInst *SI = cast<SelectInst>(Val: I); |
2704 | const Value *Cond = SI->getCondition(); |
2705 | AArch64CC::CondCode CC = AArch64CC::NE; |
2706 | AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2707 | |
2708 | if (optimizeSelect(SI)) |
2709 | return true; |
2710 | |
2711 | // Try to pick up the flags, so we don't have to emit another compare.
2712 | if (foldXALUIntrinsic(CC, I, Cond)) { |
2713 | // Fake request the condition to force emission of the XALU intrinsic. |
2714 | Register CondReg = getRegForValue(V: Cond); |
2715 | if (!CondReg) |
2716 | return false; |
2717 | } else if (isa<CmpInst>(Val: Cond) && cast<CmpInst>(Val: Cond)->hasOneUse() && |
2718 | isValueAvailable(V: Cond)) { |
2719 | const auto *Cmp = cast<CmpInst>(Val: Cond); |
2720 | // Try to optimize or fold the cmp. |
2721 | CmpInst::Predicate Predicate = optimizeCmpPredicate(CI: Cmp); |
2722 | const Value *FoldSelect = nullptr; |
2723 | switch (Predicate) { |
2724 | default: |
2725 | break; |
2726 | case CmpInst::FCMP_FALSE: |
2727 | FoldSelect = SI->getFalseValue(); |
2728 | break; |
2729 | case CmpInst::FCMP_TRUE: |
2730 | FoldSelect = SI->getTrueValue(); |
2731 | break; |
2732 | } |
2733 | |
2734 | if (FoldSelect) { |
2735 | Register SrcReg = getRegForValue(V: FoldSelect); |
2736 | if (!SrcReg) |
2737 | return false; |
2738 | |
2739 | updateValueMap(I, Reg: SrcReg); |
2740 | return true; |
2741 | } |
2742 | |
2743 | // Emit the cmp. |
2744 | if (!emitCmp(LHS: Cmp->getOperand(i_nocapture: 0), RHS: Cmp->getOperand(i_nocapture: 1), IsZExt: Cmp->isUnsigned())) |
2745 | return false; |
2746 | |
2747 | // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. |
2748 | CC = getCompareCC(Pred: Predicate); |
2749 | switch (Predicate) { |
2750 | default: |
2751 | break; |
2752 | case CmpInst::FCMP_UEQ: |
2753 | ExtraCC = AArch64CC::EQ; |
2754 | CC = AArch64CC::VS; |
2755 | break; |
2756 | case CmpInst::FCMP_ONE: |
2757 | ExtraCC = AArch64CC::MI; |
2758 | CC = AArch64CC::GT; |
2759 | break; |
2760 | } |
2761 | assert((CC != AArch64CC::AL) && "Unexpected condition code." ); |
2762 | } else { |
2763 | Register CondReg = getRegForValue(V: Cond); |
2764 | if (!CondReg) |
2765 | return false; |
2766 | |
2767 | const MCInstrDesc &II = TII.get(Opcode: AArch64::ANDSWri); |
2768 | CondReg = constrainOperandRegClass(II, Op: CondReg, OpNum: 1); |
2769 | |
2770 | // Emit a TST instruction (ANDS wzr, reg, #imm). |
2771 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II, |
2772 | DestReg: AArch64::WZR) |
2773 | .addReg(RegNo: CondReg) |
2774 | .addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: 1, regSize: 32)); |
2775 | } |
2776 | |
2777 | Register Src1Reg = getRegForValue(V: SI->getTrueValue()); |
2778 | Register Src2Reg = getRegForValue(V: SI->getFalseValue()); |
2779 | |
2780 | if (!Src1Reg || !Src2Reg) |
2781 | return false; |
2782 | |
2783 | if (ExtraCC != AArch64CC::AL) |
2784 | Src2Reg = fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: Src1Reg, Op1: Src2Reg, Imm: ExtraCC); |
2785 | |
2786 | Register ResultReg = fastEmitInst_rri(MachineInstOpcode: Opc, RC, Op0: Src1Reg, Op1: Src2Reg, Imm: CC); |
2787 | updateValueMap(I, Reg: ResultReg); |
2788 | return true; |
2789 | } |
2790 | |
2791 | bool AArch64FastISel::selectFPExt(const Instruction *I) { |
2792 | Value *V = I->getOperand(i: 0); |
2793 | if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) |
2794 | return false; |
2795 | |
2796 | Register Op = getRegForValue(V); |
2797 | if (Op == 0) |
2798 | return false; |
2799 | |
2800 | Register ResultReg = createResultReg(RC: &AArch64::FPR64RegClass); |
2801 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::FCVTDSr), |
2802 | DestReg: ResultReg).addReg(RegNo: Op); |
2803 | updateValueMap(I, Reg: ResultReg); |
2804 | return true; |
2805 | } |
2806 | |
2807 | bool AArch64FastISel::selectFPTrunc(const Instruction *I) { |
2808 | Value *V = I->getOperand(i: 0); |
2809 | if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) |
2810 | return false; |
2811 | |
2812 | Register Op = getRegForValue(V); |
2813 | if (Op == 0) |
2814 | return false; |
2815 | |
2816 | Register ResultReg = createResultReg(RC: &AArch64::FPR32RegClass); |
2817 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::FCVTSDr), |
2818 | DestReg: ResultReg).addReg(RegNo: Op); |
2819 | updateValueMap(I, Reg: ResultReg); |
2820 | return true; |
2821 | } |
2822 | |
2823 | // FPToUI and FPToSI |
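// Both use the round-toward-zero conversions (FCVTZS/FCVTZU); f16, bf16, and
// f128 sources are left to SelectionDAG.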
2824 | bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { |
2825 | MVT DestVT; |
2826 | if (!isTypeLegal(Ty: I->getType(), VT&: DestVT) || DestVT.isVector()) |
2827 | return false; |
2828 | |
2829 | Register SrcReg = getRegForValue(V: I->getOperand(i: 0)); |
2830 | if (!SrcReg) |
2831 | return false; |
2832 | |
2833 | EVT SrcVT = TLI.getValueType(DL, Ty: I->getOperand(i: 0)->getType(), AllowUnknown: true); |
2834 | if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16) |
2835 | return false; |
2836 | |
2837 | unsigned Opc; |
2838 | if (SrcVT == MVT::f64) { |
2839 | if (Signed) |
2840 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; |
2841 | else |
2842 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; |
2843 | } else { |
2844 | if (Signed) |
2845 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; |
2846 | else |
2847 | Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; |
2848 | } |
2849 | Register ResultReg = createResultReg( |
2850 | RC: DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); |
2851 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg) |
2852 | .addReg(RegNo: SrcReg); |
2853 | updateValueMap(I, Reg: ResultReg); |
2854 | return true; |
2855 | } |
2856 | |
2857 | bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { |
2858 | MVT DestVT; |
2859 | if (!isTypeLegal(Ty: I->getType(), VT&: DestVT) || DestVT.isVector()) |
2860 | return false; |
2861 | // Let regular ISEL handle FP16 |
2862 | if (DestVT == MVT::f16 || DestVT == MVT::bf16) |
2863 | return false; |
2864 | |
2865 | assert((DestVT == MVT::f32 || DestVT == MVT::f64) && |
2866 |          "Unexpected value type."); |
2867 | |
2868 | Register SrcReg = getRegForValue(V: I->getOperand(i: 0)); |
2869 | if (!SrcReg) |
2870 | return false; |
2871 | |
2872 | EVT SrcVT = TLI.getValueType(DL, Ty: I->getOperand(i: 0)->getType(), AllowUnknown: true); |
2873 | |
2874 | // Handle sign-extension. |
2875 | if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { |
2876 | SrcReg = |
2877 | emitIntExt(SrcVT: SrcVT.getSimpleVT(), SrcReg, DestVT: MVT::i32, /*isZExt*/ !Signed); |
2878 | if (!SrcReg) |
2879 | return false; |
2880 | } |
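  // Any i1/i8/i16 source has been widened to i32 above, so the [SU]CVTF
  // selection below only needs the i32 and i64 forms.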
2881 | |
2882 | unsigned Opc; |
2883 | if (SrcVT == MVT::i64) { |
2884 | if (Signed) |
2885 | Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; |
2886 | else |
2887 | Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; |
2888 | } else { |
2889 | if (Signed) |
2890 | Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; |
2891 | else |
2892 | Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; |
2893 | } |
2894 | |
2895 | Register ResultReg = fastEmitInst_r(MachineInstOpcode: Opc, RC: TLI.getRegClassFor(VT: DestVT), Op0: SrcReg); |
2896 | updateValueMap(I, Reg: ResultReg); |
2897 | return true; |
2898 | } |
2899 | |
2900 | bool AArch64FastISel::fastLowerArguments() { |
2901 | if (!FuncInfo.CanLowerReturn) |
2902 | return false; |
2903 | |
2904 | const Function *F = FuncInfo.Fn; |
2905 | if (F->isVarArg()) |
2906 | return false; |
2907 | |
2908 | CallingConv::ID CC = F->getCallingConv(); |
2909 | if (CC != CallingConv::C && CC != CallingConv::Swift) |
2910 | return false; |
2911 | |
2912 | if (Subtarget->hasCustomCallingConv()) |
2913 | return false; |
2914 | |
2915 | // Only handle simple cases of up to 8 GPR and FPR each. |
2916 | unsigned GPRCnt = 0; |
2917 | unsigned FPRCnt = 0; |
2918 | for (auto const &Arg : F->args()) { |
2919 | if (Arg.hasAttribute(Kind: Attribute::ByVal) || |
2920 | Arg.hasAttribute(Kind: Attribute::InReg) || |
2921 | Arg.hasAttribute(Kind: Attribute::StructRet) || |
2922 | Arg.hasAttribute(Kind: Attribute::SwiftSelf) || |
2923 | Arg.hasAttribute(Kind: Attribute::SwiftAsync) || |
2924 | Arg.hasAttribute(Kind: Attribute::SwiftError) || |
2925 | Arg.hasAttribute(Kind: Attribute::Nest)) |
2926 | return false; |
2927 | |
2928 | Type *ArgTy = Arg.getType(); |
2929 | if (ArgTy->isStructTy() || ArgTy->isArrayTy()) |
2930 | return false; |
2931 | |
2932 | EVT ArgVT = TLI.getValueType(DL, Ty: ArgTy); |
2933 | if (!ArgVT.isSimple()) |
2934 | return false; |
2935 | |
2936 | MVT VT = ArgVT.getSimpleVT().SimpleTy; |
2937 | if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) |
2938 | return false; |
2939 | |
2940 | if (VT.isVector() && |
2941 | (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) |
2942 | return false; |
2943 | |
2944 | if (VT >= MVT::i1 && VT <= MVT::i64) |
2945 | ++GPRCnt; |
2946 | else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || |
2947 | VT.is128BitVector()) |
2948 | ++FPRCnt; |
2949 | else |
2950 | return false; |
2951 | |
2952 | if (GPRCnt > 8 || FPRCnt > 8) |
2953 | return false; |
2954 | } |
2955 | |
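  // AAPCS64 argument registers, indexed by register class (W, X, H, S, D, Q)
  // and argument position; the counting loop above guarantees at most eight
  // of each kind.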
2956 | static const MCPhysReg Registers[6][8] = { |
2957 | { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, |
2958 | AArch64::W5, AArch64::W6, AArch64::W7 }, |
2959 | { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, |
2960 | AArch64::X5, AArch64::X6, AArch64::X7 }, |
2961 | { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, |
2962 | AArch64::H5, AArch64::H6, AArch64::H7 }, |
2963 | { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, |
2964 | AArch64::S5, AArch64::S6, AArch64::S7 }, |
2965 | { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, |
2966 | AArch64::D5, AArch64::D6, AArch64::D7 }, |
2967 | { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, |
2968 | AArch64::Q5, AArch64::Q6, AArch64::Q7 } |
2969 | }; |
2970 | |
2971 | unsigned GPRIdx = 0; |
2972 | unsigned FPRIdx = 0; |
2973 | for (auto const &Arg : F->args()) { |
2974 | MVT VT = TLI.getSimpleValueType(DL, Ty: Arg.getType()); |
2975 | unsigned SrcReg; |
2976 | const TargetRegisterClass *RC; |
2977 | if (VT >= MVT::i1 && VT <= MVT::i32) { |
2978 | SrcReg = Registers[0][GPRIdx++]; |
2979 | RC = &AArch64::GPR32RegClass; |
2980 | VT = MVT::i32; |
2981 | } else if (VT == MVT::i64) { |
2982 | SrcReg = Registers[1][GPRIdx++]; |
2983 | RC = &AArch64::GPR64RegClass; |
2984 | } else if (VT == MVT::f16 || VT == MVT::bf16) { |
2985 | SrcReg = Registers[2][FPRIdx++]; |
2986 | RC = &AArch64::FPR16RegClass; |
2987 | } else if (VT == MVT::f32) { |
2988 | SrcReg = Registers[3][FPRIdx++]; |
2989 | RC = &AArch64::FPR32RegClass; |
2990 | } else if ((VT == MVT::f64) || VT.is64BitVector()) { |
2991 | SrcReg = Registers[4][FPRIdx++]; |
2992 | RC = &AArch64::FPR64RegClass; |
2993 | } else if (VT.is128BitVector()) { |
2994 | SrcReg = Registers[5][FPRIdx++]; |
2995 | RC = &AArch64::FPR128RegClass; |
2996 | } else |
2997 |       llvm_unreachable("Unexpected value type."); |
2998 | |
2999 | Register DstReg = FuncInfo.MF->addLiveIn(PReg: SrcReg, RC); |
3000 | // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. |
3001 | // Without this, EmitLiveInCopies may eliminate the livein if its only |
3002 | // use is a bitcast (which isn't turned into an instruction). |
3003 | Register ResultReg = createResultReg(RC); |
3004 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3005 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg) |
3006 | .addReg(RegNo: DstReg, flags: getKillRegState(B: true)); |
3007 | updateValueMap(I: &Arg, Reg: ResultReg); |
3008 | } |
3009 | return true; |
3010 | } |
3011 | |
3012 | bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, |
3013 | SmallVectorImpl<MVT> &OutVTs, |
3014 | unsigned &NumBytes) { |
3015 | CallingConv::ID CC = CLI.CallConv; |
3016 | SmallVector<CCValAssign, 16> ArgLocs; |
3017 | CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); |
3018 | CCInfo.AnalyzeCallOperands(ArgVTs&: OutVTs, Flags&: CLI.OutFlags, Fn: CCAssignFnForCall(CC)); |
3019 | |
3020 | // Get a count of how many bytes are to be pushed on the stack. |
3021 | NumBytes = CCInfo.getStackSize(); |
3022 | |
3023 | // Issue CALLSEQ_START |
3024 | unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); |
3025 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AdjStackDown)) |
3026 | .addImm(Val: NumBytes).addImm(Val: 0); |
3027 | |
3028 | // Process the args. |
3029 | for (CCValAssign &VA : ArgLocs) { |
3030 | const Value *ArgVal = CLI.OutVals[VA.getValNo()]; |
3031 | MVT ArgVT = OutVTs[VA.getValNo()]; |
3032 | |
3033 | Register ArgReg = getRegForValue(V: ArgVal); |
3034 | if (!ArgReg) |
3035 | return false; |
3036 | |
3037 | // Handle arg promotion: SExt, ZExt, AExt. |
3038 | switch (VA.getLocInfo()) { |
3039 | case CCValAssign::Full: |
3040 | break; |
3041 | case CCValAssign::SExt: { |
3042 | MVT DestVT = VA.getLocVT(); |
3043 | MVT SrcVT = ArgVT; |
3044 | ArgReg = emitIntExt(SrcVT, SrcReg: ArgReg, DestVT, /*isZExt=*/false); |
3045 | if (!ArgReg) |
3046 | return false; |
3047 | break; |
3048 | } |
3049 | case CCValAssign::AExt: |
3050 | // Intentional fall-through. |
3051 | case CCValAssign::ZExt: { |
3052 | MVT DestVT = VA.getLocVT(); |
3053 | MVT SrcVT = ArgVT; |
3054 | ArgReg = emitIntExt(SrcVT, SrcReg: ArgReg, DestVT, /*isZExt=*/true); |
3055 | if (!ArgReg) |
3056 | return false; |
3057 | break; |
3058 | } |
3059 | default: |
3060 |       llvm_unreachable("Unknown arg promotion!"); |
3061 | } |
3062 | |
3063 | // Now copy/store arg to correct locations. |
3064 | if (VA.isRegLoc() && !VA.needsCustom()) { |
3065 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3066 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: VA.getLocReg()).addReg(RegNo: ArgReg); |
3067 | CLI.OutRegs.push_back(Elt: VA.getLocReg()); |
3068 | } else if (VA.needsCustom()) { |
3069 | // FIXME: Handle custom args. |
3070 | return false; |
3071 | } else { |
3072 |       assert(VA.isMemLoc() && "Assuming store on stack."); |
3073 | |
3074 | // Don't emit stores for undef values. |
3075 | if (isa<UndefValue>(Val: ArgVal)) |
3076 | continue; |
3077 | |
3078 | // Need to store on the stack. |
3079 | unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; |
3080 | |
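      // On big-endian targets a sub-8-byte argument is stored into the
      // higher-addressed bytes of its 8-byte stack slot, so bump the offset
      // by the padding amount.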
3081 | unsigned BEAlign = 0; |
3082 | if (ArgSize < 8 && !Subtarget->isLittleEndian()) |
3083 | BEAlign = 8 - ArgSize; |
3084 | |
3085 | Address Addr; |
3086 | Addr.setKind(Address::RegBase); |
3087 | Addr.setReg(AArch64::SP); |
3088 | Addr.setOffset(VA.getLocMemOffset() + BEAlign); |
3089 | |
3090 | Align Alignment = DL.getABITypeAlign(Ty: ArgVal->getType()); |
3091 | MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( |
3092 | PtrInfo: MachinePointerInfo::getStack(MF&: *FuncInfo.MF, Offset: Addr.getOffset()), |
3093 | F: MachineMemOperand::MOStore, Size: ArgVT.getStoreSize(), BaseAlignment: Alignment); |
3094 | |
3095 | if (!emitStore(VT: ArgVT, SrcReg: ArgReg, Addr, MMO)) |
3096 | return false; |
3097 | } |
3098 | } |
3099 | return true; |
3100 | } |
3101 | |
3102 | bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) { |
3103 | CallingConv::ID CC = CLI.CallConv; |
3104 | |
3105 | // Issue CALLSEQ_END |
3106 | unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); |
3107 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AdjStackUp)) |
3108 | .addImm(Val: NumBytes).addImm(Val: 0); |
3109 | |
3110 | // Now the return values. |
3111 | SmallVector<CCValAssign, 16> RVLocs; |
3112 | CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); |
3113 | CCInfo.AnalyzeCallResult(Ins: CLI.Ins, Fn: CCAssignFnForCall(CC)); |
3114 | |
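  // CreateRegs allocates consecutive virtual registers for the return type,
  // so the i-th return value can be addressed as ResultReg + i below.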
3115 | Register ResultReg = FuncInfo.CreateRegs(Ty: CLI.RetTy); |
3116 | for (unsigned i = 0; i != RVLocs.size(); ++i) { |
3117 | CCValAssign &VA = RVLocs[i]; |
3118 | MVT CopyVT = VA.getValVT(); |
3119 | Register CopyReg = ResultReg + i; |
3120 | |
3121 | // TODO: Handle big-endian results |
3122 | if (CopyVT.isVector() && !Subtarget->isLittleEndian()) |
3123 | return false; |
3124 | |
3125 | // Copy result out of their specified physreg. |
3126 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: TargetOpcode::COPY), |
3127 | DestReg: CopyReg) |
3128 | .addReg(RegNo: VA.getLocReg()); |
3129 | CLI.InRegs.push_back(Elt: VA.getLocReg()); |
3130 | } |
3131 | |
3132 | CLI.ResultReg = ResultReg; |
3133 | CLI.NumResultRegs = RVLocs.size(); |
3134 | |
3135 | return true; |
3136 | } |
3137 | |
3138 | bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { |
3139 | CallingConv::ID CC = CLI.CallConv; |
3140 | bool IsTailCall = CLI.IsTailCall; |
3141 | bool IsVarArg = CLI.IsVarArg; |
3142 | const Value *Callee = CLI.Callee; |
3143 | MCSymbol *Symbol = CLI.Symbol; |
3144 | |
3145 | if (!Callee && !Symbol) |
3146 | return false; |
3147 | |
3148 | // Allow SelectionDAG isel to handle calls to functions like setjmp that need |
3149 | // a bti instruction following the call. |
3150 | if (CLI.CB && CLI.CB->hasFnAttr(Kind: Attribute::ReturnsTwice) && |
3151 | !Subtarget->noBTIAtReturnTwice() && |
3152 | MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) |
3153 | return false; |
3154 | |
3155 | // Allow SelectionDAG isel to handle indirect calls with KCFI checks. |
3156 | if (CLI.CB && CLI.CB->isIndirectCall() && |
3157 | CLI.CB->getOperandBundle(ID: LLVMContext::OB_kcfi)) |
3158 | return false; |
3159 | |
3160 | // Allow SelectionDAG isel to handle tail calls. |
3161 | if (IsTailCall) |
3162 | return false; |
3163 | |
3164 | // FIXME: we could and should support this, but for now correctness at -O0 is |
3165 | // more important. |
3166 | if (Subtarget->isTargetILP32()) |
3167 | return false; |
3168 | |
3169 | CodeModel::Model CM = TM.getCodeModel(); |
3170 | // Only support the small-addressing and large code models. |
3171 | if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) |
3172 | return false; |
3173 | |
3174 | // FIXME: Add large code model support for ELF. |
3175 | if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) |
3176 | return false; |
3177 | |
3178 | // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind |
3179 | // attribute. Check "RtLibUseGOT" instead. |
3180 | if (MF->getFunction().getParent()->getRtLibUseGOT()) |
3181 | return false; |
3182 | |
3183 | // Let SDISel handle vararg functions. |
3184 | if (IsVarArg) |
3185 | return false; |
3186 | |
3187 | if (Subtarget->isWindowsArm64EC()) |
3188 | return false; |
3189 | |
3190 | for (auto Flag : CLI.OutFlags) |
3191 | if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || |
3192 | Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError()) |
3193 | return false; |
3194 | |
3195 | // Set up the argument vectors. |
3196 | SmallVector<MVT, 16> OutVTs; |
3197 | OutVTs.reserve(N: CLI.OutVals.size()); |
3198 | |
3199 | for (auto *Val : CLI.OutVals) { |
3200 | MVT VT; |
3201 | if (!isTypeLegal(Ty: Val->getType(), VT) && |
3202 | !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) |
3203 | return false; |
3204 | |
3205 | // We don't handle vector parameters yet. |
3206 | if (VT.isVector() || VT.getSizeInBits() > 64) |
3207 | return false; |
3208 | |
3209 | OutVTs.push_back(Elt: VT); |
3210 | } |
3211 | |
3212 | Address Addr; |
3213 | if (Callee && !computeCallAddress(V: Callee, Addr)) |
3214 | return false; |
3215 | |
3216 |   // The weak function target may be zero; in that case we must use indirect |
3217 |   // addressing via a stub on Windows, as it may be out of range for a |
3218 |   // PC-relative jump. |
3219 | if (Subtarget->isTargetWindows() && Addr.getGlobalValue() && |
3220 | Addr.getGlobalValue()->hasExternalWeakLinkage()) |
3221 | return false; |
3222 | |
3223 | // Handle the arguments now that we've gotten them. |
3224 | unsigned NumBytes; |
3225 | if (!processCallArgs(CLI, OutVTs, NumBytes)) |
3226 | return false; |
3227 | |
3228 | const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); |
3229 | if (RegInfo->isAnyArgRegReserved(MF: *MF)) |
3230 | RegInfo->emitReservedArgRegCallError(MF: *MF); |
3231 | |
3232 | // Issue the call. |
3233 | MachineInstrBuilder MIB; |
3234 | if (Subtarget->useSmallAddressing()) { |
3235 | const MCInstrDesc &II = |
3236 | TII.get(Opcode: Addr.getReg() ? getBLRCallOpcode(MF: *MF) : (unsigned)AArch64::BL); |
3237 | MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II); |
3238 | if (Symbol) |
3239 | MIB.addSym(Sym: Symbol, TargetFlags: 0); |
3240 | else if (Addr.getGlobalValue()) |
3241 | MIB.addGlobalAddress(GV: Addr.getGlobalValue(), Offset: 0, TargetFlags: 0); |
3242 | else if (Addr.getReg()) { |
3243 | Register Reg = constrainOperandRegClass(II, Op: Addr.getReg(), OpNum: 0); |
3244 | MIB.addReg(RegNo: Reg); |
3245 | } else |
3246 | return false; |
3247 | } else { |
3248 | Register CallReg; |
3249 | if (Symbol) { |
3250 | Register ADRPReg = createResultReg(RC: &AArch64::GPR64commonRegClass); |
3251 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::ADRP), |
3252 | DestReg: ADRPReg) |
3253 | .addSym(Sym: Symbol, TargetFlags: AArch64II::MO_GOT | AArch64II::MO_PAGE); |
3254 | |
3255 | CallReg = createResultReg(RC: &AArch64::GPR64RegClass); |
3256 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3257 | MCID: TII.get(Opcode: AArch64::LDRXui), DestReg: CallReg) |
3258 | .addReg(RegNo: ADRPReg) |
3259 | .addSym(Sym: Symbol, |
3260 | TargetFlags: AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); |
3261 | } else if (Addr.getGlobalValue()) |
3262 | CallReg = materializeGV(GV: Addr.getGlobalValue()); |
3263 | else if (Addr.getReg()) |
3264 | CallReg = Addr.getReg(); |
3265 | |
3266 | if (!CallReg) |
3267 | return false; |
3268 | |
3269 | const MCInstrDesc &II = TII.get(Opcode: getBLRCallOpcode(MF: *MF)); |
3270 | CallReg = constrainOperandRegClass(II, Op: CallReg, OpNum: 0); |
3271 | MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II).addReg(RegNo: CallReg); |
3272 | } |
3273 | |
3274 | // Add implicit physical register uses to the call. |
3275 | for (auto Reg : CLI.OutRegs) |
3276 | MIB.addReg(RegNo: Reg, flags: RegState::Implicit); |
3277 | |
3278 | // Add a register mask with the call-preserved registers. |
3279 | // Proper defs for return values will be added by setPhysRegsDeadExcept(). |
3280 | MIB.addRegMask(Mask: TRI.getCallPreservedMask(MF: *FuncInfo.MF, CC)); |
3281 | |
3282 | CLI.Call = MIB; |
3283 | |
3284 | // Finish off the call including any return values. |
3285 | return finishCall(CLI, NumBytes); |
3286 | } |
3287 | |
3288 | bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) { |
3289 | if (Alignment) |
3290 | return Len / Alignment->value() <= 4; |
3291 | else |
3292 | return Len < 32; |
3293 | } |
3294 | |
3295 | bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, |
3296 | uint64_t Len, MaybeAlign Alignment) { |
3297 | // Make sure we don't bloat code by inlining very large memcpy's. |
3298 | if (!isMemCpySmall(Len, Alignment)) |
3299 | return false; |
3300 | |
3301 | int64_t UnscaledOffset = 0; |
3302 | Address OrigDest = Dest; |
3303 | Address OrigSrc = Src; |
3304 | |
3305 | while (Len) { |
3306 | MVT VT; |
3307 | if (!Alignment || *Alignment >= 8) { |
3308 | if (Len >= 8) |
3309 | VT = MVT::i64; |
3310 | else if (Len >= 4) |
3311 | VT = MVT::i32; |
3312 | else if (Len >= 2) |
3313 | VT = MVT::i16; |
3314 | else { |
3315 | VT = MVT::i8; |
3316 | } |
3317 | } else { |
3318 |       assert(Alignment && "Alignment is set in this branch"); |
3319 | // Bound based on alignment. |
3320 | if (Len >= 4 && *Alignment == 4) |
3321 | VT = MVT::i32; |
3322 | else if (Len >= 2 && *Alignment == 2) |
3323 | VT = MVT::i16; |
3324 | else { |
3325 | VT = MVT::i8; |
3326 | } |
3327 | } |
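    // Copy one VT-sized chunk: load it from Src, store it to Dest, and then
    // advance both addresses by the chunk size.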
3328 | |
3329 | Register ResultReg = emitLoad(VT, RetVT: VT, Addr: Src); |
3330 | if (!ResultReg) |
3331 | return false; |
3332 | |
3333 | if (!emitStore(VT, SrcReg: ResultReg, Addr: Dest)) |
3334 | return false; |
3335 | |
3336 | int64_t Size = VT.getSizeInBits() / 8; |
3337 | Len -= Size; |
3338 | UnscaledOffset += Size; |
3339 | |
3340 | // We need to recompute the unscaled offset for each iteration. |
3341 | Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); |
3342 | Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); |
3343 | } |
3344 | |
3345 | return true; |
3346 | } |
3347 | |
3348 | /// Check if it is possible to fold the condition from the XALU intrinsic |
3349 | /// into the user. The condition code will only be updated on success. |
3350 | bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, |
3351 | const Instruction *I, |
3352 | const Value *Cond) { |
3353 | if (!isa<ExtractValueInst>(Val: Cond)) |
3354 | return false; |
3355 | |
3356 | const auto *EV = cast<ExtractValueInst>(Val: Cond); |
3357 | if (!isa<IntrinsicInst>(Val: EV->getAggregateOperand())) |
3358 | return false; |
3359 | |
3360 | const auto *II = cast<IntrinsicInst>(Val: EV->getAggregateOperand()); |
3361 | MVT RetVT; |
3362 | const Function *Callee = II->getCalledFunction(); |
3363 | Type *RetTy = |
3364 | cast<StructType>(Val: Callee->getReturnType())->getTypeAtIndex(N: 0U); |
3365 | if (!isTypeLegal(Ty: RetTy, VT&: RetVT)) |
3366 | return false; |
3367 | |
3368 | if (RetVT != MVT::i32 && RetVT != MVT::i64) |
3369 | return false; |
3370 | |
3371 | const Value *LHS = II->getArgOperand(i: 0); |
3372 | const Value *RHS = II->getArgOperand(i: 1); |
3373 | |
3374 | // Canonicalize immediate to the RHS. |
3375 | if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS) && II->isCommutative()) |
3376 | std::swap(a&: LHS, b&: RHS); |
3377 | |
3378 | // Simplify multiplies. |
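  // A multiplication by 2 overflows exactly when the corresponding addition
  // x + x overflows, so it can reuse the add overflow condition.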
3379 | Intrinsic::ID IID = II->getIntrinsicID(); |
3380 | switch (IID) { |
3381 | default: |
3382 | break; |
3383 | case Intrinsic::smul_with_overflow: |
3384 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) |
3385 | if (C->getValue() == 2) |
3386 | IID = Intrinsic::sadd_with_overflow; |
3387 | break; |
3388 | case Intrinsic::umul_with_overflow: |
3389 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) |
3390 | if (C->getValue() == 2) |
3391 | IID = Intrinsic::uadd_with_overflow; |
3392 | break; |
3393 | } |
3394 | |
3395 | AArch64CC::CondCode TmpCC; |
3396 | switch (IID) { |
3397 | default: |
3398 | return false; |
3399 | case Intrinsic::sadd_with_overflow: |
3400 | case Intrinsic::ssub_with_overflow: |
3401 | TmpCC = AArch64CC::VS; |
3402 | break; |
3403 | case Intrinsic::uadd_with_overflow: |
3404 | TmpCC = AArch64CC::HS; |
3405 | break; |
3406 | case Intrinsic::usub_with_overflow: |
3407 | TmpCC = AArch64CC::LO; |
3408 | break; |
3409 | case Intrinsic::smul_with_overflow: |
3410 | case Intrinsic::umul_with_overflow: |
3411 | TmpCC = AArch64CC::NE; |
3412 | break; |
3413 | } |
3414 | |
3415 | // Check if both instructions are in the same basic block. |
3416 | if (!isValueAvailable(V: II)) |
3417 | return false; |
3418 | |
3419 |   // Make sure nothing is in the way between the intrinsic and I. |
3420 | BasicBlock::const_iterator Start(I); |
3421 | BasicBlock::const_iterator End(II); |
3422 | for (auto Itr = std::prev(x: Start); Itr != End; --Itr) { |
3423 | // We only expect extractvalue instructions between the intrinsic and the |
3424 | // instruction to be selected. |
3425 | if (!isa<ExtractValueInst>(Val: Itr)) |
3426 | return false; |
3427 | |
3428 | // Check that the extractvalue operand comes from the intrinsic. |
3429 | const auto *EVI = cast<ExtractValueInst>(Val&: Itr); |
3430 | if (EVI->getAggregateOperand() != II) |
3431 | return false; |
3432 | } |
3433 | |
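  // Only extractvalues of II sit between II and I, and those do not clobber
  // NZCV, so I can consume the condition computed for II directly.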
3434 | CC = TmpCC; |
3435 | return true; |
3436 | } |
3437 | |
3438 | bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { |
3439 | // FIXME: Handle more intrinsics. |
3440 | switch (II->getIntrinsicID()) { |
3441 | default: return false; |
3442 | case Intrinsic::frameaddress: { |
3443 | MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); |
3444 | MFI.setFrameAddressIsTaken(true); |
3445 | |
3446 | const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); |
3447 | Register FramePtr = RegInfo->getFrameRegister(MF: *(FuncInfo.MF)); |
3448 | Register SrcReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass); |
3449 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3450 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: SrcReg).addReg(RegNo: FramePtr); |
3451 | // Recursively load frame address |
3452 | // ldr x0, [fp] |
3453 | // ldr x0, [x0] |
3454 | // ldr x0, [x0] |
3455 | // ... |
3456 | Register DestReg; |
3457 | unsigned Depth = cast<ConstantInt>(Val: II->getOperand(i_nocapture: 0))->getZExtValue(); |
3458 | while (Depth--) { |
3459 | DestReg = fastEmitInst_ri(MachineInstOpcode: AArch64::LDRXui, RC: &AArch64::GPR64RegClass, |
3460 | Op0: SrcReg, Imm: 0); |
3461 |       assert(DestReg && "Unexpected LDR instruction emission failure."); |
3462 | SrcReg = DestReg; |
3463 | } |
3464 | |
3465 | updateValueMap(I: II, Reg: SrcReg); |
3466 | return true; |
3467 | } |
3468 | case Intrinsic::sponentry: { |
3469 | MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); |
3470 | |
3471 | // SP = FP + Fixed Object + 16 |
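    // Taking the address of a fixed stack object at offset 0 with the ADDXri
    // below resolves, once frame indices are eliminated, to the stack pointer
    // value at function entry.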
3472 | int FI = MFI.CreateFixedObject(Size: 4, SPOffset: 0, IsImmutable: false); |
3473 | Register ResultReg = createResultReg(RC: &AArch64::GPR64spRegClass); |
3474 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3475 | MCID: TII.get(Opcode: AArch64::ADDXri), DestReg: ResultReg) |
3476 | .addFrameIndex(Idx: FI) |
3477 | .addImm(Val: 0) |
3478 | .addImm(Val: 0); |
3479 | |
3480 | updateValueMap(I: II, Reg: ResultReg); |
3481 | return true; |
3482 | } |
3483 | case Intrinsic::memcpy: |
3484 | case Intrinsic::memmove: { |
3485 | const auto *MTI = cast<MemTransferInst>(Val: II); |
3486 | // Don't handle volatile. |
3487 | if (MTI->isVolatile()) |
3488 | return false; |
3489 | |
3490 |     // Disable inlining for memmove before calls to computeAddress. Otherwise, |
3491 | // we would emit dead code because we don't currently handle memmoves. |
3492 | bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); |
3493 | if (isa<ConstantInt>(Val: MTI->getLength()) && IsMemCpy) { |
3494 | // Small memcpy's are common enough that we want to do them without a call |
3495 | // if possible. |
3496 | uint64_t Len = cast<ConstantInt>(Val: MTI->getLength())->getZExtValue(); |
3497 | MaybeAlign Alignment; |
3498 | if (MTI->getDestAlign() || MTI->getSourceAlign()) |
3499 | Alignment = std::min(a: MTI->getDestAlign().valueOrOne(), |
3500 | b: MTI->getSourceAlign().valueOrOne()); |
3501 | if (isMemCpySmall(Len, Alignment)) { |
3502 | Address Dest, Src; |
3503 | if (!computeAddress(Obj: MTI->getRawDest(), Addr&: Dest) || |
3504 | !computeAddress(Obj: MTI->getRawSource(), Addr&: Src)) |
3505 | return false; |
3506 | if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) |
3507 | return true; |
3508 | } |
3509 | } |
3510 | |
3511 | if (!MTI->getLength()->getType()->isIntegerTy(Bitwidth: 64)) |
3512 | return false; |
3513 | |
3514 | if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) |
3515 | // Fast instruction selection doesn't support the special |
3516 | // address spaces. |
3517 | return false; |
3518 | |
3519 |     const char *IntrMemName = isa<MemCpyInst>(Val: II) ? "memcpy" : "memmove"; |
3520 | return lowerCallTo(CI: II, SymName: IntrMemName, NumArgs: II->arg_size() - 1); |
3521 | } |
3522 | case Intrinsic::memset: { |
3523 | const MemSetInst *MSI = cast<MemSetInst>(Val: II); |
3524 | // Don't handle volatile. |
3525 | if (MSI->isVolatile()) |
3526 | return false; |
3527 | |
3528 | if (!MSI->getLength()->getType()->isIntegerTy(Bitwidth: 64)) |
3529 | return false; |
3530 | |
3531 | if (MSI->getDestAddressSpace() > 255) |
3532 | // Fast instruction selection doesn't support the special |
3533 | // address spaces. |
3534 | return false; |
3535 | |
3536 |     return lowerCallTo(CI: II, SymName: "memset", NumArgs: II->arg_size() - 1); |
3537 | } |
3538 | case Intrinsic::sin: |
3539 | case Intrinsic::cos: |
3540 | case Intrinsic::tan: |
3541 | case Intrinsic::pow: { |
3542 | MVT RetVT; |
3543 | if (!isTypeLegal(Ty: II->getType(), VT&: RetVT)) |
3544 | return false; |
3545 | |
3546 | if (RetVT != MVT::f32 && RetVT != MVT::f64) |
3547 | return false; |
3548 | |
3549 | static const RTLIB::Libcall LibCallTable[4][2] = { |
3550 | {RTLIB::SIN_F32, RTLIB::SIN_F64}, |
3551 | {RTLIB::COS_F32, RTLIB::COS_F64}, |
3552 | {RTLIB::TAN_F32, RTLIB::TAN_F64}, |
3553 | {RTLIB::POW_F32, RTLIB::POW_F64}}; |
3554 | RTLIB::Libcall LC; |
3555 | bool Is64Bit = RetVT == MVT::f64; |
3556 | switch (II->getIntrinsicID()) { |
3557 | default: |
3558 |       llvm_unreachable("Unexpected intrinsic."); |
3559 | case Intrinsic::sin: |
3560 | LC = LibCallTable[0][Is64Bit]; |
3561 | break; |
3562 | case Intrinsic::cos: |
3563 | LC = LibCallTable[1][Is64Bit]; |
3564 | break; |
3565 | case Intrinsic::tan: |
3566 | LC = LibCallTable[2][Is64Bit]; |
3567 | break; |
3568 | case Intrinsic::pow: |
3569 | LC = LibCallTable[3][Is64Bit]; |
3570 | break; |
3571 | } |
3572 | |
3573 | ArgListTy Args; |
3574 | Args.reserve(n: II->arg_size()); |
3575 | |
3576 | // Populate the argument list. |
3577 | for (auto &Arg : II->args()) { |
3578 | ArgListEntry Entry; |
3579 | Entry.Val = Arg; |
3580 | Entry.Ty = Arg->getType(); |
3581 | Args.push_back(x: Entry); |
3582 | } |
3583 | |
3584 | CallLoweringInfo CLI; |
3585 | MCContext &Ctx = MF->getContext(); |
3586 | CLI.setCallee(DL, Ctx, CC: TLI.getLibcallCallingConv(Call: LC), ResultTy: II->getType(), |
3587 | Target: TLI.getLibcallName(Call: LC), ArgsList: std::move(Args)); |
3588 | if (!lowerCallTo(CLI)) |
3589 | return false; |
3590 | updateValueMap(I: II, Reg: CLI.ResultReg); |
3591 | return true; |
3592 | } |
3593 | case Intrinsic::fabs: { |
3594 | MVT VT; |
3595 | if (!isTypeLegal(Ty: II->getType(), VT)) |
3596 | return false; |
3597 | |
3598 | unsigned Opc; |
3599 | switch (VT.SimpleTy) { |
3600 | default: |
3601 | return false; |
3602 | case MVT::f32: |
3603 | Opc = AArch64::FABSSr; |
3604 | break; |
3605 | case MVT::f64: |
3606 | Opc = AArch64::FABSDr; |
3607 | break; |
3608 | } |
3609 | Register SrcReg = getRegForValue(V: II->getOperand(i_nocapture: 0)); |
3610 | if (!SrcReg) |
3611 | return false; |
3612 | Register ResultReg = createResultReg(RC: TLI.getRegClassFor(VT)); |
3613 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: Opc), DestReg: ResultReg) |
3614 | .addReg(RegNo: SrcReg); |
3615 | updateValueMap(I: II, Reg: ResultReg); |
3616 | return true; |
3617 | } |
3618 | case Intrinsic::trap: |
3619 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::BRK)) |
3620 | .addImm(Val: 1); |
3621 | return true; |
3622 | case Intrinsic::debugtrap: |
3623 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::BRK)) |
3624 | .addImm(Val: 0xF000); |
3625 | return true; |
3626 | |
3627 | case Intrinsic::sqrt: { |
3628 | Type *RetTy = II->getCalledFunction()->getReturnType(); |
3629 | |
3630 | MVT VT; |
3631 | if (!isTypeLegal(Ty: RetTy, VT)) |
3632 | return false; |
3633 | |
3634 | Register Op0Reg = getRegForValue(V: II->getOperand(i_nocapture: 0)); |
3635 | if (!Op0Reg) |
3636 | return false; |
3637 | |
3638 | Register ResultReg = fastEmit_r(VT, RetVT: VT, Opcode: ISD::FSQRT, Op0: Op0Reg); |
3639 | if (!ResultReg) |
3640 | return false; |
3641 | |
3642 | updateValueMap(I: II, Reg: ResultReg); |
3643 | return true; |
3644 | } |
3645 | case Intrinsic::sadd_with_overflow: |
3646 | case Intrinsic::uadd_with_overflow: |
3647 | case Intrinsic::ssub_with_overflow: |
3648 | case Intrinsic::usub_with_overflow: |
3649 | case Intrinsic::smul_with_overflow: |
3650 | case Intrinsic::umul_with_overflow: { |
3651 | // This implements the basic lowering of the xalu with overflow intrinsics. |
3652 | const Function *Callee = II->getCalledFunction(); |
3653 | auto *Ty = cast<StructType>(Val: Callee->getReturnType()); |
3654 | Type *RetTy = Ty->getTypeAtIndex(N: 0U); |
3655 | |
3656 | MVT VT; |
3657 | if (!isTypeLegal(Ty: RetTy, VT)) |
3658 | return false; |
3659 | |
3660 | if (VT != MVT::i32 && VT != MVT::i64) |
3661 | return false; |
3662 | |
3663 | const Value *LHS = II->getArgOperand(i: 0); |
3664 | const Value *RHS = II->getArgOperand(i: 1); |
3665 | // Canonicalize immediate to the RHS. |
3666 | if (isa<ConstantInt>(Val: LHS) && !isa<ConstantInt>(Val: RHS) && II->isCommutative()) |
3667 | std::swap(a&: LHS, b&: RHS); |
3668 | |
3669 | // Simplify multiplies. |
3670 | Intrinsic::ID IID = II->getIntrinsicID(); |
3671 | switch (IID) { |
3672 | default: |
3673 | break; |
3674 | case Intrinsic::smul_with_overflow: |
3675 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) |
3676 | if (C->getValue() == 2) { |
3677 | IID = Intrinsic::sadd_with_overflow; |
3678 | RHS = LHS; |
3679 | } |
3680 | break; |
3681 | case Intrinsic::umul_with_overflow: |
3682 | if (const auto *C = dyn_cast<ConstantInt>(Val: RHS)) |
3683 | if (C->getValue() == 2) { |
3684 | IID = Intrinsic::uadd_with_overflow; |
3685 | RHS = LHS; |
3686 | } |
3687 | break; |
3688 | } |
3689 | |
3690 | Register ResultReg1, ResultReg2, MulReg; |
3691 | AArch64CC::CondCode CC = AArch64CC::Invalid; |
3692 | switch (IID) { |
3693 |     default: llvm_unreachable("Unexpected intrinsic!"); |
3694 | case Intrinsic::sadd_with_overflow: |
3695 | ResultReg1 = emitAdd(RetVT: VT, LHS, RHS, /*SetFlags=*/true); |
3696 | CC = AArch64CC::VS; |
3697 | break; |
3698 | case Intrinsic::uadd_with_overflow: |
3699 | ResultReg1 = emitAdd(RetVT: VT, LHS, RHS, /*SetFlags=*/true); |
3700 | CC = AArch64CC::HS; |
3701 | break; |
3702 | case Intrinsic::ssub_with_overflow: |
3703 | ResultReg1 = emitSub(RetVT: VT, LHS, RHS, /*SetFlags=*/true); |
3704 | CC = AArch64CC::VS; |
3705 | break; |
3706 | case Intrinsic::usub_with_overflow: |
3707 | ResultReg1 = emitSub(RetVT: VT, LHS, RHS, /*SetFlags=*/true); |
3708 | CC = AArch64CC::LO; |
3709 | break; |
3710 | case Intrinsic::smul_with_overflow: { |
3711 | CC = AArch64CC::NE; |
3712 | Register LHSReg = getRegForValue(V: LHS); |
3713 | if (!LHSReg) |
3714 | return false; |
3715 | |
3716 | Register RHSReg = getRegForValue(V: RHS); |
3717 | if (!RHSReg) |
3718 | return false; |
3719 | |
3720 | if (VT == MVT::i32) { |
3721 | MulReg = emitSMULL_rr(RetVT: MVT::i64, Op0: LHSReg, Op1: RHSReg); |
3722 | Register MulSubReg = |
3723 | fastEmitInst_extractsubreg(RetVT: VT, Op0: MulReg, Idx: AArch64::sub_32); |
3724 | // cmp xreg, wreg, sxtw |
3725 | emitAddSub_rx(/*UseAdd=*/false, RetVT: MVT::i64, LHSReg: MulReg, RHSReg: MulSubReg, |
3726 | ExtType: AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true, |
3727 | /*WantResult=*/false); |
3728 | MulReg = MulSubReg; |
3729 | } else { |
3730 |         assert(VT == MVT::i64 && "Unexpected value type."); |
3731 | // LHSReg and RHSReg cannot be killed by this Mul, since they are |
3732 | // reused in the next instruction. |
3733 | MulReg = emitMul_rr(RetVT: VT, Op0: LHSReg, Op1: RHSReg); |
3734 | Register SMULHReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISD::MULHS, Op0: LHSReg, Op1: RHSReg); |
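        // The product overflows i64 iff the high half (SMULH) differs from
        // the sign-extension of the low half, i.e. SMULH != MUL >> 63
        // (arithmetic); the SUBS below sets NE exactly in that case.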
3735 | emitSubs_rs(RetVT: VT, LHSReg: SMULHReg, RHSReg: MulReg, ShiftType: AArch64_AM::ASR, ShiftImm: 63, |
3736 | /*WantResult=*/false); |
3737 | } |
3738 | break; |
3739 | } |
3740 | case Intrinsic::umul_with_overflow: { |
3741 | CC = AArch64CC::NE; |
3742 | Register LHSReg = getRegForValue(V: LHS); |
3743 | if (!LHSReg) |
3744 | return false; |
3745 | |
3746 | Register RHSReg = getRegForValue(V: RHS); |
3747 | if (!RHSReg) |
3748 | return false; |
3749 | |
3750 | if (VT == MVT::i32) { |
3751 | MulReg = emitUMULL_rr(RetVT: MVT::i64, Op0: LHSReg, Op1: RHSReg); |
3752 | // tst xreg, #0xffffffff00000000 |
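        // The unsigned product overflows the i32 result iff any of the top
        // 32 bits of the 64-bit UMULL result are set; the ANDS leaves NE set
        // exactly in that case.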
3753 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3754 | MCID: TII.get(Opcode: AArch64::ANDSXri), DestReg: AArch64::XZR) |
3755 | .addReg(RegNo: MulReg) |
3756 | .addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: 0xFFFFFFFF00000000, regSize: 64)); |
3757 | MulReg = fastEmitInst_extractsubreg(RetVT: VT, Op0: MulReg, Idx: AArch64::sub_32); |
3758 | } else { |
3759 |         assert(VT == MVT::i64 && "Unexpected value type."); |
3760 | // LHSReg and RHSReg cannot be killed by this Mul, since they are |
3761 | // reused in the next instruction. |
3762 | MulReg = emitMul_rr(RetVT: VT, Op0: LHSReg, Op1: RHSReg); |
3763 | Register UMULHReg = fastEmit_rr(VT, RetVT: VT, Opcode: ISD::MULHU, Op0: LHSReg, Op1: RHSReg); |
3764 | emitSubs_rr(RetVT: VT, LHSReg: AArch64::XZR, RHSReg: UMULHReg, /*WantResult=*/false); |
3765 | } |
3766 | break; |
3767 | } |
3768 | } |
3769 | |
3770 | if (MulReg) { |
3771 | ResultReg1 = createResultReg(RC: TLI.getRegClassFor(VT)); |
3772 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3773 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg1).addReg(RegNo: MulReg); |
3774 | } |
3775 | |
3776 | if (!ResultReg1) |
3777 | return false; |
3778 | |
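    // CSINC WZR, WZR, !CC is the CSET CC idiom: it produces 1 when the
    // overflow condition holds and 0 otherwise, yielding the i1 overflow
    // result in the register following ResultReg1.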
3779 | ResultReg2 = fastEmitInst_rri(MachineInstOpcode: AArch64::CSINCWr, RC: &AArch64::GPR32RegClass, |
3780 | Op0: AArch64::WZR, Op1: AArch64::WZR, |
3781 | Imm: getInvertedCondCode(Code: CC)); |
3782 | (void)ResultReg2; |
3783 | assert((ResultReg1 + 1) == ResultReg2 && |
3784 |            "Nonconsecutive result registers."); |
3785 | updateValueMap(I: II, Reg: ResultReg1, NumRegs: 2); |
3786 | return true; |
3787 | } |
3788 | case Intrinsic::aarch64_crc32b: |
3789 | case Intrinsic::aarch64_crc32h: |
3790 | case Intrinsic::aarch64_crc32w: |
3791 | case Intrinsic::aarch64_crc32x: |
3792 | case Intrinsic::aarch64_crc32cb: |
3793 | case Intrinsic::aarch64_crc32ch: |
3794 | case Intrinsic::aarch64_crc32cw: |
3795 | case Intrinsic::aarch64_crc32cx: { |
3796 | if (!Subtarget->hasCRC()) |
3797 | return false; |
3798 | |
3799 | unsigned Opc; |
3800 | switch (II->getIntrinsicID()) { |
3801 | default: |
3802 |       llvm_unreachable("Unexpected intrinsic!"); |
3803 | case Intrinsic::aarch64_crc32b: |
3804 | Opc = AArch64::CRC32Brr; |
3805 | break; |
3806 | case Intrinsic::aarch64_crc32h: |
3807 | Opc = AArch64::CRC32Hrr; |
3808 | break; |
3809 | case Intrinsic::aarch64_crc32w: |
3810 | Opc = AArch64::CRC32Wrr; |
3811 | break; |
3812 | case Intrinsic::aarch64_crc32x: |
3813 | Opc = AArch64::CRC32Xrr; |
3814 | break; |
3815 | case Intrinsic::aarch64_crc32cb: |
3816 | Opc = AArch64::CRC32CBrr; |
3817 | break; |
3818 | case Intrinsic::aarch64_crc32ch: |
3819 | Opc = AArch64::CRC32CHrr; |
3820 | break; |
3821 | case Intrinsic::aarch64_crc32cw: |
3822 | Opc = AArch64::CRC32CWrr; |
3823 | break; |
3824 | case Intrinsic::aarch64_crc32cx: |
3825 | Opc = AArch64::CRC32CXrr; |
3826 | break; |
3827 | } |
3828 | |
3829 | Register LHSReg = getRegForValue(V: II->getArgOperand(i: 0)); |
3830 | Register RHSReg = getRegForValue(V: II->getArgOperand(i: 1)); |
3831 | if (!LHSReg || !RHSReg) |
3832 | return false; |
3833 | |
3834 | Register ResultReg = |
3835 | fastEmitInst_rr(MachineInstOpcode: Opc, RC: &AArch64::GPR32RegClass, Op0: LHSReg, Op1: RHSReg); |
3836 | updateValueMap(I: II, Reg: ResultReg); |
3837 | return true; |
3838 | } |
3839 | } |
3840 | return false; |
3841 | } |
3842 | |
3843 | bool AArch64FastISel::selectRet(const Instruction *I) { |
3844 | const ReturnInst *Ret = cast<ReturnInst>(Val: I); |
3845 | const Function &F = *I->getParent()->getParent(); |
3846 | |
3847 | if (!FuncInfo.CanLowerReturn) |
3848 | return false; |
3849 | |
3850 | if (F.isVarArg()) |
3851 | return false; |
3852 | |
3853 | if (TLI.supportSwiftError() && |
3854 | F.getAttributes().hasAttrSomewhere(Kind: Attribute::SwiftError)) |
3855 | return false; |
3856 | |
3857 | if (TLI.supportSplitCSR(MF: FuncInfo.MF)) |
3858 | return false; |
3859 | |
3860 | // Build a list of return value registers. |
3861 | SmallVector<Register, 4> RetRegs; |
3862 | |
3863 | if (Ret->getNumOperands() > 0) { |
3864 | CallingConv::ID CC = F.getCallingConv(); |
3865 | SmallVector<ISD::OutputArg, 4> Outs; |
3866 | GetReturnInfo(CC, ReturnType: F.getReturnType(), attr: F.getAttributes(), Outs, TLI, DL); |
3867 | |
3868 | // Analyze operands of the call, assigning locations to each operand. |
3869 | SmallVector<CCValAssign, 16> ValLocs; |
3870 | CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); |
3871 | CCInfo.AnalyzeReturn(Outs, Fn: RetCC_AArch64_AAPCS); |
3872 | |
3873 | // Only handle a single return value for now. |
3874 | if (ValLocs.size() != 1) |
3875 | return false; |
3876 | |
3877 | CCValAssign &VA = ValLocs[0]; |
3878 | const Value *RV = Ret->getOperand(i_nocapture: 0); |
3879 | |
3880 | // Don't bother handling odd stuff for now. |
3881 | if ((VA.getLocInfo() != CCValAssign::Full) && |
3882 | (VA.getLocInfo() != CCValAssign::BCvt)) |
3883 | return false; |
3884 | |
3885 | // Only handle register returns for now. |
3886 | if (!VA.isRegLoc()) |
3887 | return false; |
3888 | |
3889 | Register Reg = getRegForValue(V: RV); |
3890 | if (!Reg) |
3891 | return false; |
3892 | |
3893 | Register SrcReg = Reg + VA.getValNo(); |
3894 | Register DestReg = VA.getLocReg(); |
3895 | // Avoid a cross-class copy. This is very unlikely. |
3896 | if (!MRI.getRegClass(Reg: SrcReg)->contains(Reg: DestReg)) |
3897 | return false; |
3898 | |
3899 | EVT RVEVT = TLI.getValueType(DL, Ty: RV->getType()); |
3900 | if (!RVEVT.isSimple()) |
3901 | return false; |
3902 | |
3903 | // Vectors (of > 1 lane) in big endian need tricky handling. |
3904 | if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() && |
3905 | !Subtarget->isLittleEndian()) |
3906 | return false; |
3907 | |
3908 | MVT RVVT = RVEVT.getSimpleVT(); |
3909 | if (RVVT == MVT::f128) |
3910 | return false; |
3911 | |
3912 | MVT DestVT = VA.getValVT(); |
3913 | // Special handling for extended integers. |
3914 | if (RVVT != DestVT) { |
3915 | if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) |
3916 | return false; |
3917 | |
3918 | if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) |
3919 | return false; |
3920 | |
3921 | bool IsZExt = Outs[0].Flags.isZExt(); |
3922 | SrcReg = emitIntExt(SrcVT: RVVT, SrcReg, DestVT, isZExt: IsZExt); |
3923 | if (!SrcReg) |
3924 | return false; |
3925 | } |
3926 | |
3927 | // "Callee" (i.e. value producer) zero extends pointers at function |
3928 | // boundary. |
3929 | if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy()) |
3930 | SrcReg = emitAnd_ri(RetVT: MVT::i64, LHSReg: SrcReg, Imm: 0xffffffff); |
3931 | |
3932 | // Make the copy. |
3933 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3934 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg).addReg(RegNo: SrcReg); |
3935 | |
3936 | // Add register to return instruction. |
3937 | RetRegs.push_back(Elt: VA.getLocReg()); |
3938 | } |
3939 | |
3940 | MachineInstrBuilder MIB = BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
3941 | MCID: TII.get(Opcode: AArch64::RET_ReallyLR)); |
3942 | for (Register RetReg : RetRegs) |
3943 | MIB.addReg(RegNo: RetReg, flags: RegState::Implicit); |
3944 | return true; |
3945 | } |
3946 | |
3947 | bool AArch64FastISel::selectTrunc(const Instruction *I) { |
3948 | Type *DestTy = I->getType(); |
3949 | Value *Op = I->getOperand(i: 0); |
3950 | Type *SrcTy = Op->getType(); |
3951 | |
3952 | EVT SrcEVT = TLI.getValueType(DL, Ty: SrcTy, AllowUnknown: true); |
3953 | EVT DestEVT = TLI.getValueType(DL, Ty: DestTy, AllowUnknown: true); |
3954 | if (!SrcEVT.isSimple()) |
3955 | return false; |
3956 | if (!DestEVT.isSimple()) |
3957 | return false; |
3958 | |
3959 | MVT SrcVT = SrcEVT.getSimpleVT(); |
3960 | MVT DestVT = DestEVT.getSimpleVT(); |
3961 | |
3962 | if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && |
3963 | SrcVT != MVT::i8) |
3964 | return false; |
3965 | if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && |
3966 | DestVT != MVT::i1) |
3967 | return false; |
3968 | |
3969 | Register SrcReg = getRegForValue(V: Op); |
3970 | if (!SrcReg) |
3971 | return false; |
3972 | |
3973 |   // If we're truncating from i64 to a smaller non-legal type then generate an |
3974 |   // AND. Otherwise, we know the high bits are undefined and a truncate only |
3975 |   // generates a COPY. We cannot also mark the source register as the result |
3976 |   // register, because this can incorrectly transfer the kill flag onto the |
3977 |   // source register. |
3978 | Register ResultReg; |
3979 | if (SrcVT == MVT::i64) { |
3980 | uint64_t Mask = 0; |
3981 | switch (DestVT.SimpleTy) { |
3982 | default: |
3983 | // Trunc i64 to i32 is handled by the target-independent fast-isel. |
3984 | return false; |
3985 | case MVT::i1: |
3986 | Mask = 0x1; |
3987 | break; |
3988 | case MVT::i8: |
3989 | Mask = 0xff; |
3990 | break; |
3991 | case MVT::i16: |
3992 | Mask = 0xffff; |
3993 | break; |
3994 | } |
3995 | // Issue an extract_subreg to get the lower 32-bits. |
3996 | Register Reg32 = fastEmitInst_extractsubreg(RetVT: MVT::i32, Op0: SrcReg, |
3997 | Idx: AArch64::sub_32); |
3998 | // Create the AND instruction which performs the actual truncation. |
3999 | ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: Reg32, Imm: Mask); |
4000 |     assert(ResultReg && "Unexpected AND instruction emission failure."); |
4001 | } else { |
4002 | ResultReg = createResultReg(RC: &AArch64::GPR32RegClass); |
4003 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4004 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg) |
4005 | .addReg(RegNo: SrcReg); |
4006 | } |
4007 | |
4008 | updateValueMap(I, Reg: ResultReg); |
4009 | return true; |
4010 | } |
4011 | |
4012 | Register AArch64FastISel::emiti1Ext(Register SrcReg, MVT DestVT, bool IsZExt) { |
4013 | assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || |
4014 | DestVT == MVT::i64) && |
4015 |          "Unexpected value type."); |
4016 | // Handle i8 and i16 as i32. |
4017 | if (DestVT == MVT::i8 || DestVT == MVT::i16) |
4018 | DestVT = MVT::i32; |
4019 | |
4020 | if (IsZExt) { |
4021 | Register ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: SrcReg, Imm: 1); |
4022 |     assert(ResultReg && "Unexpected AND instruction emission failure."); |
4023 | if (DestVT == MVT::i64) { |
4024 | // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the |
4025 | // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. |
4026 | Register Reg64 = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass); |
4027 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4028 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: Reg64) |
4029 | .addImm(Val: 0) |
4030 | .addReg(RegNo: ResultReg) |
4031 | .addImm(Val: AArch64::sub_32); |
4032 | ResultReg = Reg64; |
4033 | } |
4034 | return ResultReg; |
4035 | } else { |
4036 | if (DestVT == MVT::i64) { |
4037 | // FIXME: We're SExt i1 to i64. |
4038 | return Register(); |
4039 | } |
4040 | return fastEmitInst_rii(MachineInstOpcode: AArch64::SBFMWri, RC: &AArch64::GPR32RegClass, Op0: SrcReg, |
4041 | Imm1: 0, Imm2: 0); |
4042 | } |
4043 | } |
4044 | |
4045 | Register AArch64FastISel::emitMul_rr(MVT RetVT, Register Op0, Register Op1) { |
4046 | unsigned Opc; |
4047 | Register ZReg; |
4048 | switch (RetVT.SimpleTy) { |
4049 | default: |
4050 | return Register(); |
4051 | case MVT::i8: |
4052 | case MVT::i16: |
4053 | case MVT::i32: |
4054 | RetVT = MVT::i32; |
4055 | Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; |
4056 | case MVT::i64: |
4057 | Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; |
4058 | } |
4059 | |
4060 | const TargetRegisterClass *RC = |
4061 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4062 | return fastEmitInst_rrr(MachineInstOpcode: Opc, RC, Op0, Op1, Op2: ZReg); |
4063 | } |
4064 | |
4065 | Register AArch64FastISel::emitSMULL_rr(MVT RetVT, Register Op0, Register Op1) { |
4066 | if (RetVT != MVT::i64) |
4067 | return Register(); |
4068 | |
4069 | return fastEmitInst_rrr(MachineInstOpcode: AArch64::SMADDLrrr, RC: &AArch64::GPR64RegClass, |
4070 | Op0, Op1, Op2: AArch64::XZR); |
4071 | } |
4072 | |
4073 | Register AArch64FastISel::emitUMULL_rr(MVT RetVT, Register Op0, Register Op1) { |
4074 | if (RetVT != MVT::i64) |
4075 | return Register(); |
4076 | |
4077 | return fastEmitInst_rrr(MachineInstOpcode: AArch64::UMADDLrrr, RC: &AArch64::GPR64RegClass, |
4078 | Op0, Op1, Op2: AArch64::XZR); |
4079 | } |
4080 | |
4081 | Register AArch64FastISel::emitLSL_rr(MVT RetVT, Register Op0Reg, |
4082 | Register Op1Reg) { |
4083 | unsigned Opc = 0; |
4084 | bool NeedTrunc = false; |
4085 | uint64_t Mask = 0; |
4086 | switch (RetVT.SimpleTy) { |
4087 | default: |
4088 | return Register(); |
4089 | case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; |
4090 | case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; |
4091 | case MVT::i32: Opc = AArch64::LSLVWr; break; |
4092 | case MVT::i64: Opc = AArch64::LSLVXr; break; |
4093 | } |
4094 | |
4095 | const TargetRegisterClass *RC = |
4096 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
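  // For i8/i16 the value lives in a 32-bit register whose upper bits are not
  // guaranteed to be zero, so mask the shift amount first and truncate the
  // result back with an AND afterwards.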
4097 | if (NeedTrunc) |
4098 | Op1Reg = emitAnd_ri(RetVT: MVT::i32, LHSReg: Op1Reg, Imm: Mask); |
4099 | |
4100 | Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg); |
4101 | if (NeedTrunc) |
4102 | ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask); |
4103 | return ResultReg; |
4104 | } |
4105 | |
4106 | Register AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0, |
4107 | uint64_t Shift, bool IsZExt) { |
4108 | assert(RetVT.SimpleTy >= SrcVT.SimpleTy && |
4109 |          "Unexpected source/return type pair."); |
4110 | assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || |
4111 | SrcVT == MVT::i32 || SrcVT == MVT::i64) && |
4112 |          "Unexpected source value type."); |
4113 | assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || |
4114 |           RetVT == MVT::i64) && "Unexpected return value type."); |
4115 | |
4116 | bool Is64Bit = (RetVT == MVT::i64); |
4117 | unsigned RegSize = Is64Bit ? 64 : 32; |
4118 | unsigned DstBits = RetVT.getSizeInBits(); |
4119 | unsigned SrcBits = SrcVT.getSizeInBits(); |
4120 | const TargetRegisterClass *RC = |
4121 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4122 | |
4123 | // Just emit a copy for "zero" shifts. |
4124 | if (Shift == 0) { |
4125 | if (RetVT == SrcVT) { |
4126 | Register ResultReg = createResultReg(RC); |
4127 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4128 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg) |
4129 | .addReg(RegNo: Op0); |
4130 | return ResultReg; |
4131 | } else |
4132 | return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt); |
4133 | } |
4134 | |
4135 | // Don't deal with undefined shifts. |
4136 | if (Shift >= DstBits) |
4137 | return Register(); |
4138 | |
4139 | // For immediate shifts we can fold the zero-/sign-extension into the shift. |
4140 | // {S|U}BFM Wd, Wn, #r, #s |
4141 | // Wd<32+s-r,32-r> = Wn<s:0> when r > s |
4142 | |
4143 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4144 | // %2 = shl i16 %1, 4 |
4145 | // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 |
4146 | // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext |
4147 | // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext |
4148 | // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext |
4149 | |
4150 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4151 | // %2 = shl i16 %1, 8 |
4152 | // Wd<32+7-24,32-24> = Wn<7:0> |
4153 | // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext |
4154 | // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext |
4155 | // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext |
4156 | |
4157 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4158 | // %2 = shl i16 %1, 12 |
4159 | // Wd<32+3-20,32-20> = Wn<3:0> |
4160 | // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext |
4161 | // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext |
4162 | // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext |
4163 | |
4164 | unsigned ImmR = RegSize - Shift; |
4165 | // Limit the width to the length of the source type. |
4166 | unsigned ImmS = std::min<unsigned>(a: SrcBits - 1, b: DstBits - 1 - Shift); |
4167 | static const unsigned OpcTable[2][2] = { |
4168 | {AArch64::SBFMWri, AArch64::SBFMXri}, |
4169 | {AArch64::UBFMWri, AArch64::UBFMXri} |
4170 | }; |
4171 | unsigned Opc = OpcTable[IsZExt][Is64Bit]; |
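  // When a narrow source feeds a 64-bit shift, wrap it in a 64-bit register
  // with SUBREG_TO_REG (which emits no code) so the X-register form of
  // SBFM/UBFM can be used.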
4172 | if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { |
4173 | Register TmpReg = MRI.createVirtualRegister(RegClass: RC); |
4174 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4175 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: TmpReg) |
4176 | .addImm(Val: 0) |
4177 | .addReg(RegNo: Op0) |
4178 | .addImm(Val: AArch64::sub_32); |
4179 | Op0 = TmpReg; |
4180 | } |
4181 | return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS); |
4182 | } |
4183 | |
4184 | Register AArch64FastISel::emitLSR_rr(MVT RetVT, Register Op0Reg, |
4185 | Register Op1Reg) { |
4186 | unsigned Opc = 0; |
4187 | bool NeedTrunc = false; |
4188 | uint64_t Mask = 0; |
4189 | switch (RetVT.SimpleTy) { |
4190 | default: |
4191 | return Register(); |
4192 | case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; |
4193 | case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; |
4194 | case MVT::i32: Opc = AArch64::LSRVWr; break; |
4195 | case MVT::i64: Opc = AArch64::LSRVXr; break; |
4196 | } |
4197 | |
4198 | const TargetRegisterClass *RC = |
4199 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4200 | if (NeedTrunc) { |
4201 | Op0Reg = emitAnd_ri(RetVT: MVT::i32, LHSReg: Op0Reg, Imm: Mask); |
4202 | Op1Reg = emitAnd_ri(RetVT: MVT::i32, LHSReg: Op1Reg, Imm: Mask); |
4203 | } |
4204 | Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg); |
4205 | if (NeedTrunc) |
4206 | ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask); |
4207 | return ResultReg; |
4208 | } |
4209 | |
4210 | Register AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0, |
4211 | uint64_t Shift, bool IsZExt) { |
4212 | assert(RetVT.SimpleTy >= SrcVT.SimpleTy && |
4213 |          "Unexpected source/return type pair."); |
4214 | assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || |
4215 | SrcVT == MVT::i32 || SrcVT == MVT::i64) && |
4216 |          "Unexpected source value type."); |
4217 | assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || |
4218 |           RetVT == MVT::i64) && "Unexpected return value type."); |
4219 | |
4220 | bool Is64Bit = (RetVT == MVT::i64); |
4221 | unsigned RegSize = Is64Bit ? 64 : 32; |
4222 | unsigned DstBits = RetVT.getSizeInBits(); |
4223 | unsigned SrcBits = SrcVT.getSizeInBits(); |
4224 | const TargetRegisterClass *RC = |
4225 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4226 | |
4227 | // Just emit a copy for "zero" shifts. |
4228 | if (Shift == 0) { |
4229 | if (RetVT == SrcVT) { |
4230 | Register ResultReg = createResultReg(RC); |
4231 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4232 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg) |
4233 | .addReg(RegNo: Op0); |
4234 | return ResultReg; |
4235 | } else |
4236 | return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt); |
4237 | } |
4238 | |
4239 | // Don't deal with undefined shifts. |
4240 | if (Shift >= DstBits) |
4241 | return Register(); |
4242 | |
4243 | // For immediate shifts we can fold the zero-/sign-extension into the shift. |
4244 | // {S|U}BFM Wd, Wn, #r, #s |
4245 | // Wd<s-r:0> = Wn<s:r> when r <= s |
4246 | |
4247 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4248 | // %2 = lshr i16 %1, 4 |
4249 | // Wd<7-4:0> = Wn<7:4> |
4250 | // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext |
4251 | // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext |
4252 | // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext |
4253 | |
4254 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4255 | // %2 = lshr i16 %1, 8 |
4256 | // Wd<7-7,0> = Wn<7:7> |
4257 | // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext |
4258 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
4259 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
4260 | |
4261 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4262 | // %2 = lshr i16 %1, 12 |
4263 | // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 |
4264 | // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext |
4265 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
4266 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
4267 | |
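  // Shifting a zero-extended value right by at least its original bit width
  // always yields zero, so just materialize the constant 0.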
4268 | if (Shift >= SrcBits && IsZExt) |
4269 | return materializeInt(CI: ConstantInt::get(Context&: *Context, V: APInt(RegSize, 0)), VT: RetVT); |
4270 | |
4271 | // It is not possible to fold a sign-extend into the LShr instruction. In this |
4272 | // case emit a sign-extend. |
4273 | if (!IsZExt) { |
4274 | Op0 = emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt); |
4275 | if (!Op0) |
4276 | return Register(); |
4277 | SrcVT = RetVT; |
4278 | SrcBits = SrcVT.getSizeInBits(); |
4279 | IsZExt = true; |
4280 | } |
4281 | |
4282 | unsigned ImmR = std::min<unsigned>(a: SrcBits - 1, b: Shift); |
4283 | unsigned ImmS = SrcBits - 1; |
4284 | static const unsigned OpcTable[2][2] = { |
4285 | {AArch64::SBFMWri, AArch64::SBFMXri}, |
4286 | {AArch64::UBFMWri, AArch64::UBFMXri} |
4287 | }; |
4288 | unsigned Opc = OpcTable[IsZExt][Is64Bit]; |
4289 | if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { |
4290 | Register TmpReg = MRI.createVirtualRegister(RegClass: RC); |
4291 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4292 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: TmpReg) |
4293 | .addImm(Val: 0) |
4294 | .addReg(RegNo: Op0) |
4295 | .addImm(Val: AArch64::sub_32); |
4296 | Op0 = TmpReg; |
4297 | } |
4298 | return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS); |
4299 | } |
4300 | |
4301 | Register AArch64FastISel::emitASR_rr(MVT RetVT, Register Op0Reg, |
4302 | Register Op1Reg) { |
4303 | unsigned Opc = 0; |
4304 | bool NeedTrunc = false; |
4305 | uint64_t Mask = 0; |
4306 | switch (RetVT.SimpleTy) { |
4307 | default: |
4308 | return Register(); |
4309 | case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; |
4310 | case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; |
4311 | case MVT::i32: Opc = AArch64::ASRVWr; break; |
4312 | case MVT::i64: Opc = AArch64::ASRVXr; break; |
4313 | } |
4314 | |
4315 | const TargetRegisterClass *RC = |
4316 | (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
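  // i8/i16 shifts are performed in a 32-bit register: sign-extend the value so
  // the arithmetic shift sees the real sign bit, mask the shift amount down to
  // the narrow width (its upper bits may hold stale data), and mask the result
  // back to the narrow width afterwards.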
4317 | if (NeedTrunc) { |
4318 | Op0Reg = emitIntExt(SrcVT: RetVT, SrcReg: Op0Reg, DestVT: MVT::i32, /*isZExt=*/false); |
4319 | Op1Reg = emitAnd_ri(RetVT: MVT::i32, LHSReg: Op1Reg, Imm: Mask); |
4320 | } |
4321 | Register ResultReg = fastEmitInst_rr(MachineInstOpcode: Opc, RC, Op0: Op0Reg, Op1: Op1Reg); |
4322 | if (NeedTrunc) |
4323 | ResultReg = emitAnd_ri(RetVT: MVT::i32, LHSReg: ResultReg, Imm: Mask); |
4324 | return ResultReg; |
4325 | } |
4326 | |
4327 | Register AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0, |
4328 | uint64_t Shift, bool IsZExt) { |
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");
4336 | |
4337 | bool Is64Bit = (RetVT == MVT::i64); |
4338 | unsigned RegSize = Is64Bit ? 64 : 32; |
4339 | unsigned DstBits = RetVT.getSizeInBits(); |
4340 | unsigned SrcBits = SrcVT.getSizeInBits(); |
4341 | const TargetRegisterClass *RC = |
4342 | Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4343 | |
4344 | // Just emit a copy for "zero" shifts. |
4345 | if (Shift == 0) { |
4346 | if (RetVT == SrcVT) { |
4347 | Register ResultReg = createResultReg(RC); |
4348 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4349 | MCID: TII.get(Opcode: TargetOpcode::COPY), DestReg: ResultReg) |
4350 | .addReg(RegNo: Op0); |
4351 | return ResultReg; |
4352 | } else |
4353 | return emitIntExt(SrcVT, SrcReg: Op0, DestVT: RetVT, isZExt: IsZExt); |
4354 | } |
4355 | |
4356 | // Don't deal with undefined shifts. |
4357 | if (Shift >= DstBits) |
4358 | return Register(); |
4359 | |
4360 | // For immediate shifts we can fold the zero-/sign-extension into the shift. |
4361 | // {S|U}BFM Wd, Wn, #r, #s |
4362 | // Wd<s-r:0> = Wn<s:r> when r <= s |
4363 | |
4364 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4365 | // %2 = ashr i16 %1, 4 |
4366 | // Wd<7-4:0> = Wn<7:4> |
4367 | // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext |
4368 | // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext |
4369 | // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext |
4370 | |
4371 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4372 | // %2 = ashr i16 %1, 8 |
4373 | // Wd<7-7,0> = Wn<7:7> |
4374 | // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext |
4375 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
4376 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
4377 | |
4378 | // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 |
4379 | // %2 = ashr i16 %1, 12 |
4380 | // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 |
4381 | // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext |
4382 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext |
4383 | // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext |
4384 | |
4385 | if (Shift >= SrcBits && IsZExt) |
4386 | return materializeInt(CI: ConstantInt::get(Context&: *Context, V: APInt(RegSize, 0)), VT: RetVT); |
4387 | |
4388 | unsigned ImmR = std::min<unsigned>(a: SrcBits - 1, b: Shift); |
4389 | unsigned ImmS = SrcBits - 1; |
4390 | static const unsigned OpcTable[2][2] = { |
4391 | {AArch64::SBFMWri, AArch64::SBFMXri}, |
4392 | {AArch64::UBFMWri, AArch64::UBFMXri} |
4393 | }; |
4394 | unsigned Opc = OpcTable[IsZExt][Is64Bit]; |
4395 | if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { |
4396 | Register TmpReg = MRI.createVirtualRegister(RegClass: RC); |
4397 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4398 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: TmpReg) |
4399 | .addImm(Val: 0) |
4400 | .addReg(RegNo: Op0) |
4401 | .addImm(Val: AArch64::sub_32); |
4402 | Op0 = TmpReg; |
4403 | } |
4404 | return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0, Imm1: ImmR, Imm2: ImmS); |
4405 | } |
4406 | |
4407 | Register AArch64FastISel::emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, |
4408 | bool IsZExt) { |
  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4410 | |
  // FastISel does not have plumbing to deal with extensions where the SrcVT or
  // DestVT are unusual types, so check that both are types we can handle
  // (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT); otherwise bail
  // out to SelectionDAG.
4415 | if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && |
4416 | (DestVT != MVT::i32) && (DestVT != MVT::i64)) || |
4417 | ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && |
4418 | (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) |
4419 | return Register(); |
4420 | |
4421 | unsigned Opc; |
4422 | unsigned Imm = 0; |
4423 | |
4424 | switch (SrcVT.SimpleTy) { |
4425 | default: |
4426 | return Register(); |
4427 | case MVT::i1: |
4428 | return emiti1Ext(SrcReg, DestVT, IsZExt); |
4429 | case MVT::i8: |
4430 | if (DestVT == MVT::i64) |
4431 | Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; |
4432 | else |
4433 | Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; |
4434 | Imm = 7; |
4435 | break; |
4436 | case MVT::i16: |
4437 | if (DestVT == MVT::i64) |
4438 | Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; |
4439 | else |
4440 | Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; |
4441 | Imm = 15; |
4442 | break; |
4443 | case MVT::i32: |
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4445 | Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; |
4446 | Imm = 31; |
4447 | break; |
4448 | } |
4449 | |
4450 | // Handle i8 and i16 as i32. |
4451 | if (DestVT == MVT::i8 || DestVT == MVT::i16) |
4452 | DestVT = MVT::i32; |
4453 | else if (DestVT == MVT::i64) { |
4454 | Register Src64 = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass); |
4455 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4456 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: Src64) |
4457 | .addImm(Val: 0) |
4458 | .addReg(RegNo: SrcReg) |
4459 | .addImm(Val: AArch64::sub_32); |
4460 | SrcReg = Src64; |
4461 | } |
4462 | |
4463 | const TargetRegisterClass *RC = |
4464 | (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4465 | return fastEmitInst_rii(MachineInstOpcode: Opc, RC, Op0: SrcReg, Imm1: 0, Imm2: Imm); |
4466 | } |
4467 | |
4468 | static bool isZExtLoad(const MachineInstr *LI) { |
4469 | switch (LI->getOpcode()) { |
4470 | default: |
4471 | return false; |
4472 | case AArch64::LDURBBi: |
4473 | case AArch64::LDURHHi: |
4474 | case AArch64::LDURWi: |
4475 | case AArch64::LDRBBui: |
4476 | case AArch64::LDRHHui: |
4477 | case AArch64::LDRWui: |
4478 | case AArch64::LDRBBroX: |
4479 | case AArch64::LDRHHroX: |
4480 | case AArch64::LDRWroX: |
4481 | case AArch64::LDRBBroW: |
4482 | case AArch64::LDRHHroW: |
4483 | case AArch64::LDRWroW: |
4484 | return true; |
4485 | } |
4486 | } |
4487 | |
4488 | static bool isSExtLoad(const MachineInstr *LI) { |
4489 | switch (LI->getOpcode()) { |
4490 | default: |
4491 | return false; |
4492 | case AArch64::LDURSBWi: |
4493 | case AArch64::LDURSHWi: |
4494 | case AArch64::LDURSBXi: |
4495 | case AArch64::LDURSHXi: |
4496 | case AArch64::LDURSWi: |
4497 | case AArch64::LDRSBWui: |
4498 | case AArch64::LDRSHWui: |
4499 | case AArch64::LDRSBXui: |
4500 | case AArch64::LDRSHXui: |
4501 | case AArch64::LDRSWui: |
4502 | case AArch64::LDRSBWroX: |
4503 | case AArch64::LDRSHWroX: |
4504 | case AArch64::LDRSBXroX: |
4505 | case AArch64::LDRSHXroX: |
4506 | case AArch64::LDRSWroX: |
4507 | case AArch64::LDRSBWroW: |
4508 | case AArch64::LDRSHWroW: |
4509 | case AArch64::LDRSBXroW: |
4510 | case AArch64::LDRSHXroW: |
4511 | case AArch64::LDRSWroW: |
4512 | return true; |
4513 | } |
4514 | } |
4515 | |
4516 | bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, |
4517 | MVT SrcVT) { |
4518 | const auto *LI = dyn_cast<LoadInst>(Val: I->getOperand(i: 0)); |
4519 | if (!LI || !LI->hasOneUse()) |
4520 | return false; |
4521 | |
4522 | // Check if the load instruction has already been selected. |
4523 | Register Reg = lookUpRegForValue(V: LI); |
4524 | if (!Reg) |
4525 | return false; |
4526 | |
4527 | MachineInstr *MI = MRI.getUniqueVRegDef(Reg); |
4528 | if (!MI) |
4529 | return false; |
4530 | |
4531 | // Check if the correct load instruction has been emitted - SelectionDAG might |
4532 | // have emitted a zero-extending load, but we need a sign-extending load. |
4533 | bool IsZExt = isa<ZExtInst>(Val: I); |
4534 | const auto *LoadMI = MI; |
4535 | if (LoadMI->getOpcode() == TargetOpcode::COPY && |
4536 | LoadMI->getOperand(i: 1).getSubReg() == AArch64::sub_32) { |
4537 | Register LoadReg = MI->getOperand(i: 1).getReg(); |
4538 | LoadMI = MRI.getUniqueVRegDef(Reg: LoadReg); |
    assert(LoadMI && "Expected valid instruction");
4540 | } |
4541 | if (!(IsZExt && isZExtLoad(LI: LoadMI)) && !(!IsZExt && isSExtLoad(LI: LoadMI))) |
4542 | return false; |
4543 | |
4544 | // Nothing to be done. |
4545 | if (RetVT != MVT::i64 || SrcVT > MVT::i32) { |
4546 | updateValueMap(I, Reg); |
4547 | return true; |
4548 | } |
4549 | |
4550 | if (IsZExt) { |
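    // The 32-bit load has already zeroed the upper 32 bits (writes to a W
    // register clear the high half of the X register), so a SUBREG_TO_REG is
    // enough to produce the 64-bit value.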
4551 | Register Reg64 = createResultReg(RC: &AArch64::GPR64RegClass); |
4552 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4553 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: Reg64) |
4554 | .addImm(Val: 0) |
4555 | .addReg(RegNo: Reg, flags: getKillRegState(B: true)) |
4556 | .addImm(Val: AArch64::sub_32); |
4557 | Reg = Reg64; |
4558 | } else { |
4559 | assert((MI->getOpcode() == TargetOpcode::COPY && |
4560 | MI->getOperand(1).getSubReg() == AArch64::sub_32) && |
4561 | "Expected copy instruction" ); |
4562 | Reg = MI->getOperand(i: 1).getReg(); |
4563 | MachineBasicBlock::iterator I(MI); |
4564 | removeDeadCode(I, E: std::next(x: I)); |
4565 | } |
4566 | updateValueMap(I, Reg); |
4567 | return true; |
4568 | } |
4569 | |
4570 | bool AArch64FastISel::selectIntExt(const Instruction *I) { |
4571 | assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && |
4572 | "Unexpected integer extend instruction." ); |
4573 | MVT RetVT; |
4574 | MVT SrcVT; |
4575 | if (!isTypeSupported(Ty: I->getType(), VT&: RetVT)) |
4576 | return false; |
4577 | |
4578 | if (!isTypeSupported(Ty: I->getOperand(i: 0)->getType(), VT&: SrcVT)) |
4579 | return false; |
4580 | |
4581 | // Try to optimize already sign-/zero-extended values from load instructions. |
4582 | if (optimizeIntExtLoad(I, RetVT, SrcVT)) |
4583 | return true; |
4584 | |
4585 | Register SrcReg = getRegForValue(V: I->getOperand(i: 0)); |
4586 | if (!SrcReg) |
4587 | return false; |
4588 | |
4589 | // Try to optimize already sign-/zero-extended values from function arguments. |
4590 | bool IsZExt = isa<ZExtInst>(Val: I); |
4591 | if (const auto *Arg = dyn_cast<Argument>(Val: I->getOperand(i: 0))) { |
4592 | if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { |
4593 | if (RetVT == MVT::i64 && SrcVT != MVT::i64) { |
4594 | Register ResultReg = createResultReg(RC: &AArch64::GPR64RegClass); |
4595 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, |
4596 | MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: ResultReg) |
4597 | .addImm(Val: 0) |
4598 | .addReg(RegNo: SrcReg) |
4599 | .addImm(Val: AArch64::sub_32); |
4600 | SrcReg = ResultReg; |
4601 | } |
4602 | |
4603 | updateValueMap(I, Reg: SrcReg); |
4604 | return true; |
4605 | } |
4606 | } |
4607 | |
4608 | Register ResultReg = emitIntExt(SrcVT, SrcReg, DestVT: RetVT, IsZExt); |
4609 | if (!ResultReg) |
4610 | return false; |
4611 | |
4612 | updateValueMap(I, Reg: ResultReg); |
4613 | return true; |
4614 | } |
4615 | |
4616 | bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { |
4617 | EVT DestEVT = TLI.getValueType(DL, Ty: I->getType(), AllowUnknown: true); |
4618 | if (!DestEVT.isSimple()) |
4619 | return false; |
4620 | |
4621 | MVT DestVT = DestEVT.getSimpleVT(); |
4622 | if (DestVT != MVT::i64 && DestVT != MVT::i32) |
4623 | return false; |
4624 | |
4625 | unsigned DivOpc; |
4626 | bool Is64bit = (DestVT == MVT::i64); |
4627 | switch (ISDOpcode) { |
4628 | default: |
4629 | return false; |
4630 | case ISD::SREM: |
4631 | DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr; |
4632 | break; |
4633 | case ISD::UREM: |
4634 | DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; |
4635 | break; |
4636 | } |
4637 | unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; |
4638 | Register Src0Reg = getRegForValue(V: I->getOperand(i: 0)); |
4639 | if (!Src0Reg) |
4640 | return false; |
4641 | |
4642 | Register Src1Reg = getRegForValue(V: I->getOperand(i: 1)); |
4643 | if (!Src1Reg) |
4644 | return false; |
4645 | |
4646 | const TargetRegisterClass *RC = |
4647 | (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; |
4648 | Register QuotReg = fastEmitInst_rr(MachineInstOpcode: DivOpc, RC, Op0: Src0Reg, Op1: Src1Reg); |
  assert(QuotReg && "Unexpected DIV instruction emission failure.");
4650 | // The remainder is computed as numerator - (quotient * denominator) using the |
4651 | // MSUB instruction. |
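  // Roughly, for "%r = srem i32 %a, %b" this emits (register names symbolic):
  //   sdiv wQ, wA, wB
  //   msub wR, wQ, wB, wA    // wR = wA - wQ * wB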
4652 | Register ResultReg = fastEmitInst_rrr(MachineInstOpcode: MSubOpc, RC, Op0: QuotReg, Op1: Src1Reg, Op2: Src0Reg); |
4653 | updateValueMap(I, Reg: ResultReg); |
4654 | return true; |
4655 | } |
4656 | |
4657 | bool AArch64FastISel::selectMul(const Instruction *I) { |
4658 | MVT VT; |
4659 | if (!isTypeSupported(Ty: I->getType(), VT, /*IsVectorAllowed=*/true)) |
4660 | return false; |
4661 | |
4662 | if (VT.isVector()) |
4663 | return selectBinaryOp(I, ISDOpcode: ISD::MUL); |
4664 | |
4665 | const Value *Src0 = I->getOperand(i: 0); |
4666 | const Value *Src1 = I->getOperand(i: 1); |
4667 | if (const auto *C = dyn_cast<ConstantInt>(Val: Src0)) |
4668 | if (C->getValue().isPowerOf2()) |
4669 | std::swap(a&: Src0, b&: Src1); |
4670 | |
4671 | // Try to simplify to a shift instruction. |
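  // E.g. "%p = mul i32 %x, 8" is selected as a left shift by 3.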
4672 | if (const auto *C = dyn_cast<ConstantInt>(Val: Src1)) |
4673 | if (C->getValue().isPowerOf2()) { |
4674 | uint64_t ShiftVal = C->getValue().logBase2(); |
4675 | MVT SrcVT = VT; |
4676 | bool IsZExt = true; |
4677 | if (const auto *ZExt = dyn_cast<ZExtInst>(Val: Src0)) { |
4678 | if (!isIntExtFree(I: ZExt)) { |
4679 | MVT VT; |
4680 | if (isValueAvailable(V: ZExt) && isTypeSupported(Ty: ZExt->getSrcTy(), VT)) { |
4681 | SrcVT = VT; |
4682 | IsZExt = true; |
4683 | Src0 = ZExt->getOperand(i_nocapture: 0); |
4684 | } |
4685 | } |
4686 | } else if (const auto *SExt = dyn_cast<SExtInst>(Val: Src0)) { |
4687 | if (!isIntExtFree(I: SExt)) { |
4688 | MVT VT; |
4689 | if (isValueAvailable(V: SExt) && isTypeSupported(Ty: SExt->getSrcTy(), VT)) { |
4690 | SrcVT = VT; |
4691 | IsZExt = false; |
4692 | Src0 = SExt->getOperand(i_nocapture: 0); |
4693 | } |
4694 | } |
4695 | } |
4696 | |
4697 | Register Src0Reg = getRegForValue(V: Src0); |
4698 | if (!Src0Reg) |
4699 | return false; |
4700 | |
4701 | Register ResultReg = emitLSL_ri(RetVT: VT, SrcVT, Op0: Src0Reg, Shift: ShiftVal, IsZExt); |
4702 | |
4703 | if (ResultReg) { |
4704 | updateValueMap(I, Reg: ResultReg); |
4705 | return true; |
4706 | } |
4707 | } |
4708 | |
4709 | Register Src0Reg = getRegForValue(V: I->getOperand(i: 0)); |
4710 | if (!Src0Reg) |
4711 | return false; |
4712 | |
4713 | Register Src1Reg = getRegForValue(V: I->getOperand(i: 1)); |
4714 | if (!Src1Reg) |
4715 | return false; |
4716 | |
4717 | Register ResultReg = emitMul_rr(RetVT: VT, Op0: Src0Reg, Op1: Src1Reg); |
4718 | |
4719 | if (!ResultReg) |
4720 | return false; |
4721 | |
4722 | updateValueMap(I, Reg: ResultReg); |
4723 | return true; |
4724 | } |
4725 | |
4726 | bool AArch64FastISel::selectShift(const Instruction *I) { |
4727 | MVT RetVT; |
4728 | if (!isTypeSupported(Ty: I->getType(), VT&: RetVT, /*IsVectorAllowed=*/true)) |
4729 | return false; |
4730 | |
4731 | if (RetVT.isVector()) |
4732 | return selectOperator(I, Opcode: I->getOpcode()); |
4733 | |
4734 | if (const auto *C = dyn_cast<ConstantInt>(Val: I->getOperand(i: 1))) { |
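    // With a constant shift amount, a zero-/sign-extend feeding the shift can
    // often be folded into the immediate (bitfield-move) form of the shift
    // below, e.g. "zext i8 %x to i32" + "shl i32 %1, 4" becomes a single UBFM.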
4735 | Register ResultReg; |
4736 | uint64_t ShiftVal = C->getZExtValue(); |
4737 | MVT SrcVT = RetVT; |
4738 | bool IsZExt = I->getOpcode() != Instruction::AShr; |
4739 | const Value *Op0 = I->getOperand(i: 0); |
4740 | if (const auto *ZExt = dyn_cast<ZExtInst>(Val: Op0)) { |
4741 | if (!isIntExtFree(I: ZExt)) { |
4742 | MVT TmpVT; |
4743 | if (isValueAvailable(V: ZExt) && isTypeSupported(Ty: ZExt->getSrcTy(), VT&: TmpVT)) { |
4744 | SrcVT = TmpVT; |
4745 | IsZExt = true; |
4746 | Op0 = ZExt->getOperand(i_nocapture: 0); |
4747 | } |
4748 | } |
4749 | } else if (const auto *SExt = dyn_cast<SExtInst>(Val: Op0)) { |
4750 | if (!isIntExtFree(I: SExt)) { |
4751 | MVT TmpVT; |
4752 | if (isValueAvailable(V: SExt) && isTypeSupported(Ty: SExt->getSrcTy(), VT&: TmpVT)) { |
4753 | SrcVT = TmpVT; |
4754 | IsZExt = false; |
4755 | Op0 = SExt->getOperand(i_nocapture: 0); |
4756 | } |
4757 | } |
4758 | } |
4759 | |
4760 | Register Op0Reg = getRegForValue(V: Op0); |
4761 | if (!Op0Reg) |
4762 | return false; |
4763 | |
4764 | switch (I->getOpcode()) { |
4765 | default: llvm_unreachable("Unexpected instruction." ); |
4766 | case Instruction::Shl: |
4767 | ResultReg = emitLSL_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt); |
4768 | break; |
4769 | case Instruction::AShr: |
4770 | ResultReg = emitASR_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt); |
4771 | break; |
4772 | case Instruction::LShr: |
4773 | ResultReg = emitLSR_ri(RetVT, SrcVT, Op0: Op0Reg, Shift: ShiftVal, IsZExt); |
4774 | break; |
4775 | } |
4776 | if (!ResultReg) |
4777 | return false; |
4778 | |
4779 | updateValueMap(I, Reg: ResultReg); |
4780 | return true; |
4781 | } |
4782 | |
4783 | Register Op0Reg = getRegForValue(V: I->getOperand(i: 0)); |
4784 | if (!Op0Reg) |
4785 | return false; |
4786 | |
4787 | Register Op1Reg = getRegForValue(V: I->getOperand(i: 1)); |
4788 | if (!Op1Reg) |
4789 | return false; |
4790 | |
4791 | Register ResultReg; |
4792 | switch (I->getOpcode()) { |
4793 | default: llvm_unreachable("Unexpected instruction." ); |
4794 | case Instruction::Shl: |
4795 | ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg); |
4796 | break; |
4797 | case Instruction::AShr: |
4798 | ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg); |
4799 | break; |
4800 | case Instruction::LShr: |
4801 | ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg); |
4802 | break; |
4803 | } |
4804 | |
4805 | if (!ResultReg) |
4806 | return false; |
4807 | |
4808 | updateValueMap(I, Reg: ResultReg); |
4809 | return true; |
4810 | } |
4811 | |
4812 | bool AArch64FastISel::selectBitCast(const Instruction *I) { |
4813 | MVT RetVT, SrcVT; |
4814 | |
4815 | if (!isTypeLegal(Ty: I->getOperand(i: 0)->getType(), VT&: SrcVT)) |
4816 | return false; |
4817 | if (!isTypeLegal(Ty: I->getType(), VT&: RetVT)) |
4818 | return false; |
4819 | |
4820 | unsigned Opc; |
4821 | if (RetVT == MVT::f32 && SrcVT == MVT::i32) |
4822 | Opc = AArch64::FMOVWSr; |
4823 | else if (RetVT == MVT::f64 && SrcVT == MVT::i64) |
4824 | Opc = AArch64::FMOVXDr; |
4825 | else if (RetVT == MVT::i32 && SrcVT == MVT::f32) |
4826 | Opc = AArch64::FMOVSWr; |
4827 | else if (RetVT == MVT::i64 && SrcVT == MVT::f64) |
4828 | Opc = AArch64::FMOVDXr; |
4829 | else |
4830 | return false; |
4831 | |
4832 | const TargetRegisterClass *RC = nullptr; |
4833 | switch (RetVT.SimpleTy) { |
4834 | default: llvm_unreachable("Unexpected value type." ); |
4835 | case MVT::i32: RC = &AArch64::GPR32RegClass; break; |
4836 | case MVT::i64: RC = &AArch64::GPR64RegClass; break; |
4837 | case MVT::f32: RC = &AArch64::FPR32RegClass; break; |
4838 | case MVT::f64: RC = &AArch64::FPR64RegClass; break; |
4839 | } |
4840 | Register Op0Reg = getRegForValue(V: I->getOperand(i: 0)); |
4841 | if (!Op0Reg) |
4842 | return false; |
4843 | |
4844 | Register ResultReg = fastEmitInst_r(MachineInstOpcode: Opc, RC, Op0: Op0Reg); |
4845 | if (!ResultReg) |
4846 | return false; |
4847 | |
4848 | updateValueMap(I, Reg: ResultReg); |
4849 | return true; |
4850 | } |
4851 | |
4852 | bool AArch64FastISel::selectFRem(const Instruction *I) { |
4853 | MVT RetVT; |
4854 | if (!isTypeLegal(Ty: I->getType(), VT&: RetVT)) |
4855 | return false; |
4856 | |
4857 | RTLIB::Libcall LC; |
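  // There is no AArch64 instruction for frem, so lower it to a runtime library
  // call (typically fmodf for f32 and fmod for f64).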
4858 | switch (RetVT.SimpleTy) { |
4859 | default: |
4860 | return false; |
4861 | case MVT::f32: |
4862 | LC = RTLIB::REM_F32; |
4863 | break; |
4864 | case MVT::f64: |
4865 | LC = RTLIB::REM_F64; |
4866 | break; |
4867 | } |
4868 | |
4869 | ArgListTy Args; |
4870 | Args.reserve(n: I->getNumOperands()); |
4871 | |
4872 | // Populate the argument list. |
4873 | for (auto &Arg : I->operands()) { |
4874 | ArgListEntry Entry; |
4875 | Entry.Val = Arg; |
4876 | Entry.Ty = Arg->getType(); |
4877 | Args.push_back(x: Entry); |
4878 | } |
4879 | |
4880 | CallLoweringInfo CLI; |
4881 | MCContext &Ctx = MF->getContext(); |
4882 | CLI.setCallee(DL, Ctx, CC: TLI.getLibcallCallingConv(Call: LC), ResultTy: I->getType(), |
4883 | Target: TLI.getLibcallName(Call: LC), ArgsList: std::move(Args)); |
4884 | if (!lowerCallTo(CLI)) |
4885 | return false; |
4886 | updateValueMap(I, Reg: CLI.ResultReg); |
4887 | return true; |
4888 | } |
4889 | |
4890 | bool AArch64FastISel::selectSDiv(const Instruction *I) { |
4891 | MVT VT; |
4892 | if (!isTypeLegal(Ty: I->getType(), VT)) |
4893 | return false; |
4894 | |
4895 | if (!isa<ConstantInt>(Val: I->getOperand(i: 1))) |
4896 | return selectBinaryOp(I, ISDOpcode: ISD::SDIV); |
4897 | |
4898 | const APInt &C = cast<ConstantInt>(Val: I->getOperand(i: 1))->getValue(); |
4899 | if ((VT != MVT::i32 && VT != MVT::i64) || !C || |
4900 | !(C.isPowerOf2() || C.isNegatedPowerOf2())) |
4901 | return selectBinaryOp(I, ISDOpcode: ISD::SDIV); |
4902 | |
4903 | unsigned Lg2 = C.countr_zero(); |
4904 | Register Src0Reg = getRegForValue(V: I->getOperand(i: 0)); |
4905 | if (!Src0Reg) |
4906 | return false; |
4907 | |
4908 | if (cast<BinaryOperator>(Val: I)->isExact()) { |
4909 | Register ResultReg = emitASR_ri(RetVT: VT, SrcVT: VT, Op0: Src0Reg, Shift: Lg2); |
4910 | if (!ResultReg) |
4911 | return false; |
4912 | updateValueMap(I, Reg: ResultReg); |
4913 | return true; |
4914 | } |
4915 | |
4916 | int64_t Pow2MinusOne = (1ULL << Lg2) - 1; |
4917 | Register AddReg = emitAdd_ri_(VT, Op0: Src0Reg, Imm: Pow2MinusOne); |
4918 | if (!AddReg) |
4919 | return false; |
4920 | |
4921 | // (Src0 < 0) ? Pow2 - 1 : 0; |
4922 | if (!emitICmp_ri(RetVT: VT, LHSReg: Src0Reg, Imm: 0)) |
4923 | return false; |
4924 | |
4925 | unsigned SelectOpc; |
4926 | const TargetRegisterClass *RC; |
4927 | if (VT == MVT::i64) { |
4928 | SelectOpc = AArch64::CSELXr; |
4929 | RC = &AArch64::GPR64RegClass; |
4930 | } else { |
4931 | SelectOpc = AArch64::CSELWr; |
4932 | RC = &AArch64::GPR32RegClass; |
4933 | } |
4934 | Register SelectReg = fastEmitInst_rri(MachineInstOpcode: SelectOpc, RC, Op0: AddReg, Op1: Src0Reg, |
4935 | Imm: AArch64CC::LT); |
4936 | if (!SelectReg) |
4937 | return false; |
4938 | |
4939 | // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also |
4940 | // negate the result. |
4941 | Register ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; |
4942 | Register ResultReg; |
4943 | if (C.isNegative()) |
4944 | ResultReg = emitAddSub_rs(/*UseAdd=*/false, RetVT: VT, LHSReg: ZeroReg, RHSReg: SelectReg, |
4945 | ShiftType: AArch64_AM::ASR, ShiftImm: Lg2); |
4946 | else |
4947 | ResultReg = emitASR_ri(RetVT: VT, SrcVT: VT, Op0: SelectReg, Shift: Lg2); |
4948 | |
4949 | if (!ResultReg) |
4950 | return false; |
4951 | |
4952 | updateValueMap(I, Reg: ResultReg); |
4953 | return true; |
4954 | } |
4955 | |
4956 | /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We |
4957 | /// have to duplicate it for AArch64, because otherwise we would fail during the |
4958 | /// sign-extend emission. |
4959 | Register AArch64FastISel::getRegForGEPIndex(const Value *Idx) { |
4960 | Register IdxN = getRegForValue(V: Idx); |
4961 | if (!IdxN) |
4962 | // Unhandled operand. Halt "fast" selection and bail. |
4963 | return Register(); |
4964 | |
4965 | // If the index is smaller or larger than intptr_t, truncate or extend it. |
4966 | MVT PtrVT = TLI.getPointerTy(DL); |
4967 | EVT IdxVT = EVT::getEVT(Ty: Idx->getType(), /*HandleUnknown=*/false); |
4968 | if (IdxVT.bitsLT(VT: PtrVT)) { |
4969 | IdxN = emitIntExt(SrcVT: IdxVT.getSimpleVT(), SrcReg: IdxN, DestVT: PtrVT, /*isZExt=*/IsZExt: false); |
4970 | } else if (IdxVT.bitsGT(VT: PtrVT)) |
4971 | llvm_unreachable("AArch64 FastISel doesn't support types larger than i64" ); |
4972 | return IdxN; |
4973 | } |
4974 | |
4975 | /// This is mostly a copy of the existing FastISel GEP code, but we have to |
4976 | /// duplicate it for AArch64, because otherwise we would bail out even for |
4977 | /// simple cases. This is because the standard fastEmit functions don't cover |
/// MUL at all and ADD is lowered very inefficiently.
4979 | bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { |
4980 | if (Subtarget->isTargetILP32()) |
4981 | return false; |
4982 | |
4983 | Register N = getRegForValue(V: I->getOperand(i: 0)); |
4984 | if (!N) |
4985 | return false; |
4986 | |
4987 | // Keep a running tab of the total offset to coalesce multiple N = N + Offset |
4988 | // into a single N = N + TotalOffset. |
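  // E.g. a constant struct-field offset of 8 plus a constant array index worth
  // another 16 bytes is emitted as one "add N, N, #24" instead of two adds.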
4989 | uint64_t TotalOffs = 0; |
4990 | MVT VT = TLI.getPointerTy(DL); |
4991 | for (gep_type_iterator GTI = gep_type_begin(GEP: I), E = gep_type_end(GEP: I); |
4992 | GTI != E; ++GTI) { |
4993 | const Value *Idx = GTI.getOperand(); |
4994 | if (auto *StTy = GTI.getStructTypeOrNull()) { |
4995 | unsigned Field = cast<ConstantInt>(Val: Idx)->getZExtValue(); |
4996 | // N = N + Offset |
4997 | if (Field) |
4998 | TotalOffs += DL.getStructLayout(Ty: StTy)->getElementOffset(Idx: Field); |
4999 | } else { |
5000 | // If this is a constant subscript, handle it quickly. |
5001 | if (const auto *CI = dyn_cast<ConstantInt>(Val: Idx)) { |
5002 | if (CI->isZero()) |
5003 | continue; |
5004 | // N = N + Offset |
5005 | TotalOffs += GTI.getSequentialElementStride(DL) * |
5006 | cast<ConstantInt>(Val: CI)->getSExtValue(); |
5007 | continue; |
5008 | } |
5009 | if (TotalOffs) { |
5010 | N = emitAdd_ri_(VT, Op0: N, Imm: TotalOffs); |
5011 | if (!N) |
5012 | return false; |
5013 | TotalOffs = 0; |
5014 | } |
5015 | |
5016 | // N = N + Idx * ElementSize; |
5017 | uint64_t ElementSize = GTI.getSequentialElementStride(DL); |
5018 | Register IdxN = getRegForGEPIndex(Idx); |
5019 | if (!IdxN) |
5020 | return false; |
5021 | |
5022 | if (ElementSize != 1) { |
5023 | Register C = fastEmit_i(VT, RetVT: VT, Opcode: ISD::Constant, imm0: ElementSize); |
5024 | if (!C) |
5025 | return false; |
5026 | IdxN = emitMul_rr(RetVT: VT, Op0: IdxN, Op1: C); |
5027 | if (!IdxN) |
5028 | return false; |
5029 | } |
5030 | N = fastEmit_rr(VT, RetVT: VT, Opcode: ISD::ADD, Op0: N, Op1: IdxN); |
5031 | if (!N) |
5032 | return false; |
5033 | } |
5034 | } |
5035 | if (TotalOffs) { |
5036 | N = emitAdd_ri_(VT, Op0: N, Imm: TotalOffs); |
5037 | if (!N) |
5038 | return false; |
5039 | } |
5040 | updateValueMap(I, Reg: N); |
5041 | return true; |
5042 | } |
5043 | |
5044 | bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) { |
5045 | assert(TM.getOptLevel() == CodeGenOptLevel::None && |
5046 | "cmpxchg survived AtomicExpand at optlevel > -O0" ); |
5047 | |
5048 | auto *RetPairTy = cast<StructType>(Val: I->getType()); |
5049 | Type *RetTy = RetPairTy->getTypeAtIndex(N: 0U); |
5050 | assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) && |
5051 | "cmpxchg has a non-i1 status result" ); |
5052 | |
5053 | MVT VT; |
5054 | if (!isTypeLegal(Ty: RetTy, VT)) |
5055 | return false; |
5056 | |
5057 | const TargetRegisterClass *ResRC; |
5058 | unsigned Opc, CmpOpc; |
5059 | // This only supports i32/i64, because i8/i16 aren't legal, and the generic |
5060 | // extractvalue selection doesn't support that. |
5061 | if (VT == MVT::i32) { |
5062 | Opc = AArch64::CMP_SWAP_32; |
5063 | CmpOpc = AArch64::SUBSWrs; |
5064 | ResRC = &AArch64::GPR32RegClass; |
5065 | } else if (VT == MVT::i64) { |
5066 | Opc = AArch64::CMP_SWAP_64; |
5067 | CmpOpc = AArch64::SUBSXrs; |
5068 | ResRC = &AArch64::GPR64RegClass; |
5069 | } else { |
5070 | return false; |
5071 | } |
5072 | |
5073 | const MCInstrDesc &II = TII.get(Opcode: Opc); |
5074 | |
5075 | Register AddrReg = getRegForValue(V: I->getPointerOperand()); |
5076 | Register DesiredReg = getRegForValue(V: I->getCompareOperand()); |
5077 | Register NewReg = getRegForValue(V: I->getNewValOperand()); |
5078 | |
5079 | if (!AddrReg || !DesiredReg || !NewReg) |
5080 | return false; |
5081 | |
5082 | AddrReg = constrainOperandRegClass(II, Op: AddrReg, OpNum: II.getNumDefs()); |
5083 | DesiredReg = constrainOperandRegClass(II, Op: DesiredReg, OpNum: II.getNumDefs() + 1); |
5084 | NewReg = constrainOperandRegClass(II, Op: NewReg, OpNum: II.getNumDefs() + 2); |
5085 | |
5086 | const Register ResultReg1 = createResultReg(RC: ResRC); |
5087 | const Register ResultReg2 = createResultReg(RC: &AArch64::GPR32RegClass); |
5088 | const Register ScratchReg = createResultReg(RC: &AArch64::GPR32RegClass); |
5089 | |
5090 | // FIXME: MachineMemOperand doesn't support cmpxchg yet. |
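  // Emit the CMP_SWAP pseudo (expanded after selection into the actual atomic
  // sequence), then compare the returned old value with the expected one and
  // materialize the i1 success flag with a CSINC.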
5091 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: II) |
5092 | .addDef(RegNo: ResultReg1) |
5093 | .addDef(RegNo: ScratchReg) |
5094 | .addUse(RegNo: AddrReg) |
5095 | .addUse(RegNo: DesiredReg) |
5096 | .addUse(RegNo: NewReg); |
5097 | |
5098 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: CmpOpc)) |
5099 | .addDef(RegNo: VT == MVT::i32 ? AArch64::WZR : AArch64::XZR) |
5100 | .addUse(RegNo: ResultReg1) |
5101 | .addUse(RegNo: DesiredReg) |
5102 | .addImm(Val: 0); |
5103 | |
5104 | BuildMI(BB&: *FuncInfo.MBB, I: FuncInfo.InsertPt, MIMD, MCID: TII.get(Opcode: AArch64::CSINCWr)) |
5105 | .addDef(RegNo: ResultReg2) |
5106 | .addUse(RegNo: AArch64::WZR) |
5107 | .addUse(RegNo: AArch64::WZR) |
5108 | .addImm(Val: AArch64CC::NE); |
5109 | |
  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5111 | updateValueMap(I, Reg: ResultReg1, NumRegs: 2); |
5112 | return true; |
5113 | } |
5114 | |
5115 | bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { |
5116 | if (TLI.fallBackToDAGISel(Inst: *I)) |
5117 | return false; |
5118 | switch (I->getOpcode()) { |
5119 | default: |
5120 | break; |
5121 | case Instruction::Add: |
5122 | case Instruction::Sub: |
5123 | return selectAddSub(I); |
5124 | case Instruction::Mul: |
5125 | return selectMul(I); |
5126 | case Instruction::SDiv: |
5127 | return selectSDiv(I); |
5128 | case Instruction::SRem: |
5129 | if (!selectBinaryOp(I, ISDOpcode: ISD::SREM)) |
5130 | return selectRem(I, ISDOpcode: ISD::SREM); |
5131 | return true; |
5132 | case Instruction::URem: |
5133 | if (!selectBinaryOp(I, ISDOpcode: ISD::UREM)) |
5134 | return selectRem(I, ISDOpcode: ISD::UREM); |
5135 | return true; |
5136 | case Instruction::Shl: |
5137 | case Instruction::LShr: |
5138 | case Instruction::AShr: |
5139 | return selectShift(I); |
5140 | case Instruction::And: |
5141 | case Instruction::Or: |
5142 | case Instruction::Xor: |
5143 | return selectLogicalOp(I); |
5144 | case Instruction::Br: |
5145 | return selectBranch(I); |
5146 | case Instruction::IndirectBr: |
5147 | return selectIndirectBr(I); |
5148 | case Instruction::BitCast: |
5149 | if (!FastISel::selectBitCast(I)) |
5150 | return selectBitCast(I); |
5151 | return true; |
5152 | case Instruction::FPToSI: |
5153 | if (!selectCast(I, Opcode: ISD::FP_TO_SINT)) |
5154 | return selectFPToInt(I, /*Signed=*/true); |
5155 | return true; |
5156 | case Instruction::FPToUI: |
5157 | return selectFPToInt(I, /*Signed=*/false); |
5158 | case Instruction::ZExt: |
5159 | case Instruction::SExt: |
5160 | return selectIntExt(I); |
5161 | case Instruction::Trunc: |
5162 | if (!selectCast(I, Opcode: ISD::TRUNCATE)) |
5163 | return selectTrunc(I); |
5164 | return true; |
5165 | case Instruction::FPExt: |
5166 | return selectFPExt(I); |
5167 | case Instruction::FPTrunc: |
5168 | return selectFPTrunc(I); |
5169 | case Instruction::SIToFP: |
5170 | if (!selectCast(I, Opcode: ISD::SINT_TO_FP)) |
5171 | return selectIntToFP(I, /*Signed=*/true); |
5172 | return true; |
5173 | case Instruction::UIToFP: |
5174 | return selectIntToFP(I, /*Signed=*/false); |
5175 | case Instruction::Load: |
5176 | return selectLoad(I); |
5177 | case Instruction::Store: |
5178 | return selectStore(I); |
5179 | case Instruction::FCmp: |
5180 | case Instruction::ICmp: |
5181 | return selectCmp(I); |
5182 | case Instruction::Select: |
5183 | return selectSelect(I); |
5184 | case Instruction::Ret: |
5185 | return selectRet(I); |
5186 | case Instruction::FRem: |
5187 | return selectFRem(I); |
5188 | case Instruction::GetElementPtr: |
5189 | return selectGetElementPtr(I); |
5190 | case Instruction::AtomicCmpXchg: |
5191 | return selectAtomicCmpXchg(I: cast<AtomicCmpXchgInst>(Val: I)); |
5192 | } |
5193 | |
  // Fall back to target-independent instruction selection.
5195 | return selectOperator(I, Opcode: I->getOpcode()); |
5196 | } |
5197 | |
5198 | FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, |
5199 | const TargetLibraryInfo *LibInfo) { |
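  // FastISel has no support for SME streaming-mode transitions or ZA/ZT0
  // state, so bail out and let the default instruction selector handle such
  // functions.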
5200 | |
5201 | SMEAttrs CallerAttrs = |
5202 | FuncInfo.MF->getInfo<AArch64FunctionInfo>()->getSMEFnAttrs(); |
5203 | if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() || |
5204 | CallerAttrs.hasStreamingInterfaceOrBody() || |
5205 | CallerAttrs.hasStreamingCompatibleInterface() || |
5206 | CallerAttrs.hasAgnosticZAInterface()) |
5207 | return nullptr; |
5208 | return new AArch64FastISel(FuncInfo, LibInfo); |
5209 | } |
5210 | |