1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "MCTargetDesc/AArch64AddressingModes.h"
22#include "MCTargetDesc/AArch64MCTargetDesc.h"
23#include "llvm/BinaryFormat/Dwarf.h"
24#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
25#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
26#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
27#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
28#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
29#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
30#include "llvm/CodeGen/GlobalISel/Utils.h"
31#include "llvm/CodeGen/MachineBasicBlock.h"
32#include "llvm/CodeGen/MachineConstantPool.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
34#include "llvm/CodeGen/MachineFunction.h"
35#include "llvm/CodeGen/MachineInstr.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineMemOperand.h"
38#include "llvm/CodeGen/MachineOperand.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/TargetOpcodes.h"
41#include "llvm/CodeGen/TargetRegisterInfo.h"
42#include "llvm/IR/Constants.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/Instructions.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/raw_ostream.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
// Forward declarations for the profile-analysis types taken (as pointers) by
// setupMF() below; full definitions are not needed in this file.
namespace llvm {
class BlockFrequencyInfo;
class ProfileSummaryInfo;
} // namespace llvm
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
/// GlobalISel instruction selector for AArch64. Combines the TableGen-erated
/// matcher (selectImpl) with hand-written pre-lowering, early-selection, and
/// emission helpers for the cases the importer cannot handle yet.
class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  /// Main entry point: select target instructions for the generic MI \p I.
  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

  /// Per-function setup: caches the MachineFunction in MIB and precomputes
  /// per-function state before selection runs.
  void setupMF(MachineFunction &MF, GISelValueTracking *VT,
               CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
               BlockFrequencyInfo *BFI) override {
    InstructionSelector::setupMF(mf&: MF, vt: VT, covinfo: CoverageInfo, psi: PSI, bfi: BFI);
    MIB.setMF(MF);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Kind: Attribute::SpeculativeLoadHardening);
    // Reset the cached return-address register for the new function.
    MFReturnAddr = Register();

    processPHIs(MF);
  }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // A lowering phase that runs before any selection attempts.
  // Returns true if the instruction was modified.
  bool preISelLower(MachineInstr &I);

  // An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I);

  /// Save state that is shared between select calls, call select on \p I and
  /// then restore the saved state. This can be used to recursively call select
  /// within a select call.
  bool selectAndRestoreState(MachineInstr &I);

  // Do some preprocessing of G_PHIs before we begin selection.
  void processPHIs(MachineFunction &MF);

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  bool contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI);

  bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);

  // va_start lowering, per calling convention (AAPCS vs. Darwin).
  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  ///@{
  /// Helper functions for selectCompareBranch.
  bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
                                    MachineIRBuilder &MIB) const;
  bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
                                    MachineIRBuilder &MIB) const;
  bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
                                    MachineIRBuilder &MIB) const;
  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
                                  MachineBasicBlock *DstMBB,
                                  MachineIRBuilder &MIB) const;
  ///@}

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI);

  bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;
  /// Helper to narrow vector that was widened by emitScalarToVector.
  /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
  /// vector, correspondingly.
  MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
                                 MachineIRBuilder &MIRBuilder,
                                 MachineRegisterInfo &MRI) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if
  /// std::nullopt is provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;

  /// Emit a sequence of instructions representing a constant \p CV for a
  /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
  ///
  /// \returns the last instruction in the sequence on success, and nullptr
  /// otherwise.
  MachineInstr *emitConstantVector(Register Dst, Constant *CV,
                                   MachineIRBuilder &MIRBuilder,
                                   MachineRegisterInfo &MRI);

  // Attempts to materialize \p Bits via the AdvSIMD MOVI/MVNI/FMOV modified
  // immediate encodings; each returns nullptr when the value doesn't fit the
  // corresponding encoding. \p Inv selects the inverted (MVNI) form.
  MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
                                  MachineIRBuilder &MIRBuilder);

  MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
                                   MachineIRBuilder &MIRBuilder, bool Inv);

  MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
                                   MachineIRBuilder &MIRBuilder, bool Inv);
  MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
                                   MachineIRBuilder &MIRBuilder);
  MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
                                     MachineIRBuilder &MIRBuilder, bool Inv);
  MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
                                   MachineIRBuilder &MIRBuilder);

  bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
                              MachineRegisterInfo &MRI);
  /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
  /// SUBREG_TO_REG.
  bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);

  /// Helper function to select vector load intrinsics like
  /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
  /// \p Opc is the opcode that the selected instruction should use.
  /// \p NumVecs is the number of vector destinations for the instruction.
  /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
  bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
                                 MachineInstr &I);
  bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
                                     MachineInstr &I);
  void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
                                  unsigned Opc);
  bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
                                      unsigned Opc);
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI);
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectPtrAuthGlobalValue(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
  void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
                   unsigned Opc1, unsigned Opc2, bool isExt);

  bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);

  // Constant-pool helpers: create an entry for \p CPVal and emit the load of
  // it, respectively.
  unsigned emitConstantPoolEntry(const Constant *CPVal,
                                 MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;

  // Emit an integer compare between LHS and RHS, which checks for Predicate.
  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                   MachineOperand &Predicate,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a floating point comparison between \p LHS and \p RHS.
  /// \p Pred if given is the intended predicate to use.
  MachineInstr *
  emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
                std::optional<CmpInst::Predicate> = std::nullopt) const;

  MachineInstr *
  emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
            std::initializer_list<llvm::SrcOp> SrcOps,
            MachineIRBuilder &MIRBuilder,
            const ComplexRendererFns &RenderFns = std::nullopt) const;
  /// Helper function to emit an add or sub instruction.
  ///
  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
  /// in a specific order.
  ///
  /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
  ///
  /// \code
  /// const std::array<std::array<unsigned, 2>, 4> Table {
  ///  {{AArch64::ADDXri, AArch64::ADDWri},
  ///   {AArch64::ADDXrs, AArch64::ADDWrs},
  ///   {AArch64::ADDXrr, AArch64::ADDWrr},
  ///   {AArch64::SUBXri, AArch64::SUBWri},
  ///   {AArch64::ADDXrx, AArch64::ADDWrx}}};
  /// \endcode
  ///
  /// Each row in the table corresponds to a different addressing mode. Each
  /// column corresponds to a different register size.
  ///
  /// \attention Rows must be structured as follows:
  ///   - Row 0: The ri opcode variants
  ///   - Row 1: The rs opcode variants
  ///   - Row 2: The rr opcode variants
  ///   - Row 3: The ri opcode variants for negative immediates
  ///   - Row 4: The rx opcode variants
  ///
  /// \attention Columns must be structured as follows:
  ///   - Column 0: The 64-bit opcode variants
  ///   - Column 1: The 32-bit opcode variants
  ///
  /// \p Dst is the destination register of the binop to emit.
  /// \p LHS is the left-hand operand of the binop to emit.
  /// \p RHS is the right-hand operand of the binop to emit.
  MachineInstr *emitAddSub(
      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
      Register Dst, MachineOperand &LHS, MachineOperand &RHS,
      MachineIRBuilder &MIRBuilder) const;
  // Convenience wrappers for specific add/sub-family and compare instructions;
  // presumably these route through emitAddSub-style opcode tables (bodies not
  // visible here).
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
                        MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMP(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
                           AArch64CC::CondCode CC,
                           MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
                          AArch64CC::CondCode Pred,
                          MachineIRBuilder &MIRBuilder) const;
  /// Emit a CSet for a FP compare.
  ///
  /// \p Dst is expected to be a 32-bit scalar register.
  MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
                                MachineIRBuilder &MIRBuilder) const;

  /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
  /// Might elide the instruction if the previous instruction already sets NZCV
  /// correctly.
  MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);

  /// Emit the overflow op for \p Opcode.
  ///
  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
  /// G_USUBO, etc.
  std::pair<MachineInstr *, AArch64CC::CondCode>
  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
                 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;

  bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);

  /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
  /// In some cases this is even possible with OR operations in the expression.
  MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
                                MachineIRBuilder &MIB) const;
  MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
                                          CmpInst::Predicate CC,
                                          AArch64CC::CondCode Predicate,
                                          AArch64CC::CondCode OutCC,
                                          MachineIRBuilder &MIB) const;
  MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
                                   bool Negate, Register CCOp,
                                   AArch64CC::CondCode Predicate,
                                   MachineIRBuilder &MIB) const;

  /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
  /// \p IsNegative is true if the test should be "not zero".
  /// This will also optimize the test bit instruction when possible.
  MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
                            MachineBasicBlock *DstMBB,
                            MachineIRBuilder &MIB) const;

  /// Emit a CB(N)Z instruction which branches to \p DestMBB.
  MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
                        MachineBasicBlock *DestMBB,
                        MachineIRBuilder &MIB) const;

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  // Size-specialized wrappers over selectAddrModeUnscaled; the suffix is the
  // access size in bits, the argument passed is the size in bytes.
  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, Size: 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, Size: 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, Size: 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, Size: 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, Size: 16);
  }

  /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
  /// from complex pattern matchers like selectAddrModeIndexed().
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
                                          MachineRegisterInfo &MRI) const;

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Size: Width / 8);
  }

  std::optional<bool>
  isWorthFoldingIntoAddrMode(const MachineInstr &MI,
                             const MachineRegisterInfo &MRI) const;

  bool isWorthFoldingIntoExtendedReg(const MachineInstr &MI,
                                     const MachineRegisterInfo &MRI,
                                     bool IsAddrOperand) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;

  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
  /// or not a shift + extend should be folded into an addressing mode. Returns
  /// None when this is not profitable or possible.
  ComplexRendererFns
  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
                    MachineOperand &Offset, unsigned SizeInBytes,
                    bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, SizeInBytes: Width / 8);
  }

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, SizeInBytes: Width / 8);
  }

  ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
                                           bool AllowROR = false) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }

  // Logical operations additionally accept rotate-right shifted registers.
  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root, AllowROR: true);
  }

  /// Given an extend instruction, determine the correct shift-extend type for
  /// that instruction.
  ///
  /// If the instruction is going to be used in a load or store, pass
  /// \p IsLoadStore = true.
  AArch64_AM::ShiftExtendType
  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                       bool IsLoadStore = false) const;

  /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
  ///
  /// \returns Either \p Reg if no change was necessary, or the new register
  /// created by moving \p Reg.
  ///
  /// Note: This uses emitCopy right now.
  Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
                              MachineIRBuilder &MIB) const;

  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

  ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;

  ComplexRendererFns selectCVTFixedPointVec(MachineOperand &Root) const;
  ComplexRendererFns
  selectCVTFixedPointVecBase(const MachineOperand &Root) const;
  void renderFixedPointXForm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                             int OpIdx = -1) const;

  // Custom operand renderers referenced by imported TableGen patterns.
  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
                       int OpIdx) const;
  void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
                                    const MachineInstr &MI,
                                    int OpIdx = -1) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);

  // Optimization methods.
  bool tryOptSelect(GSelect &Sel);
  bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;

  /// Return true if \p MI is a load or store of \p NumBytes bytes.
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
  /// register zeroed out. In other words, the result of MI has been explicitly
  /// zero extended.
  bool isDef32(const MachineInstr &MI) const;

  // Target/subtarget state cached at construction time.
  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

  // Cached per-function in setupMF(); see the comment there.
  bool ProduceNonFlagSettingCondBr = false;

  // Some cached values used during selection.
  // We use LR as a live-in register, and we keep track of it here as it can be
  // clobbered by calls.
  Register MFReturnAddr;

  // Builder reused across select() calls; bound to the function in setupMF().
  MachineIRBuilder MIB;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};
552
553} // end anonymous namespace
554
555#define GET_GLOBALISEL_IMPL
556#include "AArch64GenGlobalISel.inc"
557#undef GET_GLOBALISEL_IMPL
558
// Constructor: caches target/subtarget state and runs the tablegen-erated
// initializers for the selector's predicates and temporaries. The .inc
// includes expand into members of the initializer list, so they must stay
// exactly where they are.
AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
      RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}
572
573// FIXME: This should be target-independent, inferred from the types declared
574// for each class in the bank.
575//
576/// Given a register bank, and a type, return the smallest register class that
577/// can represent that combination.
578static const TargetRegisterClass *
579getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
580 bool GetAllRegSet = false) {
581 if (RB.getID() == AArch64::GPRRegBankID) {
582 if (Ty.getSizeInBits() <= 32)
583 return GetAllRegSet ? &AArch64::GPR32allRegClass
584 : &AArch64::GPR32RegClass;
585 if (Ty.getSizeInBits() == 64)
586 return GetAllRegSet ? &AArch64::GPR64allRegClass
587 : &AArch64::GPR64RegClass;
588 if (Ty.getSizeInBits() == 128)
589 return &AArch64::XSeqPairsClassRegClass;
590 return nullptr;
591 }
592
593 if (RB.getID() == AArch64::FPRRegBankID) {
594 switch (Ty.getSizeInBits()) {
595 case 8:
596 return &AArch64::FPR8RegClass;
597 case 16:
598 return &AArch64::FPR16RegClass;
599 case 32:
600 return &AArch64::FPR32RegClass;
601 case 64:
602 return &AArch64::FPR64RegClass;
603 case 128:
604 return &AArch64::FPR128RegClass;
605 }
606 return nullptr;
607 }
608
609 return nullptr;
610}
611
612/// Given a register bank, and size in bits, return the smallest register class
613/// that can represent that combination.
614static const TargetRegisterClass *
615getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
616 bool GetAllRegSet = false) {
617 if (SizeInBits.isScalable()) {
618 assert(RB.getID() == AArch64::FPRRegBankID &&
619 "Expected FPR regbank for scalable type size");
620 return &AArch64::ZPRRegClass;
621 }
622
623 unsigned RegBankID = RB.getID();
624
625 if (RegBankID == AArch64::GPRRegBankID) {
626 assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
627 if (SizeInBits <= 32)
628 return GetAllRegSet ? &AArch64::GPR32allRegClass
629 : &AArch64::GPR32RegClass;
630 if (SizeInBits == 64)
631 return GetAllRegSet ? &AArch64::GPR64allRegClass
632 : &AArch64::GPR64RegClass;
633 if (SizeInBits == 128)
634 return &AArch64::XSeqPairsClassRegClass;
635 }
636
637 if (RegBankID == AArch64::FPRRegBankID) {
638 if (SizeInBits.isScalable()) {
639 assert(SizeInBits == TypeSize::getScalable(128) &&
640 "Unexpected scalable register size");
641 return &AArch64::ZPRRegClass;
642 }
643
644 switch (SizeInBits) {
645 default:
646 return nullptr;
647 case 8:
648 return &AArch64::FPR8RegClass;
649 case 16:
650 return &AArch64::FPR16RegClass;
651 case 32:
652 return &AArch64::FPR32RegClass;
653 case 64:
654 return &AArch64::FPR64RegClass;
655 case 128:
656 return &AArch64::FPR128RegClass;
657 }
658 }
659
660 return nullptr;
661}
662
663/// Returns the correct subregister to use for a given register class.
664static bool getSubRegForClass(const TargetRegisterClass *RC,
665 const TargetRegisterInfo &TRI, unsigned &SubReg) {
666 switch (TRI.getRegSizeInBits(RC: *RC)) {
667 case 8:
668 SubReg = AArch64::bsub;
669 break;
670 case 16:
671 SubReg = AArch64::hsub;
672 break;
673 case 32:
674 if (RC != &AArch64::FPR32RegClass)
675 SubReg = AArch64::sub_32;
676 else
677 SubReg = AArch64::ssub;
678 break;
679 case 64:
680 SubReg = AArch64::dsub;
681 break;
682 default:
683 LLVM_DEBUG(
684 dbgs() << "Couldn't find appropriate subregister for register class.");
685 return false;
686 }
687
688 return true;
689}
690
691/// Returns the minimum size the given register bank can hold.
692static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
693 switch (RB.getID()) {
694 case AArch64::GPRRegBankID:
695 return 32;
696 case AArch64::FPRRegBankID:
697 return 8;
698 default:
699 llvm_unreachable("Tried to get minimum size for unknown register bank.");
700 }
701}
702
703/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
704/// Helper function for functions like createDTuple and createQTuple.
705///
706/// \p RegClassIDs - The list of register class IDs available for some tuple of
707/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
708/// expected to contain between 2 and 4 tuple classes.
709///
710/// \p SubRegs - The list of subregister classes associated with each register
711/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
712/// subregister class. The index of each subregister class is expected to
713/// correspond with the index of each register class.
714///
715/// \returns Either the destination register of REG_SEQUENCE instruction that
716/// was created, or the 0th element of \p Regs if \p Regs contains a single
717/// element.
718static Register createTuple(ArrayRef<Register> Regs,
719 const unsigned RegClassIDs[],
720 const unsigned SubRegs[], MachineIRBuilder &MIB) {
721 unsigned NumRegs = Regs.size();
722 if (NumRegs == 1)
723 return Regs[0];
724 assert(NumRegs >= 2 && NumRegs <= 4 &&
725 "Only support between two and 4 registers in a tuple!");
726 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
727 auto *DesiredClass = TRI->getRegClass(i: RegClassIDs[NumRegs - 2]);
728 auto RegSequence =
729 MIB.buildInstr(Opc: TargetOpcode::REG_SEQUENCE, DstOps: {DesiredClass}, SrcOps: {});
730 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
731 RegSequence.addUse(RegNo: Regs[I]);
732 RegSequence.addImm(Val: SubRegs[I]);
733 }
734 return RegSequence.getReg(Idx: 0);
735}
736
737/// Create a tuple of D-registers using the registers in \p Regs.
738static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
739 static const unsigned RegClassIDs[] = {
740 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
741 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
742 AArch64::dsub2, AArch64::dsub3};
743 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
744}
745
746/// Create a tuple of Q-registers using the registers in \p Regs.
747static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
748 static const unsigned RegClassIDs[] = {
749 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
750 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
751 AArch64::qsub2, AArch64::qsub3};
752 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
753}
754
755static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
756 auto &MI = *Root.getParent();
757 auto &MBB = *MI.getParent();
758 auto &MF = *MBB.getParent();
759 auto &MRI = MF.getRegInfo();
760 uint64_t Immed;
761 if (Root.isImm())
762 Immed = Root.getImm();
763 else if (Root.isCImm())
764 Immed = Root.getCImm()->getZExtValue();
765 else if (Root.isReg()) {
766 auto ValAndVReg =
767 getIConstantVRegValWithLookThrough(VReg: Root.getReg(), MRI, LookThroughInstrs: true);
768 if (!ValAndVReg)
769 return std::nullopt;
770 Immed = ValAndVReg->Value.getSExtValue();
771 } else
772 return std::nullopt;
773 return Immed;
774}
775
776/// Check whether \p I is a currently unsupported binary operation:
777/// - it has an unsized type
778/// - an operand is not a vreg
779/// - all operands are not in the same bank
780/// These are checks that should someday live in the verifier, but right now,
781/// these are mostly limitations of the aarch64 selector.
782static bool unsupportedBinOp(const MachineInstr &I,
783 const AArch64RegisterBankInfo &RBI,
784 const MachineRegisterInfo &MRI,
785 const AArch64RegisterInfo &TRI) {
786 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
787 if (!Ty.isValid()) {
788 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
789 return true;
790 }
791
792 const RegisterBank *PrevOpBank = nullptr;
793 for (auto &MO : I.operands()) {
794 // FIXME: Support non-register operands.
795 if (!MO.isReg()) {
796 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
797 return true;
798 }
799
800 // FIXME: Can generic operations have physical registers operands? If
801 // so, this will need to be taught about that, and we'll need to get the
802 // bank out of the minimal class for the register.
803 // Either way, this needs to be documented (and possibly verified).
804 if (!MO.getReg().isVirtual()) {
805 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
806 return true;
807 }
808
809 const RegisterBank *OpBank = RBI.getRegBank(Reg: MO.getReg(), MRI, TRI);
810 if (!OpBank) {
811 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
812 return true;
813 }
814
815 if (PrevOpBank && OpBank != PrevOpBank) {
816 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
817 return true;
818 }
819 PrevOpBank = OpBank;
820 }
821 return false;
822}
823
824/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
825/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
826/// and of size \p OpSize.
827/// \returns \p GenericOpc if the combination is unsupported.
828static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
829 unsigned OpSize) {
830 switch (RegBankID) {
831 case AArch64::GPRRegBankID:
832 if (OpSize == 32) {
833 switch (GenericOpc) {
834 case TargetOpcode::G_SHL:
835 return AArch64::LSLVWr;
836 case TargetOpcode::G_LSHR:
837 return AArch64::LSRVWr;
838 case TargetOpcode::G_ASHR:
839 return AArch64::ASRVWr;
840 default:
841 return GenericOpc;
842 }
843 } else if (OpSize == 64) {
844 switch (GenericOpc) {
845 case TargetOpcode::G_PTR_ADD:
846 return AArch64::ADDXrr;
847 case TargetOpcode::G_SHL:
848 return AArch64::LSLVXr;
849 case TargetOpcode::G_LSHR:
850 return AArch64::LSRVXr;
851 case TargetOpcode::G_ASHR:
852 return AArch64::ASRVXr;
853 default:
854 return GenericOpc;
855 }
856 }
857 break;
858 case AArch64::FPRRegBankID:
859 switch (OpSize) {
860 case 32:
861 switch (GenericOpc) {
862 case TargetOpcode::G_FADD:
863 return AArch64::FADDSrr;
864 case TargetOpcode::G_FSUB:
865 return AArch64::FSUBSrr;
866 case TargetOpcode::G_FMUL:
867 return AArch64::FMULSrr;
868 case TargetOpcode::G_FDIV:
869 return AArch64::FDIVSrr;
870 default:
871 return GenericOpc;
872 }
873 case 64:
874 switch (GenericOpc) {
875 case TargetOpcode::G_FADD:
876 return AArch64::FADDDrr;
877 case TargetOpcode::G_FSUB:
878 return AArch64::FSUBDrr;
879 case TargetOpcode::G_FMUL:
880 return AArch64::FMULDrr;
881 case TargetOpcode::G_FDIV:
882 return AArch64::FDIVDrr;
883 case TargetOpcode::G_OR:
884 return AArch64::ORRv8i8;
885 default:
886 return GenericOpc;
887 }
888 }
889 break;
890 }
891 return GenericOpc;
892}
893
894/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
895/// appropriate for the (value) register bank \p RegBankID and of memory access
896/// size \p OpSize. This returns the variant with the base+unsigned-immediate
897/// addressing mode (e.g., LDRXui).
898/// \returns \p GenericOpc if the combination is unsupported.
899static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
900 unsigned OpSize) {
901 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
902 switch (RegBankID) {
903 case AArch64::GPRRegBankID:
904 switch (OpSize) {
905 case 8:
906 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
907 case 16:
908 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
909 case 32:
910 return isStore ? AArch64::STRWui : AArch64::LDRWui;
911 case 64:
912 return isStore ? AArch64::STRXui : AArch64::LDRXui;
913 }
914 break;
915 case AArch64::FPRRegBankID:
916 switch (OpSize) {
917 case 8:
918 return isStore ? AArch64::STRBui : AArch64::LDRBui;
919 case 16:
920 return isStore ? AArch64::STRHui : AArch64::LDRHui;
921 case 32:
922 return isStore ? AArch64::STRSui : AArch64::LDRSui;
923 case 64:
924 return isStore ? AArch64::STRDui : AArch64::LDRDui;
925 case 128:
926 return isStore ? AArch64::STRQui : AArch64::LDRQui;
927 }
928 break;
929 }
930 return GenericOpc;
931}
932
/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
/// to \p *To.
///
/// E.g "To = COPY SrcReg:SubReg"
///
/// Rewrites the source operand of \p I to use the newly created subregister
/// copy, and constrains \p I's (virtual) destination to \p To.
/// \returns true (the rewrite itself cannot fail).
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
                       const RegisterBankInfo &RBI, Register SrcReg,
                       const TargetRegisterClass *To, unsigned SubReg) {
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
  assert(SubReg && "Expected a valid subregister");

  // Insert "newreg = COPY SrcReg:SubReg" immediately before I, then make I
  // read the new register instead of SrcReg.
  MachineIRBuilder MIB(I);
  auto SubRegCopy =
      MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {To}, SrcOps: {}).addReg(RegNo: SrcReg, Flags: {}, SubReg);
  MachineOperand &RegOp = I.getOperand(i: 1);
  RegOp.setReg(SubRegCopy.getReg(Idx: 0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!I.getOperand(i: 0).getReg().isPhysical())
    RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(), RC: *To, MRI);

  return true;
}
957
/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(i: 0).getReg();
  Register SrcReg = I.getOperand(i: 1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);

  TypeSize DstSize = RBI.getSizeInBits(Reg: DstReg, MRI, TRI);
  TypeSize SrcSize = RBI.getSizeInBits(Reg: SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank &&
      (DstSize == TypeSize::getFixed(ExactSize: 1) && SrcSize == TypeSize::getFixed(ExactSize: 1)))
    SrcSize = DstSize = TypeSize::getFixed(ExactSize: 32);

  // Pick the smallest class on each bank that can hold the value; either
  // entry is nullptr when the bank has no class of that size.
  return {getMinClassForRegBank(RB: SrcRegBank, SizeInBits: SrcSize, GetAllRegSet: true),
          getMinClassForRegBank(RB: DstRegBank, SizeInBits: DstSize, GetAllRegSet: true)};
}
989
990// FIXME: We need some sort of API in RBI/TRI to allow generic code to
991// constrain operands of simple instructions given a TargetRegisterClass
992// and LLT
993static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
994 const RegisterBankInfo &RBI) {
995 for (MachineOperand &MO : I.operands()) {
996 if (!MO.isReg())
997 continue;
998 Register Reg = MO.getReg();
999 if (!Reg)
1000 continue;
1001 if (Reg.isPhysical())
1002 continue;
1003 LLT Ty = MRI.getType(Reg);
1004 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
1005 const TargetRegisterClass *RC =
1006 dyn_cast<const TargetRegisterClass *>(Val: RegClassOrBank);
1007 if (!RC) {
1008 const RegisterBank &RB = *cast<const RegisterBank *>(Val: RegClassOrBank);
1009 RC = getRegClassForTypeOnBank(Ty, RB);
1010 if (!RC) {
1011 LLVM_DEBUG(
1012 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
1013 break;
1014 }
1015 }
1016 RBI.constrainGenericRegister(Reg, RC: *RC, MRI);
1017 }
1018
1019 return true;
1020}
1021
/// Select a COPY (or a G_ZEXT being lowered to a copy) by assigning concrete
/// register classes to its operands.
///
/// When the source and destination sizes differ, inserts either a
/// subregister copy (narrowing), a SUBREG_TO_REG (widening), or a cross-bank
/// temporary copy plus subregister copy, before constraining the result.
/// \returns false if register classes could not be determined or constrained.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(i: 0).getReg();
  Register SrcReg = I.getOperand(i: 1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(args&: SrcRC, args&: DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // Is this a copy? If so, then we may need to insert a subregister copy.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    const TypeSize SrcSize = TRI.getRegSizeInBits(RC: *SrcRC);
    const TypeSize DstSize = TRI.getRegSizeInBits(RC: *DstRC);
    unsigned SubReg;

    // If the source bank doesn't support a subregister copy small enough,
    // then we first need to copy to the destination bank.
    if (getMinSizeForRegBank(RB: SrcRegBank) > DstSize) {
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(RB: DstRegBank, SizeInBits: SrcSize, /* GetAllRegSet */ true);
      getSubRegForClass(RC: DstRC, TRI, SubReg);

      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy(Res: {DstTempRC}, Op: {SrcReg});
      copySubReg(I, MRI, RBI, SrcReg: Copy.getReg(Idx: 0), To: DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // If the source register is bigger than the destination we need to
      // perform a subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(RB: SrcRegBank, SizeInBits: DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(RC: SubRegRC, TRI, SubReg);
      copySubReg(I, MRI, RBI, SrcReg, To: DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // If the destination register is bigger than the source we need to do
      // a promotion using SUBREG_TO_REG.
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(RB: SrcRegBank, SizeInBits: DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(RC: SrcRC, TRI, SubReg);

      Register PromoteReg = MRI.createVirtualRegister(RegClass: PromotionRC);
      BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(),
              MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG), DestReg: PromoteReg)
          .addImm(Val: 0)
          .addUse(RegNo: SrcReg)
          .addImm(Val: SubReg);
      MachineOperand &RegOp = I.getOperand(i: 1);
      RegOp.setReg(PromoteReg);
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (DstReg.isPhysical())
      return true;
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its use or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // If this a GPR ZEXT that we want to just reduce down into a copy.
  // The sizes will be mismatched with the source < 32b but that's ok.
  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(Opcode: AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
    // Re-run selection now that the opcode is a plain COPY.
    return selectCopy(I, TII, MRI, TRI, RBI);
  }

  I.setDesc(TII.get(Opcode: AArch64::COPY));
  return true;
}
1111
/// Emit an instruction implementing `Dst = CC ? True : False`.
///
/// Scalar 32/64-bit values only. FPR operands use FCSEL; GPR operands use
/// CSEL by default, upgraded to CSNEG/CSINV/CSINC when one input is a
/// negation/inversion/increment (or a foldable constant) that the
/// instruction can absorb.
/// \returns the emitted instruction, or nullptr for vector types.
MachineInstr *
AArch64InstructionSelector::emitSelect(Register Dst, Register True,
                                       Register False, AArch64CC::CondCode CC,
                                       MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
             RBI.getRegBank(True, MRI, TRI)->getID() &&
         "Expected both select operands to have the same regbank?");
  LLT Ty = MRI.getType(Reg: True);
  if (Ty.isVector())
    return nullptr;
  const unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) &&
         "Expected 32 bit or 64 bit select only?");
  const bool Is32Bit = Size == 32;
  // Non-GPR operands get a floating-point conditional select.
  if (RBI.getRegBank(Reg: True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {True, False}).addImm(Val: CC);
    constrainSelectedInstRegOperands(I&: *FCSel, TII, TRI, RBI);
    return &*FCSel;
  }

  // By default, we'll try and emit a CSEL.
  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
  bool Optimized = false;
  // Try to absorb a neg/not/add-1 feeding one select input into the opcode
  // (CSNEG/CSINV/CSINC). \p Invert swaps operands and inverts CC when the
  // foldable value is on the "True" side.
  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
                                 &Optimized](Register &Reg, Register &OtherReg,
                                             bool Invert) {
    if (Optimized)
      return false;

    // Attempt to fold:
    //
    // %sub = G_SUB 0, %x
    // %select = G_SELECT cc, %reg, %sub
    //
    // Into:
    // %select = CSNEG %reg, %x, cc
    Register MatchReg;
    if (mi_match(R: Reg, MRI, P: m_Neg(Src: m_Reg(R&: MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(Code: CC);
        std::swap(a&: Reg, b&: OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %xor = G_XOR %x, -1
    // %select = G_SELECT cc, %reg, %xor
    //
    // Into:
    // %select = CSINV %reg, %x, cc
    if (mi_match(R: Reg, MRI, P: m_Not(Src: m_Reg(R&: MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(Code: CC);
        std::swap(a&: Reg, b&: OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %add = G_ADD %x, 1
    // %select = G_SELECT cc, %reg, %add
    //
    // Into:
    // %select = CSINC %reg, %x, cc
    if (mi_match(R: Reg, MRI,
                 P: m_any_of(preds: m_GAdd(L: m_Reg(R&: MatchReg), R: m_SpecificICst(RequestedValue: 1)),
                          preds: m_GPtrAdd(L: m_Reg(R&: MatchReg), R: m_SpecificICst(RequestedValue: 1))))) {
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(Code: CC);
        std::swap(a&: Reg, b&: OtherReg);
      }
      return true;
    }

    return false;
  };

  // Helper lambda which tries to use CSINC/CSINV for the instruction when its
  // true/false values are constants.
  // FIXME: All of these patterns already exist in tablegen. We should be
  // able to import these.
  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
                          &Optimized]() {
    if (Optimized)
      return false;
    auto TrueCst = getIConstantVRegValWithLookThrough(VReg: True, MRI);
    auto FalseCst = getIConstantVRegValWithLookThrough(VReg: False, MRI);
    if (!TrueCst && !FalseCst)
      return false;

    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    if (TrueCst && FalseCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      int64_t F = FalseCst->Value.getSExtValue();

      if (T == 0 && F == 1) {
        // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = ZReg;
        False = ZReg;
        return true;
      }

      if (T == 0 && F == -1) {
        // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = ZReg;
        False = ZReg;
        return true;
      }
    }

    if (TrueCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      if (T == 1) {
        // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(Code: CC);
        return true;
      }

      if (T == -1) {
        // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(Code: CC);
        return true;
      }
    }

    if (FalseCst) {
      int64_t F = FalseCst->Value.getSExtValue();
      if (F == 1) {
        // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        False = ZReg;
        return true;
      }

      if (F == -1) {
        // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        False = ZReg;
        return true;
      }
    }
    return false;
  };

  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
  Optimized |= TryOptSelectCst();
  auto SelectInst = MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {True, False}).addImm(Val: CC);
  constrainSelectedInstRegOperands(I&: *SelectInst, TII, TRI, RBI);
  return &*SelectInst;
}
1282
1283static AArch64CC::CondCode
1284changeICMPPredToAArch64CC(CmpInst::Predicate P, Register RHS = {},
1285 MachineRegisterInfo *MRI = nullptr) {
1286 switch (P) {
1287 default:
1288 llvm_unreachable("Unknown condition code!");
1289 case CmpInst::ICMP_NE:
1290 return AArch64CC::NE;
1291 case CmpInst::ICMP_EQ:
1292 return AArch64CC::EQ;
1293 case CmpInst::ICMP_SGT:
1294 return AArch64CC::GT;
1295 case CmpInst::ICMP_SGE:
1296 if (RHS && MRI) {
1297 auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS, MRI: *MRI);
1298 if (ValAndVReg && ValAndVReg->Value == 0)
1299 return AArch64CC::PL;
1300 }
1301 return AArch64CC::GE;
1302 case CmpInst::ICMP_SLT:
1303 if (RHS && MRI) {
1304 auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS, MRI: *MRI);
1305 if (ValAndVReg && ValAndVReg->Value == 0)
1306 return AArch64CC::MI;
1307 }
1308 return AArch64CC::LT;
1309 case CmpInst::ICMP_SLE:
1310 return AArch64CC::LE;
1311 case CmpInst::ICMP_UGT:
1312 return AArch64CC::HI;
1313 case CmpInst::ICMP_UGE:
1314 return AArch64CC::HS;
1315 case CmpInst::ICMP_ULT:
1316 return AArch64CC::LO;
1317 case CmpInst::ICMP_ULE:
1318 return AArch64CC::LS;
1319 }
1320}
1321
1322/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1323static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1324 AArch64CC::CondCode &CondCode,
1325 AArch64CC::CondCode &CondCode2) {
1326 CondCode2 = AArch64CC::AL;
1327 switch (CC) {
1328 default:
1329 llvm_unreachable("Unknown FP condition!");
1330 case CmpInst::FCMP_OEQ:
1331 CondCode = AArch64CC::EQ;
1332 break;
1333 case CmpInst::FCMP_OGT:
1334 CondCode = AArch64CC::GT;
1335 break;
1336 case CmpInst::FCMP_OGE:
1337 CondCode = AArch64CC::GE;
1338 break;
1339 case CmpInst::FCMP_OLT:
1340 CondCode = AArch64CC::MI;
1341 break;
1342 case CmpInst::FCMP_OLE:
1343 CondCode = AArch64CC::LS;
1344 break;
1345 case CmpInst::FCMP_ONE:
1346 CondCode = AArch64CC::MI;
1347 CondCode2 = AArch64CC::GT;
1348 break;
1349 case CmpInst::FCMP_ORD:
1350 CondCode = AArch64CC::VC;
1351 break;
1352 case CmpInst::FCMP_UNO:
1353 CondCode = AArch64CC::VS;
1354 break;
1355 case CmpInst::FCMP_UEQ:
1356 CondCode = AArch64CC::EQ;
1357 CondCode2 = AArch64CC::VS;
1358 break;
1359 case CmpInst::FCMP_UGT:
1360 CondCode = AArch64CC::HI;
1361 break;
1362 case CmpInst::FCMP_UGE:
1363 CondCode = AArch64CC::PL;
1364 break;
1365 case CmpInst::FCMP_ULT:
1366 CondCode = AArch64CC::LT;
1367 break;
1368 case CmpInst::FCMP_ULE:
1369 CondCode = AArch64CC::LE;
1370 break;
1371 case CmpInst::FCMP_UNE:
1372 CondCode = AArch64CC::NE;
1373 break;
1374 }
1375}
1376
1377/// Convert an IR fp condition code to an AArch64 CC.
1378/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1379/// should be AND'ed instead of OR'ed.
1380static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1381 AArch64CC::CondCode &CondCode,
1382 AArch64CC::CondCode &CondCode2) {
1383 CondCode2 = AArch64CC::AL;
1384 switch (CC) {
1385 default:
1386 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1387 assert(CondCode2 == AArch64CC::AL);
1388 break;
1389 case CmpInst::FCMP_ONE:
1390 // (a one b)
1391 // == ((a olt b) || (a ogt b))
1392 // == ((a ord b) && (a une b))
1393 CondCode = AArch64CC::VC;
1394 CondCode2 = AArch64CC::NE;
1395 break;
1396 case CmpInst::FCMP_UEQ:
1397 // (a ueq b)
1398 // == ((a uno b) || (a oeq b))
1399 // == ((a ule b) && (a uge b))
1400 CondCode = AArch64CC::PL;
1401 CondCode2 = AArch64CC::LE;
1402 break;
1403 }
1404}
1405
/// Return a register which can be used as a bit to test in a TB(N)Z.
///
/// Walks backwards from \p Reg through single-use extends, truncs, shifts,
/// ANDs and XORs, updating \p Bit (the bit index to test) and \p Invert
/// (whether to flip TBZ <-> TBNZ) so the test can be performed on an earlier
/// register in the chain.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
                              MachineRegisterInfo &MRI) {
  assert(Reg.isValid() && "Expected valid register!");
  bool HasZext = false;
  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
    unsigned Opc = MI->getOpcode();

    // Only fold through defs with a single (non-debug) use; otherwise the
    // def must be kept anyway and folding gains nothing.
    if (!MI->getOperand(i: 0).isReg() ||
        !MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()))
      break;

    // (tbz (any_ext x), b) -> (tbz x, b) and
    // (tbz (zext x), b) -> (tbz x, b) if we don't use the extended bits.
    //
    // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
    // on the truncated x is the same as the bit number on x.
    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      if (Opc == TargetOpcode::G_ZEXT)
        HasZext = true;

      Register NextReg = MI->getOperand(i: 1).getReg();
      // Did we find something worth folding?
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(RegNo: NextReg))
        break;
      TypeSize InSize = MRI.getType(Reg: NextReg).getSizeInBits();
      if (Bit >= InSize)
        break;

      // NextReg is worth folding. Keep looking.
      Reg = NextReg;
      continue;
    }

    // Attempt to find a suitable operation with a constant on one side.
    std::optional<uint64_t> C;
    Register TestReg;
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(i: 1).getReg();
      Register ConstantReg = MI->getOperand(i: 2).getReg();
      auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
      if (!VRegAndVal) {
        // AND commutes, check the other side for a constant.
        // FIXME: Can we canonicalize the constant so that it's always on the
        // same side at some point earlier?
        std::swap(a&: ConstantReg, b&: TestReg);
        VRegAndVal = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
      }
      if (VRegAndVal) {
        // After a zext the constant must be treated as unsigned; otherwise
        // sign-extend to preserve the bit pattern of negative masks.
        if (HasZext)
          C = VRegAndVal->Value.getZExtValue();
        else
          C = VRegAndVal->Value.getSExtValue();
      }
      break;
    }
    case TargetOpcode::G_ASHR:
    case TargetOpcode::G_LSHR:
    case TargetOpcode::G_SHL: {
      TestReg = MI->getOperand(i: 1).getReg();
      auto VRegAndVal =
          getIConstantVRegValWithLookThrough(VReg: MI->getOperand(i: 2).getReg(), MRI);
      if (VRegAndVal)
        C = VRegAndVal->Value.getSExtValue();
      break;
    }
    }

    // Didn't find a constant or viable register. Bail out of the loop.
    if (!C || !TestReg.isValid())
      break;

    // We found a suitable instruction with a constant. Check to see if we can
    // walk through the instruction.
    Register NextReg;
    unsigned TestRegSize = MRI.getType(Reg: TestReg).getSizeInBits();
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
      // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
      if ((*C >> Bit) & 1)
        NextReg = TestReg;
      break;
    case TargetOpcode::G_SHL:
      // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
      // the type of the register.
      if (*C <= Bit && (Bit - *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit - *C;
      }
      break;
    case TargetOpcode::G_ASHR:
      // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
      // in x
      NextReg = TestReg;
      Bit = Bit + *C;
      if (Bit >= TestRegSize)
        Bit = TestRegSize - 1;
      break;
    case TargetOpcode::G_LSHR:
      // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
      if ((Bit + *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit + *C;
      }
      break;
    case TargetOpcode::G_XOR:
      // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
      // appropriate.
      //
      // e.g. If x' = xor x, c, and the b-th bit is set in c then
      //
      // tbz x', b -> tbnz x, b
      //
      // Because x' only has the b-th bit set if x does not.
      if ((*C >> Bit) & 1)
        Invert = !Invert;
      NextReg = TestReg;
      break;
    }

    // Check if we found anything worth folding.
    if (!NextReg.isValid())
      return Reg;
    Reg = NextReg;
  }

  return Reg;
}
1541
/// Emit a TBZ/TBNZ testing bit \p Bit of \p TestReg, branching to \p DstMBB.
///
/// First tries to walk \p TestReg back through its defining instructions
/// (see getTestBitReg), then moves the value into a W or X register as
/// required by the bit index before emitting the branch.
/// \returns the emitted TB(N)Z instruction.
MachineInstr *AArch64InstructionSelector::emitTestBit(
    Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
    MachineIRBuilder &MIB) const {
  assert(TestReg.isValid());
  assert(ProduceNonFlagSettingCondBr &&
         "Cannot emit TB(N)Z with speculation tracking!");
  MachineRegisterInfo &MRI = *MIB.getMRI();

  // Attempt to optimize the test bit by walking over instructions.
  TestReg = getTestBitReg(Reg: TestReg, Bit, Invert&: IsNegative, MRI);
  LLT Ty = MRI.getType(Reg: TestReg);
  unsigned Size = Ty.getSizeInBits();
  assert(!Ty.isVector() && "Expected a scalar!");
  assert(Bit < 64 && "Bit is too large!");

  // When the test register is a 64-bit register, we have to narrow to make
  // TBNZW work.
  bool UseWReg = Bit < 32;
  unsigned NecessarySize = UseWReg ? 32 : 64;
  if (Size != NecessarySize)
    TestReg = moveScalarRegClass(
        Reg: TestReg, RC: UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
        MIB);

  // Indexed as [UseWReg][IsNegative]: X vs W form, TBZ vs TBNZ.
  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                          {AArch64::TBZW, AArch64::TBNZW}};
  unsigned Opc = OpcTable[UseWReg][IsNegative];
  auto TestBitMI =
      MIB.buildInstr(Opcode: Opc).addReg(RegNo: TestReg).addImm(Val: Bit).addMBB(MBB: DstMBB);
  constrainSelectedInstRegOperands(I&: *TestBitMI, TII, TRI, RBI);
  return &*TestBitMI;
}
1574
/// Try to fold a G_AND feeding a conditional branch into a single TB(N)Z.
///
/// Succeeds only when the AND's RHS is a power-of-two constant, so the AND
/// is equivalent to testing one bit. \p Invert selects TBNZ (ne) vs TBZ (eq).
/// \returns true if the TB(N)Z was emitted; the caller erases the branch.
bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
    MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
    MachineIRBuilder &MIB) const {
  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
  // Given something like this:
  //
  // %x = ...Something...
  // %one = G_CONSTANT i64 1
  // %zero = G_CONSTANT i64 0
  // %and = G_AND %x, %one
  // %cmp = G_ICMP intpred(ne), %and, %zero
  // %cmp_trunc = G_TRUNC %cmp
  // G_BRCOND %cmp_trunc, %bb.3
  //
  // We want to try and fold the AND into the G_BRCOND and produce either a
  // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
  //
  // In this case, we'd get
  //
  // TBNZ %x %bb.3
  //

  // Check if the AND has a constant on its RHS which we can use as a mask.
  // If it's a power of 2, then it's the same as checking a specific bit.
  // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
  auto MaybeBit = getIConstantVRegValWithLookThrough(
      VReg: AndInst.getOperand(i: 2).getReg(), MRI: *MIB.getMRI());
  if (!MaybeBit)
    return false;

  // exactLogBase2 is negative when the constant is not a power of two.
  int32_t Bit = MaybeBit->Value.exactLogBase2();
  if (Bit < 0)
    return false;

  Register TestReg = AndInst.getOperand(i: 1).getReg();

  // Emit a TB(N)Z.
  emitTestBit(TestReg, Bit, IsNegative: Invert, DstMBB, MIB);
  return true;
}
1615
1616MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1617 bool IsNegative,
1618 MachineBasicBlock *DestMBB,
1619 MachineIRBuilder &MIB) const {
1620 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1621 MachineRegisterInfo &MRI = *MIB.getMRI();
1622 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1623 AArch64::GPRRegBankID &&
1624 "Expected GPRs only?");
1625 auto Ty = MRI.getType(Reg: CompareReg);
1626 unsigned Width = Ty.getSizeInBits();
1627 assert(!Ty.isVector() && "Expected scalar only?");
1628 assert(Width <= 64 && "Expected width to be at most 64?");
1629 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1630 {AArch64::CBNZW, AArch64::CBNZX}};
1631 unsigned Opc = OpcTable[IsNegative][Width == 64];
1632 auto BranchMI = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {CompareReg}).addMBB(MBB: DestMBB);
1633 constrainSelectedInstRegOperands(I&: *BranchMI, TII, TRI, RBI);
1634 return &*BranchMI;
1635}
1636
/// Select a G_BRCOND fed by a G_FCMP: emit the floating-point compare, then
/// one or two Bcc instructions (some FP predicates need two conditions).
/// Always succeeds and erases the G_BRCOND.
bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
    MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
  assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
  // totally clean. Some of them require two branches to implement.
  auto Pred = (CmpInst::Predicate)FCmp.getOperand(i: 1).getPredicate();
  emitFPCompare(LHS: FCmp.getOperand(i: 2).getReg(), RHS: FCmp.getOperand(i: 3).getReg(), MIRBuilder&: MIB,
                Pred);
  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(P: Pred, CondCode&: CC1, CondCode2&: CC2);
  MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
  MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC1).addMBB(MBB: DestMBB);
  // CC2 == AL means the predicate mapped to a single condition.
  if (CC2 != AArch64CC::AL)
    MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC2).addMBB(MBB: DestMBB);
  I.eraseFromParent();
  return true;
}
1655
/// Attempt to select a G_BRCOND fed by a G_ICMP as a single TB(N)Z or CB(N)Z
/// instruction instead of a full compare + Bcc sequence.
///
/// \returns true (and erases \p I) if a test-bit or compare-and-branch
/// instruction was emitted; false if the caller should fall back to the
/// generic compare + Bcc lowering.
bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
  //
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  // instructions will not be produced, as they are conditional branch
  // instructions that do not set flags.
  if (!ProduceNonFlagSettingCondBr)
    return false;

  MachineRegisterInfo &MRI = *MIB.getMRI();
  MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
  auto Pred =
      static_cast<CmpInst::Predicate>(ICmp.getOperand(i: 1).getPredicate());
  Register LHS = ICmp.getOperand(i: 2).getReg();
  Register RHS = ICmp.getOperand(i: 3).getReg();

  // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
  auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
  MachineInstr *AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI);

  // When we can emit a TB(N)Z, prefer that.
  //
  // Handle non-commutative condition codes first.
  // Note that we don't want to do this when we have a G_AND because it can
  // become a tst. The tst will make the test bit in the TB(N)Z redundant.
  if (VRegAndVal && !AndInst) {
    int64_t C = VRegAndVal->Value.getSExtValue();

    // When we have a greater-than comparison, we can just test if the msb is
    // zero.
    // (x > -1) is true iff the sign bit of x is clear, so branch on bit
    // width-1 being 0 (TBZ).
    if (C == -1 && Pred == CmpInst::ICMP_SGT) {
      uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
      emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }

    // When we have a less than comparison, we can just test if the msb is not
    // zero.
    // (x < 0) is true iff the sign bit of x is set, so branch on bit width-1
    // being 1 (TBNZ).
    if (C == 0 && Pred == CmpInst::ICMP_SLT) {
      uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
      emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ true, DstMBB: DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }

    // Inversely, if we have a signed greater-than-or-equal comparison to zero,
    // we can test if the msb is zero.
    if (C == 0 && Pred == CmpInst::ICMP_SGE) {
      uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
      emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }
  }

  // Attempt to handle commutative condition codes. Right now, that's only
  // eq/ne.
  if (ICmpInst::isEquality(P: Pred)) {
    // The constant may be on the LHS instead; eq/ne are commutative, so try
    // the swapped orientation before giving up.
    if (!VRegAndVal) {
      std::swap(a&: RHS, b&: LHS);
      VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
      AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI);
    }

    if (VRegAndVal && VRegAndVal->Value == 0) {
      // If there's a G_AND feeding into this branch, try to fold it away by
      // emitting a TB(N)Z instead.
      //
      // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
      // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
      // would be redundant.
      if (AndInst &&
          tryOptAndIntoCompareBranch(
              AndInst&: *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DstMBB: DestMBB, MIB)) {
        I.eraseFromParent();
        return true;
      }

      // Otherwise, try to emit a CB(N)Z instead.
      // CBZ/CBNZ only exist for 32- and 64-bit GPRs.
      auto LHSTy = MRI.getType(Reg: LHS);
      if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
        emitCBZ(CompareReg: LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
        I.eraseFromParent();
        return true;
      }
    }
  }

  return false;
}
1750
/// Select a G_BRCOND whose condition is produced by a G_ICMP.
///
/// First tries the TB(N)Z/CB(N)Z optimization; if that fails, emits the
/// integer compare followed by a Bcc on the resulting flags. Always succeeds
/// and erases \p I.
bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
    return true;

  // Couldn't optimize. Emit a compare + a Bcc.
  MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
  // PredOp is taken by reference: emitIntegerCompare may update the predicate
  // (e.g. when it commutes/adjusts the compare), so read it back afterwards.
  auto &PredOp = ICmp.getOperand(i: 1);
  emitIntegerCompare(LHS&: ICmp.getOperand(i: 2), RHS&: ICmp.getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB);
  // NOTE(review): the RHS register is forwarded to the CC mapping; presumably
  // it lets the mapping account for constant right-hand operands — confirm in
  // changeICMPPredToAArch64CC.
  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
      P: static_cast<CmpInst::Predicate>(PredOp.getPredicate()),
      RHS: ICmp.getOperand(i: 3).getReg(), MRI: MIB.getMRI());
  MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC).addMBB(MBB: DestMBB);
  I.eraseFromParent();
  return true;
}
1769
/// Select a G_BRCOND by dispatching on the instruction that defines its
/// condition register: G_FCMP and G_ICMP get dedicated lowerings; any other
/// producer is branched on by testing bit 0 of the condition value.
bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
  Register CondReg = I.getOperand(i: 0).getReg();
  MachineInstr *CCMI = MRI.getVRegDef(Reg: CondReg);
  // Try to select the G_BRCOND using whatever is feeding the condition if
  // possible.
  unsigned CCMIOpc = CCMI->getOpcode();
  if (CCMIOpc == TargetOpcode::G_FCMP)
    return selectCompareBranchFedByFCmp(I, FCmp&: *CCMI, MIB);
  if (CCMIOpc == TargetOpcode::G_ICMP)
    return selectCompareBranchFedByICmp(I, ICmp&: *CCMI, MIB);

  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  // instructions will not be produced, as they are conditional branch
  // instructions that do not set flags.
  if (ProduceNonFlagSettingCondBr) {
    // Branch if the low bit of the condition is set (TBNZ on bit 0).
    emitTestBit(TestReg: CondReg, /*Bit = */ 0, /*IsNegative = */ true,
                DstMBB: I.getOperand(i: 1).getMBB(), MIB);
    I.eraseFromParent();
    return true;
  }

  // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
  // ANDS against the low bit of the condition, then branch on NE.
  auto TstMI =
      MIB.buildInstr(Opc: AArch64::ANDSWri, DstOps: {LLT::scalar(SizeInBits: 32)}, SrcOps: {CondReg}).addImm(Val: 1);
  constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI);
  auto Bcc = MIB.buildInstr(Opcode: AArch64::Bcc)
                 .addImm(Val: AArch64CC::NE)
                 .addMBB(MBB: I.getOperand(i: 1).getMBB());
  I.eraseFromParent();
  constrainSelectedInstRegOperands(I&: *Bcc, TII, TRI, RBI);
  return true;
}
1803
1804/// Returns the element immediate value of a vector shift operand if found.
1805/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1806static std::optional<int64_t> getVectorShiftImm(Register Reg,
1807 MachineRegisterInfo &MRI) {
1808 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1809 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1810 return getAArch64VectorSplatScalar(MI: *OpMI, MRI);
1811}
1812
1813/// Matches and returns the shift immediate value for a SHL instruction given
1814/// a shift operand.
1815static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1816 MachineRegisterInfo &MRI) {
1817 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1818 if (!ShiftImm)
1819 return std::nullopt;
1820 // Check the immediate is in range for a SHL.
1821 int64_t Imm = *ShiftImm;
1822 if (Imm < 0)
1823 return std::nullopt;
1824 switch (SrcTy.getElementType().getSizeInBits()) {
1825 default:
1826 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1827 return std::nullopt;
1828 case 8:
1829 if (Imm > 7)
1830 return std::nullopt;
1831 break;
1832 case 16:
1833 if (Imm > 15)
1834 return std::nullopt;
1835 break;
1836 case 32:
1837 if (Imm > 31)
1838 return std::nullopt;
1839 break;
1840 case 64:
1841 if (Imm > 63)
1842 return std::nullopt;
1843 break;
1844 }
1845 return Imm;
1846}
1847
1848bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1849 MachineRegisterInfo &MRI) {
1850 assert(I.getOpcode() == TargetOpcode::G_SHL);
1851 Register DstReg = I.getOperand(i: 0).getReg();
1852 const LLT Ty = MRI.getType(Reg: DstReg);
1853 Register Src1Reg = I.getOperand(i: 1).getReg();
1854 Register Src2Reg = I.getOperand(i: 2).getReg();
1855
1856 if (!Ty.isVector())
1857 return false;
1858
1859 // Check if we have a vector of constants on RHS that we can select as the
1860 // immediate form.
1861 std::optional<int64_t> ImmVal = getVectorSHLImm(SrcTy: Ty, Reg: Src2Reg, MRI);
1862
1863 unsigned Opc = 0;
1864 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
1865 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1866 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
1867 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1868 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
1869 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1870 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) {
1871 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1872 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) {
1873 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1874 } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) {
1875 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1876 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) {
1877 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1878 } else {
1879 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1880 return false;
1881 }
1882
1883 auto Shl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg});
1884 if (ImmVal)
1885 Shl.addImm(Val: *ImmVal);
1886 else
1887 Shl.addUse(RegNo: Src2Reg);
1888 constrainSelectedInstRegOperands(I&: *Shl, TII, TRI, RBI);
1889 I.eraseFromParent();
1890 return true;
1891}
1892
1893bool AArch64InstructionSelector::selectVectorAshrLshr(
1894 MachineInstr &I, MachineRegisterInfo &MRI) {
1895 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1896 I.getOpcode() == TargetOpcode::G_LSHR);
1897 Register DstReg = I.getOperand(i: 0).getReg();
1898 const LLT Ty = MRI.getType(Reg: DstReg);
1899 Register Src1Reg = I.getOperand(i: 1).getReg();
1900 Register Src2Reg = I.getOperand(i: 2).getReg();
1901
1902 if (!Ty.isVector())
1903 return false;
1904
1905 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1906
1907 // We expect the immediate case to be lowered in the PostLegalCombiner to
1908 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1909
1910 // There is not a shift right register instruction, but the shift left
1911 // register instruction takes a signed value, where negative numbers specify a
1912 // right shift.
1913
1914 unsigned Opc = 0;
1915 unsigned NegOpc = 0;
1916 const TargetRegisterClass *RC =
1917 getRegClassForTypeOnBank(Ty, RB: RBI.getRegBank(ID: AArch64::FPRRegBankID));
1918 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
1919 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1920 NegOpc = AArch64::NEGv2i64;
1921 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
1922 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1923 NegOpc = AArch64::NEGv4i32;
1924 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
1925 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1926 NegOpc = AArch64::NEGv2i32;
1927 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) {
1928 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1929 NegOpc = AArch64::NEGv4i16;
1930 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) {
1931 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1932 NegOpc = AArch64::NEGv8i16;
1933 } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) {
1934 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1935 NegOpc = AArch64::NEGv16i8;
1936 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) {
1937 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1938 NegOpc = AArch64::NEGv8i8;
1939 } else {
1940 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1941 return false;
1942 }
1943
1944 auto Neg = MIB.buildInstr(Opc: NegOpc, DstOps: {RC}, SrcOps: {Src2Reg});
1945 constrainSelectedInstRegOperands(I&: *Neg, TII, TRI, RBI);
1946 auto SShl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg, Neg});
1947 constrainSelectedInstRegOperands(I&: *SShl, TII, TRI, RBI);
1948 I.eraseFromParent();
1949 return true;
1950}
1951
/// Select G_VASTART for the standard AAPCS64 ABI, where va_list is a
/// five-field struct (stack, gr_top, vr_top, gr_offs, vr_offs). Emits the
/// address computations and stores that initialize each field in order.
///
/// \returns false for Win64 varargs calling conventions (their single-pointer
/// va_list is handled by the Darwin-style path), true otherwise.
bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  if (STI.isCallingConvWin64(CC: MF.getFunction().getCallingConv(),
                             IsVarArg: MF.getFunction().isVarArg()))
    return false;

  // The layout of the va_list struct is specified in the AArch64 Procedure Call
  // Standard, section 10.1.5.

  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  // ILP32 uses 32-bit pointers; LP64 uses 64-bit pointers. All pointer-sized
  // operations below (reg class, add/store opcodes, offsets) key off this.
  const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
  const auto *PtrRegClass =
      STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;

  const MCInstrDesc &MCIDAddAddr =
      TII.get(Opcode: STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
  const MCInstrDesc &MCIDStoreAddr =
      TII.get(Opcode: STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);

  /*
   * typedef struct va_list {
   *  void * stack; // next stack param
   *  void * gr_top; // end of GP arg reg save area
   *  void * vr_top; // end of FP/SIMD arg reg save area
   *  int gr_offs; // offset from gr_top to next GP register arg
   *  int vr_offs; // offset from vr_top to next FP/SIMD register arg
   * } va_list;
   */
  const auto VAList = I.getOperand(i: 0).getReg();

  // Our current offset in bytes from the va_list struct (VAList).
  unsigned OffsetBytes = 0;

  // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
  // and increment OffsetBytes by PtrSize.
  const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
    // Top = FrameIndex + Imm (materialized with an ADD-immediate).
    const Register Top = MRI.createVirtualRegister(RegClass: PtrRegClass);
    auto MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: MCIDAddAddr)
                   .addDef(RegNo: Top)
                   .addFrameIndex(Idx: FrameIndex)
                   .addImm(Val: Imm)
                   .addImm(Val: 0);
    constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);

    // Store Top into the current va_list field. STR[WX]ui take a *scaled*
    // unsigned offset, hence the division by PtrSize.
    const auto *MMO = *I.memoperands_begin();
    MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: MCIDStoreAddr)
              .addUse(RegNo: Top)
              .addUse(RegNo: VAList)
              .addImm(Val: OffsetBytes / PtrSize)
              .addMemOperand(MMO: MF.getMachineMemOperand(
                  PtrInfo: MMO->getPointerInfo().getWithOffset(O: OffsetBytes),
                  F: MachineMemOperand::MOStore, Size: PtrSize, BaseAlignment: MMO->getBaseAlign()));
    constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);

    OffsetBytes += PtrSize;
  };

  // void* stack at offset 0
  PushAddress(FuncInfo->getVarArgsStackIndex(), 0);

  // void* gr_top at offset 8 (4 on ILP32)
  const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
  PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);

  // void* vr_top at offset 16 (8 on ILP32)
  const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
  PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);

  // Helper function to store a 4-byte integer constant to VAList at offset
  // OffsetBytes, and increment OffsetBytes by 4.
  const auto PushIntConstant = [&](const int32_t Value) {
    constexpr int IntSize = 4;
    const Register Temp = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
    auto MIB =
        BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::MOVi32imm))
            .addDef(RegNo: Temp)
            .addImm(Val: Value);
    constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);

    const auto *MMO = *I.memoperands_begin();
    MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::STRWui))
              .addUse(RegNo: Temp)
              .addUse(RegNo: VAList)
              .addImm(Val: OffsetBytes / IntSize)
              .addMemOperand(MMO: MF.getMachineMemOperand(
                  PtrInfo: MMO->getPointerInfo().getWithOffset(O: OffsetBytes),
                  F: MachineMemOperand::MOStore, Size: IntSize, BaseAlignment: MMO->getBaseAlign()));
    constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
    OffsetBytes += IntSize;
  };

  // int gr_offs at offset 24 (12 on ILP32)
  // Negative: the next GP register argument lives GPRSize bytes *below*
  // gr_top.
  PushIntConstant(-static_cast<int32_t>(GPRSize));

  // int vr_offs at offset 28 (16 on ILP32)
  PushIntConstant(-static_cast<int32_t>(FPRSize));

  assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");

  I.eraseFromParent();
  return true;
}
2055
2056bool AArch64InstructionSelector::selectVaStartDarwin(
2057 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2058 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2059 Register ListReg = I.getOperand(i: 0).getReg();
2060
2061 Register ArgsAddrReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
2062
2063 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2064 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2065 CC: MF.getFunction().getCallingConv(), IsVarArg: MF.getFunction().isVarArg())) {
2066 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2067 ? FuncInfo->getVarArgsGPRIndex()
2068 : FuncInfo->getVarArgsStackIndex();
2069 }
2070
2071 auto MIB =
2072 BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::ADDXri))
2073 .addDef(RegNo: ArgsAddrReg)
2074 .addFrameIndex(Idx: FrameIdx)
2075 .addImm(Val: 0)
2076 .addImm(Val: 0);
2077
2078 constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2079
2080 MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::STRXui))
2081 .addUse(RegNo: ArgsAddrReg)
2082 .addUse(RegNo: ListReg)
2083 .addImm(Val: 0)
2084 .addMemOperand(MMO: *I.memoperands_begin());
2085
2086 constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2087 I.eraseFromParent();
2088 return true;
2089}
2090
/// Materialize the address of global value or block address \p V for the
/// large code model: MOVZ of bits [0,16) followed by three MOVKs for bits
/// [16,32), [32,48) and [48,64). The MO_G0..MO_G3 target flags select which
/// 16-bit chunk of the symbol each instruction receives; MO_NC marks the
/// no-overflow-check relocation variants.
void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned OpFlags) {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // MOVZ dst, :g0_nc:sym — copy the symbol operand from I and retag it.
  auto MovZ = MIB.buildInstr(Opc: AArch64::MOVZXi, DstOps: {&AArch64::GPR64RegClass}, SrcOps: {});
  MovZ->addOperand(MF, Op: I.getOperand(i: 1));
  MovZ->getOperand(i: 1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                      AArch64II::MO_NC);
  MovZ->addOperand(MF, Op: MachineOperand::CreateImm(Val: 0));
  constrainSelectedInstRegOperands(I&: *MovZ, TII, TRI, RBI);

  // Emits MOVK DstReg, SrcReg, :gN:sym, lsl #Offset. When ForceDstReg is
  // zero a fresh vreg is created; the final MOVK writes I's original def.
  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
                       Register ForceDstReg) {
    Register DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(Opcode: AArch64::MOVKXi).addDef(RegNo: DstReg).addUse(RegNo: SrcReg);
    // V is either a GlobalValue or a BlockAddress; preserve the offset from
    // the original symbol operand in either case.
    if (auto *GV = dyn_cast<GlobalValue>(Val: V)) {
      MovI->addOperand(MF, Op: MachineOperand::CreateGA(
                               GV, Offset: MovZ->getOperand(i: 1).getOffset(), TargetFlags: Flags));
    } else {
      MovI->addOperand(
          MF, Op: MachineOperand::CreateBA(BA: cast<BlockAddress>(Val: V),
                                          Offset: MovZ->getOperand(i: 1).getOffset(), TargetFlags: Flags));
    }
    MovI->addOperand(MF, Op: MachineOperand::CreateImm(Val: Offset));
    constrainSelectedInstRegOperands(I&: *MovI, TII, TRI, RBI);
    return DstReg;
  };
  // Chain the three MOVKs: g1 at shift 16, g2 at shift 32, g3 at shift 48.
  Register DstReg = BuildMovK(MovZ.getReg(Idx: 0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(i: 0).getReg());
}
2127
/// Rewrite certain generic instructions before selection proper so that more
/// imported (TableGen-emitted) patterns can match — mostly converting
/// pointer-typed operands to equivalent integer types.
///
/// \returns true if \p I was changed.
bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case TargetOpcode::G_STORE: {
    // First try folding a cross-bank copy feeding the store.
    bool Changed = contractCrossBankCopyIntoStore(I, MRI);
    MachineOperand &SrcOp = I.getOperand(i: 0);
    if (MRI.getType(Reg: SrcOp.getReg()).isPointer()) {
      // Allow matching with imported patterns for stores of pointers. Unlike
      // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
      // and constrain.
      auto Copy = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: SrcOp);
      Register NewSrc = Copy.getReg(Idx: 0);
      SrcOp.setReg(NewSrc);
      RBI.constrainGenericRegister(Reg: NewSrc, RC: AArch64::GPR64RegClass, MRI);
      Changed = true;
    }
    return Changed;
  }
  case TargetOpcode::G_PTR_ADD: {
    // If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer
    // arithmetic semantics instead of falling back to regular arithmetic.
    const auto &TL = STI.getTargetLowering();
    if (TL->shouldPreservePtrArith(F: MF.getFunction(), PtrVT: EVT()))
      return false;
    return convertPtrAddToAdd(I, MRI);
  }
  case TargetOpcode::G_LOAD: {
    // For scalar loads of pointers, we try to convert the dest type from p0
    // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
    // conversion, this should be ok because all users should have been
    // selected already, so the type doesn't matter for them.
    Register DstReg = I.getOperand(i: 0).getReg();
    const LLT DstTy = MRI.getType(Reg: DstReg);
    if (!DstTy.isPointer())
      return false;
    MRI.setType(VReg: DstReg, Ty: LLT::scalar(SizeInBits: 64));
    return true;
  }
  case AArch64::G_DUP: {
    // Convert the type from p0 to s64 to help selection.
    // Only pointer-vector DUPs need this; the scalar source is rewritten
    // through a GPR64-constrained copy.
    LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
    if (!DstTy.isPointerVector())
      return false;
    auto NewSrc = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: I.getOperand(i: 1).getReg());
    MRI.setType(VReg: I.getOperand(i: 0).getReg(),
                Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64)));
    MRI.setRegClass(Reg: NewSrc.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
    I.getOperand(i: 1).setReg(NewSrc.getReg(Idx: 0));
    return true;
  }
  case AArch64::G_INSERT_VECTOR_ELT: {
    // Convert the type from p0 to s64 to help selection.
    // Rewrites the inserted scalar (operand 2) via a copy and retypes both
    // the source vector and the destination to <N x s64>.
    LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
    LLT SrcVecTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
    if (!SrcVecTy.isPointerVector())
      return false;
    auto NewSrc = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: I.getOperand(i: 2).getReg());
    MRI.setType(VReg: I.getOperand(i: 1).getReg(),
                Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64)));
    MRI.setType(VReg: I.getOperand(i: 0).getReg(),
                Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64)));
    MRI.setRegClass(Reg: NewSrc.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
    I.getOperand(i: 2).setReg(NewSrc.getReg(Idx: 0));
    return true;
  }
  case TargetOpcode::G_UITOFP:
  case TargetOpcode::G_SITOFP: {
    // If both source and destination regbanks are FPR, then convert the opcode
    // to G_SITOF so that the importer can select it to an fpr variant.
    // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
    // copy.
    Register SrcReg = I.getOperand(i: 1).getReg();
    LLT SrcTy = MRI.getType(Reg: SrcReg);
    LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
    if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
      return false;

    if (RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
      if (I.getOpcode() == TargetOpcode::G_SITOFP)
        I.setDesc(TII.get(Opcode: AArch64::G_SITOF));
      else
        I.setDesc(TII.get(Opcode: AArch64::G_UITOF));
      return true;
    }
    return false;
  }
  default:
    return false;
  }
}
2221
/// This lowering tries to look for G_PTR_ADD instructions and then converts
/// them to a standard G_ADD with a COPY on the source.
///
/// The motivation behind this is to expose the add semantics to the imported
/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
/// because the selector works bottom up, uses before defs. By the time we
/// end up trying to select a G_PTR_ADD, we should have already attempted to
/// fold this into addressing modes and were therefore unsuccessful.
///
/// \returns true if \p I was rewritten (to G_ADD, or further to G_SUB when
/// the offset is a negate); false if the pointer type is unsupported or the
/// new G_PTRTOINT failed to select.
bool AArch64InstructionSelector::convertPtrAddToAdd(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  Register DstReg = I.getOperand(i: 0).getReg();
  Register AddOp1Reg = I.getOperand(i: 1).getReg();
  const LLT PtrTy = MRI.getType(Reg: DstReg);
  // Only the default address space is handled here.
  if (PtrTy.getAddressSpace() != 0)
    return false;

  // Integer equivalent of the pointer type: <2 x s64> for pointer vectors,
  // s64 for scalars.
  const LLT CastPtrTy =
      PtrTy.isVector() ? LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) : LLT::scalar(SizeInBits: 64);
  auto PtrToInt = MIB.buildPtrToInt(Dst: CastPtrTy, Src: AddOp1Reg);
  // Set regbanks on the registers.
  if (PtrTy.isVector())
    MRI.setRegBank(Reg: PtrToInt.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::FPRRegBankID));
  else
    MRI.setRegBank(Reg: PtrToInt.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::GPRRegBankID));

  // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
  // %dst(intty) = G_ADD %intbase, off
  I.setDesc(TII.get(Opcode: TargetOpcode::G_ADD));
  MRI.setType(VReg: DstReg, Ty: CastPtrTy);
  I.getOperand(i: 1).setReg(PtrToInt.getReg(Idx: 0));
  // Select the freshly created G_PTRTOINT immediately; it will not be
  // revisited by the main selection loop.
  if (!select(I&: *PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
    return false;
  }

  // Also take the opportunity here to try to do some optimization.
  // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
  Register NegatedReg;
  if (!mi_match(R: I.getOperand(i: 2).getReg(), MRI, P: m_Neg(Src: m_Reg(R&: NegatedReg))))
    return true;
  I.getOperand(i: 2).setReg(NegatedReg);
  I.setDesc(TII.get(Opcode: TargetOpcode::G_SUB));
  return true;
}
2267
/// Try to select a scalar G_SHL by a constant amount as the immediate form of
/// LSL, which is an alias for UBFM with specially computed immediates.
///
/// \returns true (and erases \p I) when a UBFM[WX]ri was emitted; false if
/// the shift amount is not constant, the type is a vector, or the immediate
/// renderers reject the value — in which case the imported (register-form)
/// selector runs instead.
bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
                                                MachineRegisterInfo &MRI) {
  // We try to match the immediate variant of LSL, which is actually an alias
  // for a special case of UBFM. Otherwise, we fall back to the imported
  // selector which will match the register variant.
  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  const auto &MO = I.getOperand(i: 2);
  auto VRegAndVal = getIConstantVRegVal(VReg: MO.getReg(), MRI);
  if (!VRegAndVal)
    return false;

  const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
  if (DstTy.isVector())
    return false;
  bool Is64Bit = DstTy.getSizeInBits() == 64;
  // The two UBFM immediates (immr/imms) are produced by the ShiftA/ShiftB
  // complex renderers for the appropriate width.
  auto Imm1Fn = Is64Bit ? selectShiftA_64(Root: MO) : selectShiftA_32(Root: MO);
  auto Imm2Fn = Is64Bit ? selectShiftB_64(Root: MO) : selectShiftB_32(Root: MO);

  if (!Imm1Fn || !Imm2Fn)
    return false;

  auto NewI =
      MIB.buildInstr(Opc: Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
                     DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {I.getOperand(i: 1).getReg()});

  // Apply the renderers to append the immediate operands.
  for (auto &RenderFn : *Imm1Fn)
    RenderFn(NewI);
  for (auto &RenderFn : *Imm2Fn)
    RenderFn(NewI);

  I.eraseFromParent();
  constrainSelectedInstRegOperands(I&: *NewI, TII, TRI, RBI);
  return true;
}
2302
2303bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2304 MachineInstr &I, MachineRegisterInfo &MRI) {
2305 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2306 // If we're storing a scalar, it doesn't matter what register bank that
2307 // scalar is on. All that matters is the size.
2308 //
2309 // So, if we see something like this (with a 32-bit scalar as an example):
2310 //
2311 // %x:gpr(s32) = ... something ...
2312 // %y:fpr(s32) = COPY %x:gpr(s32)
2313 // G_STORE %y:fpr(s32)
2314 //
2315 // We can fix this up into something like this:
2316 //
2317 // G_STORE %x:gpr(s32)
2318 //
2319 // And then continue the selection process normally.
2320 Register DefDstReg = getSrcRegIgnoringCopies(Reg: I.getOperand(i: 0).getReg(), MRI);
2321 if (!DefDstReg.isValid())
2322 return false;
2323 LLT DefDstTy = MRI.getType(Reg: DefDstReg);
2324 Register StoreSrcReg = I.getOperand(i: 0).getReg();
2325 LLT StoreSrcTy = MRI.getType(Reg: StoreSrcReg);
2326
2327 // If we get something strange like a physical register, then we shouldn't
2328 // go any further.
2329 if (!DefDstTy.isValid())
2330 return false;
2331
2332 // Are the source and dst types the same size?
2333 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2334 return false;
2335
2336 if (RBI.getRegBank(Reg: StoreSrcReg, MRI, TRI) ==
2337 RBI.getRegBank(Reg: DefDstReg, MRI, TRI))
2338 return false;
2339
2340 // We have a cross-bank copy, which is entering a store. Let's fold it.
2341 I.getOperand(i: 0).setReg(DefDstReg);
2342 return true;
2343}
2344
2345bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2346 assert(I.getParent() && "Instruction should be in a basic block!");
2347 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2348
2349 MachineBasicBlock &MBB = *I.getParent();
2350 MachineFunction &MF = *MBB.getParent();
2351 MachineRegisterInfo &MRI = MF.getRegInfo();
2352
2353 switch (I.getOpcode()) {
2354 case AArch64::G_DUP: {
2355 // Before selecting a DUP instruction, check if it is better selected as a
2356 // MOV or load from a constant pool.
2357 Register Src = I.getOperand(i: 1).getReg();
2358 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(VReg: Src, MRI);
2359 if (!ValAndVReg)
2360 return false;
2361 LLVMContext &Ctx = MF.getFunction().getContext();
2362 Register Dst = I.getOperand(i: 0).getReg();
2363 auto *CV = ConstantDataVector::getSplat(
2364 NumElts: MRI.getType(Reg: Dst).getNumElements(),
2365 Elt: ConstantInt::get(
2366 Ty: Type::getIntNTy(C&: Ctx, N: MRI.getType(Reg: Dst).getScalarSizeInBits()),
2367 V: ValAndVReg->Value.trunc(width: MRI.getType(Reg: Dst).getScalarSizeInBits())));
2368 if (!emitConstantVector(Dst, CV, MIRBuilder&: MIB, MRI))
2369 return false;
2370 I.eraseFromParent();
2371 return true;
2372 }
2373 case TargetOpcode::G_SEXT:
2374 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2375 // over a normal extend.
2376 if (selectUSMovFromExtend(I, MRI))
2377 return true;
2378 return false;
2379 case TargetOpcode::G_BR:
2380 return false;
2381 case TargetOpcode::G_SHL:
2382 return earlySelectSHL(I, MRI);
2383 case TargetOpcode::G_CONSTANT: {
2384 bool IsZero = false;
2385 if (I.getOperand(i: 1).isCImm())
2386 IsZero = I.getOperand(i: 1).getCImm()->isZero();
2387 else if (I.getOperand(i: 1).isImm())
2388 IsZero = I.getOperand(i: 1).getImm() == 0;
2389
2390 if (!IsZero)
2391 return false;
2392
2393 Register DefReg = I.getOperand(i: 0).getReg();
2394 LLT Ty = MRI.getType(Reg: DefReg);
2395 if (Ty.getSizeInBits() == 64) {
2396 I.getOperand(i: 1).ChangeToRegister(Reg: AArch64::XZR, isDef: false);
2397 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
2398 } else if (Ty.getSizeInBits() <= 32) {
2399 I.getOperand(i: 1).ChangeToRegister(Reg: AArch64::WZR, isDef: false);
2400 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR32RegClass, MRI);
2401 } else
2402 return false;
2403
2404 I.setDesc(TII.get(Opcode: TargetOpcode::COPY));
2405 return true;
2406 }
2407
2408 case TargetOpcode::G_ADD: {
2409 // Check if this is being fed by a G_ICMP on either side.
2410 //
2411 // (cmp pred, x, y) + z
2412 //
2413 // In the above case, when the cmp is true, we increment z by 1. So, we can
2414 // fold the add into the cset for the cmp by using cinc.
2415 //
2416 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2417 Register AddDst = I.getOperand(i: 0).getReg();
2418 Register AddLHS = I.getOperand(i: 1).getReg();
2419 Register AddRHS = I.getOperand(i: 2).getReg();
2420 // Only handle scalars.
2421 LLT Ty = MRI.getType(Reg: AddLHS);
2422 if (Ty.isVector())
2423 return false;
2424 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2425 // bits.
2426 unsigned Size = Ty.getSizeInBits();
2427 if (Size != 32 && Size != 64)
2428 return false;
2429 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2430 if (!MRI.hasOneNonDBGUse(RegNo: Reg))
2431 return nullptr;
2432 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2433 // compare.
2434 if (Size == 32)
2435 return getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg, MRI);
2436 // We model scalar compares using 32-bit destinations right now.
2437 // If it's a 64-bit compare, it'll have 64-bit sources.
2438 Register ZExt;
2439 if (!mi_match(R: Reg, MRI,
2440 P: m_OneNonDBGUse(SP: m_GZExt(Src: m_OneNonDBGUse(SP: m_Reg(R&: ZExt))))))
2441 return nullptr;
2442 auto *Cmp = getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg: ZExt, MRI);
2443 if (!Cmp ||
2444 MRI.getType(Reg: Cmp->getOperand(i: 2).getReg()).getSizeInBits() != 64)
2445 return nullptr;
2446 return Cmp;
2447 };
2448 // Try to match
2449 // z + (cmp pred, x, y)
2450 MachineInstr *Cmp = MatchCmp(AddRHS);
2451 if (!Cmp) {
2452 // (cmp pred, x, y) + z
2453 std::swap(a&: AddLHS, b&: AddRHS);
2454 Cmp = MatchCmp(AddRHS);
2455 if (!Cmp)
2456 return false;
2457 }
2458 auto &PredOp = Cmp->getOperand(i: 1);
2459 MIB.setInstrAndDebugLoc(I);
2460 emitIntegerCompare(/*LHS=*/Cmp->getOperand(i: 2),
2461 /*RHS=*/Cmp->getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB);
2462 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2463 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
2464 P: CmpInst::getInversePredicate(pred: Pred), RHS: Cmp->getOperand(i: 3).getReg(), MRI: &MRI);
2465 emitCSINC(/*Dst=*/AddDst, /*Src =*/Src1: AddLHS, /*Src2=*/AddLHS, Pred: InvCC, MIRBuilder&: MIB);
2466 I.eraseFromParent();
2467 return true;
2468 }
2469 case TargetOpcode::G_OR: {
2470 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2471 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2472 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2473 Register Dst = I.getOperand(i: 0).getReg();
2474 LLT Ty = MRI.getType(Reg: Dst);
2475
2476 if (!Ty.isScalar())
2477 return false;
2478
2479 unsigned Size = Ty.getSizeInBits();
2480 if (Size != 32 && Size != 64)
2481 return false;
2482
2483 Register ShiftSrc;
2484 int64_t ShiftImm;
2485 Register MaskSrc;
2486 int64_t MaskImm;
2487 if (!mi_match(
2488 R: Dst, MRI,
2489 P: m_GOr(L: m_OneNonDBGUse(SP: m_GShl(L: m_Reg(R&: ShiftSrc), R: m_ICst(Cst&: ShiftImm))),
2490 R: m_OneNonDBGUse(SP: m_GAnd(L: m_Reg(R&: MaskSrc), R: m_ICst(Cst&: MaskImm))))))
2491 return false;
2492
2493 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2494 return false;
2495
2496 int64_t Immr = Size - ShiftImm;
2497 int64_t Imms = Size - ShiftImm - 1;
2498 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2499 emitInstr(Opcode: Opc, DstOps: {Dst}, SrcOps: {MaskSrc, ShiftSrc, Immr, Imms}, MIRBuilder&: MIB);
2500 I.eraseFromParent();
2501 return true;
2502 }
2503 case TargetOpcode::G_FENCE: {
2504 if (I.getOperand(i: 1).getImm() == 0)
2505 BuildMI(BB&: MBB, I, MIMD: MIMetadata(I), MCID: TII.get(Opcode: TargetOpcode::MEMBARRIER));
2506 else
2507 BuildMI(BB&: MBB, I, MIMD: MIMetadata(I), MCID: TII.get(Opcode: AArch64::DMB))
2508 .addImm(Val: I.getOperand(i: 0).getImm() == 4 ? 0x9 : 0xb);
2509 I.eraseFromParent();
2510 return true;
2511 }
2512 default:
2513 return false;
2514 }
2515}
2516
2517bool AArch64InstructionSelector::select(MachineInstr &I) {
2518 assert(I.getParent() && "Instruction should be in a basic block!");
2519 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2520
2521 MachineBasicBlock &MBB = *I.getParent();
2522 MachineFunction &MF = *MBB.getParent();
2523 MachineRegisterInfo &MRI = MF.getRegInfo();
2524
2525 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2526 if (Subtarget->requiresStrictAlign()) {
2527 // We don't support this feature yet.
2528 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2529 return false;
2530 }
2531
2532 MIB.setInstrAndDebugLoc(I);
2533
2534 unsigned Opcode = I.getOpcode();
2535 // G_PHI requires same handling as PHI
2536 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2537 // Certain non-generic instructions also need some special handling.
2538
2539 if (Opcode == TargetOpcode::LOAD_STACK_GUARD) {
2540 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2541 return true;
2542 }
2543
2544 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2545 const Register DefReg = I.getOperand(i: 0).getReg();
2546 const LLT DefTy = MRI.getType(Reg: DefReg);
2547
2548 const RegClassOrRegBank &RegClassOrBank =
2549 MRI.getRegClassOrRegBank(Reg: DefReg);
2550
2551 const TargetRegisterClass *DefRC =
2552 dyn_cast<const TargetRegisterClass *>(Val: RegClassOrBank);
2553 if (!DefRC) {
2554 if (!DefTy.isValid()) {
2555 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2556 return false;
2557 }
2558 const RegisterBank &RB = *cast<const RegisterBank *>(Val: RegClassOrBank);
2559 DefRC = getRegClassForTypeOnBank(Ty: DefTy, RB);
2560 if (!DefRC) {
2561 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2562 return false;
2563 }
2564 }
2565
2566 I.setDesc(TII.get(Opcode: TargetOpcode::PHI));
2567
2568 return RBI.constrainGenericRegister(Reg: DefReg, RC: *DefRC, MRI);
2569 }
2570
2571 if (I.isCopy())
2572 return selectCopy(I, TII, MRI, TRI, RBI);
2573
2574 if (I.isDebugInstr())
2575 return selectDebugInstr(I, MRI, RBI);
2576
2577 return true;
2578 }
2579
2580
2581 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2582 LLVM_DEBUG(
2583 dbgs() << "Generic instruction has unexpected implicit operands\n");
2584 return false;
2585 }
2586
2587 // Try to do some lowering before we start instruction selecting. These
2588 // lowerings are purely transformations on the input G_MIR and so selection
2589 // must continue after any modification of the instruction.
2590 if (preISelLower(I)) {
2591 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2592 }
2593
2594 // There may be patterns where the importer can't deal with them optimally,
2595 // but does select it to a suboptimal sequence so our custom C++ selection
2596 // code later never has a chance to work on it. Therefore, we have an early
2597 // selection attempt here to give priority to certain selection routines
2598 // over the imported ones.
2599 if (earlySelect(I))
2600 return true;
2601
2602 if (selectImpl(I, CoverageInfo&: *CoverageInfo))
2603 return true;
2604
2605 LLT Ty =
2606 I.getOperand(i: 0).isReg() ? MRI.getType(Reg: I.getOperand(i: 0).getReg()) : LLT{};
2607
2608 switch (Opcode) {
2609 case TargetOpcode::G_SBFX:
2610 case TargetOpcode::G_UBFX: {
2611 static const unsigned OpcTable[2][2] = {
2612 {AArch64::UBFMWri, AArch64::UBFMXri},
2613 {AArch64::SBFMWri, AArch64::SBFMXri}};
2614 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2615 unsigned Size = Ty.getSizeInBits();
2616 unsigned Opc = OpcTable[IsSigned][Size == 64];
2617 auto Cst1 =
2618 getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 2).getReg(), MRI);
2619 assert(Cst1 && "Should have gotten a constant for src 1?");
2620 auto Cst2 =
2621 getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 3).getReg(), MRI);
2622 assert(Cst2 && "Should have gotten a constant for src 2?");
2623 auto LSB = Cst1->Value.getZExtValue();
2624 auto Width = Cst2->Value.getZExtValue();
2625 auto BitfieldInst =
2626 MIB.buildInstr(Opc, DstOps: {I.getOperand(i: 0)}, SrcOps: {I.getOperand(i: 1)})
2627 .addImm(Val: LSB)
2628 .addImm(Val: LSB + Width - 1);
2629 I.eraseFromParent();
2630 constrainSelectedInstRegOperands(I&: *BitfieldInst, TII, TRI, RBI);
2631 return true;
2632 }
2633 case TargetOpcode::G_BRCOND:
2634 return selectCompareBranch(I, MF, MRI);
2635
2636 case TargetOpcode::G_BRINDIRECT: {
2637 const Function &Fn = MF.getFunction();
2638 if (std::optional<uint16_t> BADisc =
2639 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(ParentFn: Fn)) {
2640 auto MI = MIB.buildInstr(Opc: AArch64::BRA, DstOps: {}, SrcOps: {I.getOperand(i: 0).getReg()});
2641 MI.addImm(Val: AArch64PACKey::IA);
2642 MI.addImm(Val: *BADisc);
2643 MI.addReg(/*AddrDisc=*/RegNo: AArch64::XZR);
2644 I.eraseFromParent();
2645 constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI);
2646 return true;
2647 }
2648 I.setDesc(TII.get(Opcode: AArch64::BR));
2649 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2650 return true;
2651 }
2652
2653 case TargetOpcode::G_BRJT:
2654 return selectBrJT(I, MRI);
2655
2656 case AArch64::G_ADD_LOW: {
2657 // This op may have been separated from it's ADRP companion by the localizer
2658 // or some other code motion pass. Given that many CPUs will try to
2659 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2660 // which will later be expanded into an ADRP+ADD pair after scheduling.
2661 MachineInstr *BaseMI = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg());
2662 if (BaseMI->getOpcode() != AArch64::ADRP) {
2663 I.setDesc(TII.get(Opcode: AArch64::ADDXri));
2664 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2665 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2666 return true;
2667 }
2668 assert(TM.getCodeModel() == CodeModel::Small &&
2669 "Expected small code model");
2670 auto Op1 = BaseMI->getOperand(i: 1);
2671 auto Op2 = I.getOperand(i: 2);
2672 auto MovAddr = MIB.buildInstr(Opc: AArch64::MOVaddr, DstOps: {I.getOperand(i: 0)}, SrcOps: {})
2673 .addGlobalAddress(GV: Op1.getGlobal(), Offset: Op1.getOffset(),
2674 TargetFlags: Op1.getTargetFlags())
2675 .addGlobalAddress(GV: Op2.getGlobal(), Offset: Op2.getOffset(),
2676 TargetFlags: Op2.getTargetFlags());
2677 I.eraseFromParent();
2678 constrainSelectedInstRegOperands(I&: *MovAddr, TII, TRI, RBI);
2679 return true;
2680 }
2681
2682 case TargetOpcode::G_FCONSTANT:
2683 case TargetOpcode::G_CONSTANT: {
2684 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2685
2686 const LLT s8 = LLT::scalar(SizeInBits: 8);
2687 const LLT s16 = LLT::scalar(SizeInBits: 16);
2688 const LLT s32 = LLT::scalar(SizeInBits: 32);
2689 const LLT s64 = LLT::scalar(SizeInBits: 64);
2690 const LLT s128 = LLT::scalar(SizeInBits: 128);
2691 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
2692
2693 const Register DefReg = I.getOperand(i: 0).getReg();
2694 const LLT DefTy = MRI.getType(Reg: DefReg);
2695 const unsigned DefSize = DefTy.getSizeInBits();
2696 const RegisterBank &RB = *RBI.getRegBank(Reg: DefReg, MRI, TRI);
2697
2698 // FIXME: Redundant check, but even less readable when factored out.
2699 if (isFP) {
2700 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2701 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2702 << " constant, expected: " << s16 << " or " << s32
2703 << " or " << s64 << " or " << s128 << '\n');
2704 return false;
2705 }
2706
2707 if (RB.getID() != AArch64::FPRRegBankID) {
2708 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2709 << " constant on bank: " << RB
2710 << ", expected: FPR\n");
2711 return false;
2712 }
2713
2714 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2715 // can be sure tablegen works correctly and isn't rescued by this code.
2716 // 0.0 is not covered by tablegen for FP128. So we will handle this
2717 // scenario in the code here.
2718 if (DefSize != 128 && I.getOperand(i: 1).getFPImm()->isExactlyValue(V: 0.0))
2719 return false;
2720 } else {
2721 // s32 and s64 are covered by tablegen.
2722 if (Ty != p0 && Ty != s8 && Ty != s16) {
2723 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2724 << " constant, expected: " << s32 << ", " << s64
2725 << ", or " << p0 << '\n');
2726 return false;
2727 }
2728
2729 if (RB.getID() != AArch64::GPRRegBankID) {
2730 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2731 << " constant on bank: " << RB
2732 << ", expected: GPR\n");
2733 return false;
2734 }
2735 }
2736
2737 if (isFP) {
2738 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(Ty: DefTy, RB);
2739 // For 16, 64, and 128b values, emit a constant pool load.
2740 switch (DefSize) {
2741 default:
2742 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2743 case 32:
2744 case 64: {
2745 bool OptForSize = shouldOptForSize(MF: &MF);
2746 const auto &TLI = MF.getSubtarget().getTargetLowering();
2747 // If TLI says that this fpimm is illegal, then we'll expand to a
2748 // constant pool load.
2749 if (TLI->isFPImmLegal(I.getOperand(i: 1).getFPImm()->getValueAPF(),
2750 EVT::getFloatingPointVT(BitWidth: DefSize), ForCodeSize: OptForSize))
2751 break;
2752 [[fallthrough]];
2753 }
2754 case 16:
2755 case 128: {
2756 auto *FPImm = I.getOperand(i: 1).getFPImm();
2757 auto *LoadMI = emitLoadFromConstantPool(CPVal: FPImm, MIRBuilder&: MIB);
2758 if (!LoadMI) {
2759 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2760 return false;
2761 }
2762 MIB.buildCopy(Res: {DefReg}, Op: {LoadMI->getOperand(i: 0).getReg()});
2763 I.eraseFromParent();
2764 return RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI);
2765 }
2766 }
2767
2768 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2769 // Either emit a FMOV, or emit a copy to emit a normal mov.
2770 const Register DefGPRReg = MRI.createVirtualRegister(
2771 RegClass: DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2772 MachineOperand &RegOp = I.getOperand(i: 0);
2773 RegOp.setReg(DefGPRReg);
2774 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator()));
2775 MIB.buildCopy(Res: {DefReg}, Op: {DefGPRReg});
2776
2777 if (!RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI)) {
2778 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2779 return false;
2780 }
2781
2782 MachineOperand &ImmOp = I.getOperand(i: 1);
2783 // FIXME: Is going through int64_t always correct?
2784 ImmOp.ChangeToImmediate(
2785 ImmVal: ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2786 } else if (I.getOperand(i: 1).isCImm()) {
2787 uint64_t Val = I.getOperand(i: 1).getCImm()->getZExtValue();
2788 I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val);
2789 } else if (I.getOperand(i: 1).isImm()) {
2790 uint64_t Val = I.getOperand(i: 1).getImm();
2791 I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val);
2792 }
2793
2794 const unsigned MovOpc =
2795 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2796 I.setDesc(TII.get(Opcode: MovOpc));
2797 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2798 return true;
2799 }
2800 case TargetOpcode::G_EXTRACT: {
2801 Register DstReg = I.getOperand(i: 0).getReg();
2802 Register SrcReg = I.getOperand(i: 1).getReg();
2803 LLT SrcTy = MRI.getType(Reg: SrcReg);
2804 LLT DstTy = MRI.getType(Reg: DstReg);
2805 (void)DstTy;
2806 unsigned SrcSize = SrcTy.getSizeInBits();
2807
2808 if (SrcTy.getSizeInBits() > 64) {
2809 // This should be an extract of an s128, which is like a vector extract.
2810 if (SrcTy.getSizeInBits() != 128)
2811 return false;
2812 // Only support extracting 64 bits from an s128 at the moment.
2813 if (DstTy.getSizeInBits() != 64)
2814 return false;
2815
2816 unsigned Offset = I.getOperand(i: 2).getImm();
2817 if (Offset % 64 != 0)
2818 return false;
2819
2820 // Check we have the right regbank always.
2821 const RegisterBank &SrcRB = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
2822 const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
2823 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2824
2825 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2826 auto NewI =
2827 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {})
2828 .addUse(RegNo: SrcReg, Flags: {},
2829 SubReg: Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2830 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt&: *NewI,
2831 RegClass: AArch64::GPR64RegClass, RegMO&: NewI->getOperand(i: 0));
2832 I.eraseFromParent();
2833 return true;
2834 }
2835
2836 // Emit the same code as a vector extract.
2837 // Offset must be a multiple of 64.
2838 unsigned LaneIdx = Offset / 64;
2839 MachineInstr *Extract = emitExtractVectorElt(
2840 DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: 64), VecReg: SrcReg, LaneIdx, MIRBuilder&: MIB);
2841 if (!Extract)
2842 return false;
2843 I.eraseFromParent();
2844 return true;
2845 }
2846
2847 I.setDesc(TII.get(Opcode: SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2848 MachineInstrBuilder(MF, I).addImm(Val: I.getOperand(i: 2).getImm() +
2849 Ty.getSizeInBits() - 1);
2850
2851 if (SrcSize < 64) {
2852 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2853 "unexpected G_EXTRACT types");
2854 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2855 return true;
2856 }
2857
2858 DstReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64));
2859 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator()));
2860 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {})
2861 .addReg(RegNo: DstReg, Flags: {}, SubReg: AArch64::sub_32);
2862 RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(),
2863 RC: AArch64::GPR32RegClass, MRI);
2864 I.getOperand(i: 0).setReg(DstReg);
2865
2866 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2867 return true;
2868 }
2869
2870 case TargetOpcode::G_INSERT: {
2871 LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 2).getReg());
2872 LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2873 unsigned DstSize = DstTy.getSizeInBits();
2874 // Larger inserts are vectors, same-size ones should be something else by
2875 // now (split up or turned into COPYs).
2876 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2877 return false;
2878
2879 I.setDesc(TII.get(Opcode: DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2880 unsigned LSB = I.getOperand(i: 3).getImm();
2881 unsigned Width = MRI.getType(Reg: I.getOperand(i: 2).getReg()).getSizeInBits();
2882 I.getOperand(i: 3).setImm((DstSize - LSB) % DstSize);
2883 MachineInstrBuilder(MF, I).addImm(Val: Width - 1);
2884
2885 if (DstSize < 64) {
2886 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2887 "unexpected G_INSERT types");
2888 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2889 return true;
2890 }
2891
2892 Register SrcReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64));
2893 BuildMI(BB&: MBB, I: I.getIterator(), MIMD: I.getDebugLoc(),
2894 MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG))
2895 .addDef(RegNo: SrcReg)
2896 .addUse(RegNo: I.getOperand(i: 2).getReg())
2897 .addImm(Val: AArch64::sub_32);
2898 RBI.constrainGenericRegister(Reg: I.getOperand(i: 2).getReg(),
2899 RC: AArch64::GPR32RegClass, MRI);
2900 I.getOperand(i: 2).setReg(SrcReg);
2901
2902 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2903 return true;
2904 }
2905 case TargetOpcode::G_FRAME_INDEX: {
2906 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2907 if (Ty != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) {
2908 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2909 << ", expected: " << LLT::pointer(0, 64) << '\n');
2910 return false;
2911 }
2912 I.setDesc(TII.get(Opcode: AArch64::ADDXri));
2913
2914 // MOs for a #0 shifted immediate.
2915 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2916 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2917
2918 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2919 return true;
2920 }
2921
2922 case TargetOpcode::G_GLOBAL_VALUE: {
2923 const GlobalValue *GV = nullptr;
2924 unsigned OpFlags;
2925 if (I.getOperand(i: 1).isSymbol()) {
2926 OpFlags = I.getOperand(i: 1).getTargetFlags();
2927 // Currently only used by "RtLibUseGOT".
2928 assert(OpFlags == AArch64II::MO_GOT);
2929 } else {
2930 GV = I.getOperand(i: 1).getGlobal();
2931 if (GV->isThreadLocal()) {
2932 // We don't support instructions with emulated TLS variables yet
2933 if (TM.useEmulatedTLS())
2934 return false;
2935 return selectTLSGlobalValue(I, MRI);
2936 }
2937 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2938 }
2939
2940 if (OpFlags & AArch64II::MO_GOT) {
2941 bool IsGOTSigned = MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT();
2942 I.setDesc(TII.get(Opcode: IsGOTSigned ? AArch64::LOADgotAUTH : AArch64::LOADgot));
2943 I.getOperand(i: 1).setTargetFlags(OpFlags);
2944 I.addImplicitDefUseOperands(MF);
2945 } else if (TM.getCodeModel() == CodeModel::Large &&
2946 !TM.isPositionIndependent()) {
2947 // Materialize the global using movz/movk instructions.
2948 materializeLargeCMVal(I, V: GV, OpFlags);
2949 I.eraseFromParent();
2950 return true;
2951 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2952 I.setDesc(TII.get(Opcode: AArch64::ADR));
2953 I.getOperand(i: 1).setTargetFlags(OpFlags);
2954 } else {
2955 I.setDesc(TII.get(Opcode: AArch64::MOVaddr));
2956 I.getOperand(i: 1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2957 MachineInstrBuilder MIB(MF, I);
2958 MIB.addGlobalAddress(GV, Offset: I.getOperand(i: 1).getOffset(),
2959 TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2960 }
2961 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2962 return true;
2963 }
2964
2965 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2966 return selectPtrAuthGlobalValue(I, MRI);
2967
2968 case TargetOpcode::G_ZEXTLOAD:
2969 case TargetOpcode::G_LOAD:
2970 case TargetOpcode::G_STORE: {
2971 GLoadStore &LdSt = cast<GLoadStore>(Val&: I);
2972 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2973 LLT PtrTy = MRI.getType(Reg: LdSt.getPointerReg());
2974
2975 // Can only handle AddressSpace 0, 64-bit pointers.
2976 if (PtrTy != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) {
2977 return false;
2978 }
2979
2980 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2981 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2982 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2983
2984 // Need special instructions for atomics that affect ordering.
2985 if (isStrongerThanMonotonic(AO: Order)) {
2986 assert(!isa<GZExtLoad>(LdSt));
2987 assert(MemSizeInBytes <= 8 &&
2988 "128-bit atomics should already be custom-legalized");
2989
2990 if (isa<GLoad>(Val: LdSt)) {
2991 static constexpr unsigned LDAPROpcodes[] = {
2992 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2993 static constexpr unsigned LDAROpcodes[] = {
2994 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2995 ArrayRef<unsigned> Opcodes =
2996 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2997 ? LDAPROpcodes
2998 : LDAROpcodes;
2999 I.setDesc(TII.get(Opcode: Opcodes[Log2_32(Value: MemSizeInBytes)]));
3000 } else {
3001 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
3002 AArch64::STLRW, AArch64::STLRX};
3003 Register ValReg = LdSt.getReg(Idx: 0);
3004 if (MRI.getType(Reg: ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
3005 // Emit a subreg copy of 32 bits.
3006 Register NewVal = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3007 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {NewVal}, SrcOps: {})
3008 .addReg(RegNo: I.getOperand(i: 0).getReg(), Flags: {}, SubReg: AArch64::sub_32);
3009 I.getOperand(i: 0).setReg(NewVal);
3010 }
3011 I.setDesc(TII.get(Opcode: Opcodes[Log2_32(Value: MemSizeInBytes)]));
3012 }
3013 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3014 return true;
3015 }
3016
3017#ifndef NDEBUG
3018 const Register PtrReg = LdSt.getPointerReg();
3019 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
3020 // Check that the pointer register is valid.
3021 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
3022 "Load/Store pointer operand isn't a GPR");
3023 assert(MRI.getType(PtrReg).isPointer() &&
3024 "Load/Store pointer operand isn't a pointer");
3025#endif
3026
3027 const Register ValReg = LdSt.getReg(Idx: 0);
3028 const RegisterBank &RB = *RBI.getRegBank(Reg: ValReg, MRI, TRI);
3029 LLT ValTy = MRI.getType(Reg: ValReg);
3030
3031 // The code below doesn't support truncating stores, so we need to split it
3032 // again.
3033 if (isa<GStore>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3034 unsigned SubReg;
3035 LLT MemTy = LdSt.getMMO().getMemoryType();
3036 auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB);
3037 if (!getSubRegForClass(RC, TRI, SubReg))
3038 return false;
3039
3040 // Generate a subreg copy.
3041 auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {MemTy}, SrcOps: {})
3042 .addReg(RegNo: ValReg, Flags: {}, SubReg)
3043 .getReg(Idx: 0);
3044 RBI.constrainGenericRegister(Reg: Copy, RC: *RC, MRI);
3045 LdSt.getOperand(i: 0).setReg(Copy);
3046 } else if (isa<GLoad>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3047 // If this is an any-extending load from the FPR bank, split it into a regular
3048 // load + extend.
3049 if (RB.getID() == AArch64::FPRRegBankID) {
3050 unsigned SubReg;
3051 LLT MemTy = LdSt.getMMO().getMemoryType();
3052 auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB);
3053 if (!getSubRegForClass(RC, TRI, SubReg))
3054 return false;
3055 Register OldDst = LdSt.getReg(Idx: 0);
3056 Register NewDst =
3057 MRI.createGenericVirtualRegister(Ty: LdSt.getMMO().getMemoryType());
3058 LdSt.getOperand(i: 0).setReg(NewDst);
3059 MRI.setRegBank(Reg: NewDst, RegBank: RB);
3060 // Generate a SUBREG_TO_REG to extend it.
3061 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LdSt.getIterator()));
3062 MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {OldDst}, SrcOps: {})
3063 .addUse(RegNo: NewDst)
3064 .addImm(Val: SubReg);
3065 auto SubRegRC = getRegClassForTypeOnBank(Ty: MRI.getType(Reg: OldDst), RB);
3066 RBI.constrainGenericRegister(Reg: OldDst, RC: *SubRegRC, MRI);
3067 MIB.setInstr(LdSt);
3068 ValTy = MemTy; // This is no longer an extending load.
3069 }
3070 }
3071
3072 // Helper lambda for partially selecting I. Either returns the original
3073 // instruction with an updated opcode, or a new instruction.
3074 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
3075 bool IsStore = isa<GStore>(Val: I);
3076 const unsigned NewOpc =
3077 selectLoadStoreUIOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize: MemSizeInBits);
3078 if (NewOpc == I.getOpcode())
3079 return nullptr;
3080 // Check if we can fold anything into the addressing mode.
3081 auto AddrModeFns =
3082 selectAddrModeIndexed(Root&: I.getOperand(i: 1), Size: MemSizeInBytes);
3083 if (!AddrModeFns) {
3084 // Can't fold anything. Use the original instruction.
3085 I.setDesc(TII.get(Opcode: NewOpc));
3086 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
3087 return &I;
3088 }
3089
3090 // Folded something. Create a new instruction and return it.
3091 auto NewInst = MIB.buildInstr(Opc: NewOpc, DstOps: {}, SrcOps: {}, Flags: I.getFlags());
3092 Register CurValReg = I.getOperand(i: 0).getReg();
3093 IsStore ? NewInst.addUse(RegNo: CurValReg) : NewInst.addDef(RegNo: CurValReg);
3094 NewInst.cloneMemRefs(OtherMI: I);
3095 for (auto &Fn : *AddrModeFns)
3096 Fn(NewInst);
3097 I.eraseFromParent();
3098 return &*NewInst;
3099 };
3100
3101 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3102 if (!LoadStore)
3103 return false;
3104
3105 // If we're storing a 0, use WZR/XZR.
3106 if (Opcode == TargetOpcode::G_STORE) {
3107 auto CVal = getIConstantVRegValWithLookThrough(
3108 VReg: LoadStore->getOperand(i: 0).getReg(), MRI);
3109 if (CVal && CVal->Value == 0) {
3110 switch (LoadStore->getOpcode()) {
3111 case AArch64::STRWui:
3112 case AArch64::STRHHui:
3113 case AArch64::STRBBui:
3114 LoadStore->getOperand(i: 0).setReg(AArch64::WZR);
3115 break;
3116 case AArch64::STRXui:
3117 LoadStore->getOperand(i: 0).setReg(AArch64::XZR);
3118 break;
3119 }
3120 }
3121 }
3122
3123 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3124 ValTy == LLT::scalar(SizeInBits: 64) && MemSizeInBits == 32)) {
3125 // The any/zextload from a smaller type to i32 should be handled by the
3126 // importer.
3127 if (MRI.getType(Reg: LoadStore->getOperand(i: 0).getReg()).getSizeInBits() != 64)
3128 return false;
3129 // If we have an extending load then change the load's type to be a
3130 // narrower reg and zero_extend with SUBREG_TO_REG.
3131 Register LdReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3132 Register DstReg = LoadStore->getOperand(i: 0).getReg();
3133 LoadStore->getOperand(i: 0).setReg(LdReg);
3134
3135 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LoadStore->getIterator()));
3136 MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DstReg}, SrcOps: {})
3137 .addUse(RegNo: LdReg)
3138 .addImm(Val: AArch64::sub_32);
3139 constrainSelectedInstRegOperands(I&: *LoadStore, TII, TRI, RBI);
3140 return RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64allRegClass,
3141 MRI);
3142 }
3143 constrainSelectedInstRegOperands(I&: *LoadStore, TII, TRI, RBI);
3144 return true;
3145 }
3146
3147 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3148 case TargetOpcode::G_INDEXED_SEXTLOAD:
3149 return selectIndexedExtLoad(I, MRI);
3150 case TargetOpcode::G_INDEXED_LOAD:
3151 return selectIndexedLoad(I, MRI);
3152 case TargetOpcode::G_INDEXED_STORE:
3153 return selectIndexedStore(I&: cast<GIndexedStore>(Val&: I), MRI);
3154
3155 case TargetOpcode::G_LSHR:
3156 case TargetOpcode::G_ASHR:
3157 if (MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector())
3158 return selectVectorAshrLshr(I, MRI);
3159 [[fallthrough]];
3160 case TargetOpcode::G_SHL:
3161 if (Opcode == TargetOpcode::G_SHL &&
3162 MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector())
3163 return selectVectorSHL(I, MRI);
3164
3165 // These shifts were legalized to have 64 bit shift amounts because we
3166 // want to take advantage of the selection patterns that assume the
3167 // immediates are s64s, however, selectBinaryOp will assume both operands
3168 // will have the same bit size.
3169 {
3170 Register SrcReg = I.getOperand(i: 1).getReg();
3171 Register ShiftReg = I.getOperand(i: 2).getReg();
3172 const LLT ShiftTy = MRI.getType(Reg: ShiftReg);
3173 const LLT SrcTy = MRI.getType(Reg: SrcReg);
3174 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3175 ShiftTy.getSizeInBits() == 64) {
3176 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3177 // Insert a subregister copy to implement a 64->32 trunc
3178 auto Trunc = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {SrcTy}, SrcOps: {})
3179 .addReg(RegNo: ShiftReg, Flags: {}, SubReg: AArch64::sub_32);
3180 MRI.setRegBank(Reg: Trunc.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::GPRRegBankID));
3181 I.getOperand(i: 2).setReg(Trunc.getReg(Idx: 0));
3182 }
3183 }
3184 [[fallthrough]];
3185 case TargetOpcode::G_OR: {
3186 // Reject the various things we don't support yet.
3187 if (unsupportedBinOp(I, RBI, MRI, TRI))
3188 return false;
3189
3190 const unsigned OpSize = Ty.getSizeInBits();
3191
3192 const Register DefReg = I.getOperand(i: 0).getReg();
3193 const RegisterBank &RB = *RBI.getRegBank(Reg: DefReg, MRI, TRI);
3194
3195 const unsigned NewOpc = selectBinaryOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize);
3196 if (NewOpc == I.getOpcode())
3197 return false;
3198
3199 I.setDesc(TII.get(Opcode: NewOpc));
3200 // FIXME: Should the type be always reset in setDesc?
3201
3202 // Now that we selected an opcode, we need to constrain the register
3203 // operands to use appropriate classes.
3204 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3205 return true;
3206 }
3207
3208 case TargetOpcode::G_PTR_ADD: {
3209 emitADD(DefReg: I.getOperand(i: 0).getReg(), LHS&: I.getOperand(i: 1), RHS&: I.getOperand(i: 2), MIRBuilder&: MIB);
3210 I.eraseFromParent();
3211 return true;
3212 }
3213
3214 case TargetOpcode::G_SADDE:
3215 case TargetOpcode::G_UADDE:
3216 case TargetOpcode::G_SSUBE:
3217 case TargetOpcode::G_USUBE:
3218 case TargetOpcode::G_SADDO:
3219 case TargetOpcode::G_UADDO:
3220 case TargetOpcode::G_SSUBO:
3221 case TargetOpcode::G_USUBO:
3222 return selectOverflowOp(I, MRI);
3223
3224 case TargetOpcode::G_PTRMASK: {
3225 Register MaskReg = I.getOperand(i: 2).getReg();
3226 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(VReg: MaskReg, MRI);
3227 // TODO: Implement arbitrary cases
3228 if (!MaskVal || !isShiftedMask_64(Value: *MaskVal))
3229 return false;
3230
3231 uint64_t Mask = *MaskVal;
3232 I.setDesc(TII.get(Opcode: AArch64::ANDXri));
3233 I.getOperand(i: 2).ChangeToImmediate(
3234 ImmVal: AArch64_AM::encodeLogicalImmediate(imm: Mask, regSize: 64));
3235
3236 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3237 return true;
3238 }
3239 case TargetOpcode::G_PTRTOINT:
3240 case TargetOpcode::G_TRUNC: {
3241 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3242 const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
3243
3244 const Register DstReg = I.getOperand(i: 0).getReg();
3245 const Register SrcReg = I.getOperand(i: 1).getReg();
3246
3247 const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
3248 const RegisterBank &SrcRB = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
3249
3250 if (DstRB.getID() != SrcRB.getID()) {
3251 LLVM_DEBUG(
3252 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3253 return false;
3254 }
3255
3256 if (DstRB.getID() == AArch64::GPRRegBankID) {
3257 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB);
3258 if (!DstRC)
3259 return false;
3260
3261 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(Ty: SrcTy, RB: SrcRB);
3262 if (!SrcRC)
3263 return false;
3264
3265 if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: *SrcRC, MRI) ||
3266 !RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) {
3267 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3268 return false;
3269 }
3270
3271 if (DstRC == SrcRC) {
3272 // Nothing to be done
3273 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(SizeInBits: 32) &&
3274 SrcTy == LLT::scalar(SizeInBits: 64)) {
3275 llvm_unreachable("TableGen can import this case");
3276 return false;
3277 } else if (DstRC == &AArch64::GPR32RegClass &&
3278 SrcRC == &AArch64::GPR64RegClass) {
3279 I.getOperand(i: 1).setSubReg(AArch64::sub_32);
3280 } else {
3281 LLVM_DEBUG(
3282 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3283 return false;
3284 }
3285
3286 I.setDesc(TII.get(Opcode: TargetOpcode::COPY));
3287 return true;
3288 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3289 if (DstTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16) &&
3290 SrcTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
3291 I.setDesc(TII.get(Opcode: AArch64::XTNv4i16));
3292 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3293 return true;
3294 }
3295
3296 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3297 MachineInstr *Extract = emitExtractVectorElt(
3298 DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: DstTy.getSizeInBits()), VecReg: SrcReg, LaneIdx: 0, MIRBuilder&: MIB);
3299 if (!Extract)
3300 return false;
3301 I.eraseFromParent();
3302 return true;
3303 }
3304
3305 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3306 if (Opcode == TargetOpcode::G_PTRTOINT) {
3307 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3308 I.setDesc(TII.get(Opcode: TargetOpcode::COPY));
3309 return selectCopy(I, TII, MRI, TRI, RBI);
3310 }
3311 }
3312
3313 return false;
3314 }
3315
3316 case TargetOpcode::G_ANYEXT: {
3317 if (selectUSMovFromExtend(I, MRI))
3318 return true;
3319
3320 const Register DstReg = I.getOperand(i: 0).getReg();
3321 const Register SrcReg = I.getOperand(i: 1).getReg();
3322
3323 const RegisterBank &RBDst = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
3324 if (RBDst.getID() != AArch64::GPRRegBankID) {
3325 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3326 << ", expected: GPR\n");
3327 return false;
3328 }
3329
3330 const RegisterBank &RBSrc = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
3331 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3332 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3333 << ", expected: GPR\n");
3334 return false;
3335 }
3336
3337 const unsigned DstSize = MRI.getType(Reg: DstReg).getSizeInBits();
3338
3339 if (DstSize == 0) {
3340 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3341 return false;
3342 }
3343
3344 if (DstSize != 64 && DstSize > 32) {
3345 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3346 << ", expected: 32 or 64\n");
3347 return false;
3348 }
3349 // At this point G_ANYEXT is just like a plain COPY, but we need
3350 // to explicitly form the 64-bit value if any.
3351 if (DstSize > 32) {
3352 Register ExtSrc = MRI.createVirtualRegister(RegClass: &AArch64::GPR64allRegClass);
3353 BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG))
3354 .addDef(RegNo: ExtSrc)
3355 .addUse(RegNo: SrcReg)
3356 .addImm(Val: AArch64::sub_32);
3357 I.getOperand(i: 1).setReg(ExtSrc);
3358 }
3359 return selectCopy(I, TII, MRI, TRI, RBI);
3360 }
3361
3362 case TargetOpcode::G_ZEXT:
3363 case TargetOpcode::G_SEXT_INREG:
3364 case TargetOpcode::G_SEXT: {
3365 if (selectUSMovFromExtend(I, MRI))
3366 return true;
3367
3368 unsigned Opcode = I.getOpcode();
3369 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3370 const Register DefReg = I.getOperand(i: 0).getReg();
3371 Register SrcReg = I.getOperand(i: 1).getReg();
3372 const LLT DstTy = MRI.getType(Reg: DefReg);
3373 const LLT SrcTy = MRI.getType(Reg: SrcReg);
3374 unsigned DstSize = DstTy.getSizeInBits();
3375 unsigned SrcSize = SrcTy.getSizeInBits();
3376
3377 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3378 // extended is encoded in the imm.
3379 if (Opcode == TargetOpcode::G_SEXT_INREG)
3380 SrcSize = I.getOperand(i: 2).getImm();
3381
3382 if (DstTy.isVector())
3383 return false; // Should be handled by imported patterns.
3384
3385 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3386 AArch64::GPRRegBankID &&
3387 "Unexpected ext regbank");
3388
3389 MachineInstr *ExtI;
3390
3391 // First check if we're extending the result of a load which has a dest type
3392 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3393 // GPR register on AArch64 and all loads which are smaller automatically
3394 // zero-extend the upper bits. E.g.
3395 // %v(s8) = G_LOAD %p, :: (load 1)
3396 // %v2(s32) = G_ZEXT %v(s8)
3397 if (!IsSigned) {
3398 auto *LoadMI = getOpcodeDef(Opcode: TargetOpcode::G_LOAD, Reg: SrcReg, MRI);
3399 bool IsGPR =
3400 RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3401 if (LoadMI && IsGPR) {
3402 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3403 unsigned BytesLoaded = MemOp->getSize().getValue();
3404 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3405 return selectCopy(I, TII, MRI, TRI, RBI);
3406 }
3407
3408 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3409 // + SUBREG_TO_REG.
3410 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3411 Register SubregToRegSrc =
3412 MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3413 const Register ZReg = AArch64::WZR;
3414 MIB.buildInstr(Opc: AArch64::ORRWrs, DstOps: {SubregToRegSrc}, SrcOps: {ZReg, SrcReg})
3415 .addImm(Val: 0);
3416
3417 MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DefReg}, SrcOps: {})
3418 .addUse(RegNo: SubregToRegSrc)
3419 .addImm(Val: AArch64::sub_32);
3420
3421 if (!RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass,
3422 MRI)) {
3423 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3424 return false;
3425 }
3426
3427 if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: AArch64::GPR32RegClass,
3428 MRI)) {
3429 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3430 return false;
3431 }
3432
3433 I.eraseFromParent();
3434 return true;
3435 }
3436 }
3437
3438 if (DstSize == 64) {
3439 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3440 // FIXME: Can we avoid manually doing this?
3441 if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: AArch64::GPR32RegClass,
3442 MRI)) {
3443 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3444 << " operand\n");
3445 return false;
3446 }
3447 SrcReg = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG,
3448 DstOps: {&AArch64::GPR64RegClass}, SrcOps: {})
3449 .addUse(RegNo: SrcReg)
3450 .addImm(Val: AArch64::sub_32)
3451 .getReg(Idx: 0);
3452 }
3453
3454 ExtI = MIB.buildInstr(Opc: IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3455 DstOps: {DefReg}, SrcOps: {SrcReg})
3456 .addImm(Val: 0)
3457 .addImm(Val: SrcSize - 1);
3458 } else if (DstSize <= 32) {
3459 ExtI = MIB.buildInstr(Opc: IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3460 DstOps: {DefReg}, SrcOps: {SrcReg})
3461 .addImm(Val: 0)
3462 .addImm(Val: SrcSize - 1);
3463 } else {
3464 return false;
3465 }
3466
3467 constrainSelectedInstRegOperands(I&: *ExtI, TII, TRI, RBI);
3468 I.eraseFromParent();
3469 return true;
3470 }
3471
3472 case TargetOpcode::G_FREEZE:
3473 return selectCopy(I, TII, MRI, TRI, RBI);
3474
3475 case TargetOpcode::G_INTTOPTR:
3476 // The importer is currently unable to import pointer types since they
3477 // didn't exist in SelectionDAG.
3478 return selectCopy(I, TII, MRI, TRI, RBI);
3479
3480 case TargetOpcode::G_BITCAST:
3481 // Imported SelectionDAG rules can handle every bitcast except those that
3482 // bitcast from a type to the same type. Ideally, these shouldn't occur
3483 // but we might not run an optimizer that deletes them. The other exception
3484 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3485 // of them.
3486 return selectCopy(I, TII, MRI, TRI, RBI);
3487
3488 case TargetOpcode::G_SELECT: {
3489 auto &Sel = cast<GSelect>(Val&: I);
3490 const Register CondReg = Sel.getCondReg();
3491 const Register TReg = Sel.getTrueReg();
3492 const Register FReg = Sel.getFalseReg();
3493
3494 if (tryOptSelect(Sel))
3495 return true;
3496
3497 // Make sure to use an unused vreg instead of wzr, so that the peephole
3498 // optimizations will be able to optimize these.
3499 Register DeadVReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3500 auto TstMI = MIB.buildInstr(Opc: AArch64::ANDSWri, DstOps: {DeadVReg}, SrcOps: {CondReg})
3501 .addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: 1, regSize: 32));
3502 constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI);
3503 if (!emitSelect(Dst: Sel.getReg(Idx: 0), True: TReg, False: FReg, CC: AArch64CC::NE, MIB))
3504 return false;
3505 Sel.eraseFromParent();
3506 return true;
3507 }
3508 case TargetOpcode::G_ICMP: {
3509 if (Ty.isVector())
3510 return false;
3511
3512 if (Ty != LLT::scalar(SizeInBits: 32)) {
3513 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3514 << ", expected: " << LLT::scalar(32) << '\n');
3515 return false;
3516 }
3517
3518 auto &PredOp = I.getOperand(i: 1);
3519 emitIntegerCompare(LHS&: I.getOperand(i: 2), RHS&: I.getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB);
3520 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
3521 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3522 P: CmpInst::getInversePredicate(pred: Pred), RHS: I.getOperand(i: 3).getReg(), MRI: &MRI);
3523 emitCSINC(/*Dst=*/I.getOperand(i: 0).getReg(), /*Src1=*/AArch64::WZR,
3524 /*Src2=*/AArch64::WZR, Pred: InvCC, MIRBuilder&: MIB);
3525 I.eraseFromParent();
3526 return true;
3527 }
3528
3529 case TargetOpcode::G_FCMP: {
3530 CmpInst::Predicate Pred =
3531 static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate());
3532 if (!emitFPCompare(LHS: I.getOperand(i: 2).getReg(), RHS: I.getOperand(i: 3).getReg(), MIRBuilder&: MIB,
3533 Pred) ||
3534 !emitCSetForFCmp(Dst: I.getOperand(i: 0).getReg(), Pred, MIRBuilder&: MIB))
3535 return false;
3536 I.eraseFromParent();
3537 return true;
3538 }
3539 case TargetOpcode::G_VASTART:
3540 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3541 : selectVaStartAAPCS(I, MF, MRI);
3542 case TargetOpcode::G_INTRINSIC:
3543 return selectIntrinsic(I, MRI);
3544 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3545 return selectIntrinsicWithSideEffects(I, MRI);
3546 case TargetOpcode::G_IMPLICIT_DEF: {
3547 I.setDesc(TII.get(Opcode: TargetOpcode::IMPLICIT_DEF));
3548 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3549 const Register DstReg = I.getOperand(i: 0).getReg();
3550 const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
3551 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB);
3552 RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI);
3553 return true;
3554 }
3555 case TargetOpcode::G_BLOCK_ADDR: {
3556 Function *BAFn = I.getOperand(i: 1).getBlockAddress()->getFunction();
3557 if (std::optional<uint16_t> BADisc =
3558 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(ParentFn: *BAFn)) {
3559 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X16}, SrcOps: {});
3560 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
3561 MIB.buildInstr(Opcode: AArch64::MOVaddrPAC)
3562 .addBlockAddress(BA: I.getOperand(i: 1).getBlockAddress())
3563 .addImm(Val: AArch64PACKey::IA)
3564 .addReg(/*AddrDisc=*/RegNo: AArch64::XZR)
3565 .addImm(Val: *BADisc)
3566 .constrainAllUses(TII, TRI, RBI);
3567 MIB.buildCopy(Res: I.getOperand(i: 0).getReg(), Op: Register(AArch64::X16));
3568 RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(),
3569 RC: AArch64::GPR64RegClass, MRI);
3570 I.eraseFromParent();
3571 return true;
3572 }
3573 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3574 materializeLargeCMVal(I, V: I.getOperand(i: 1).getBlockAddress(), OpFlags: 0);
3575 I.eraseFromParent();
3576 return true;
3577 } else {
3578 I.setDesc(TII.get(Opcode: AArch64::MOVaddrBA));
3579 auto MovMI = BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::MOVaddrBA),
3580 DestReg: I.getOperand(i: 0).getReg())
3581 .addBlockAddress(BA: I.getOperand(i: 1).getBlockAddress(),
3582 /* Offset */ 0, TargetFlags: AArch64II::MO_PAGE)
3583 .addBlockAddress(
3584 BA: I.getOperand(i: 1).getBlockAddress(), /* Offset */ 0,
3585 TargetFlags: AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3586 I.eraseFromParent();
3587 constrainSelectedInstRegOperands(I&: *MovMI, TII, TRI, RBI);
3588 return true;
3589 }
3590 }
3591 case AArch64::G_DUP: {
3592 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3593 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3594 // difficult because at RBS we may end up pessimizing the fpr case if we
3595 // decided to add an anyextend to fix this. Manual selection is the most
3596 // robust solution for now.
3597 if (RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI)->getID() !=
3598 AArch64::GPRRegBankID)
3599 return false; // We expect the fpr regbank case to be imported.
3600 LLT VecTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3601 if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8))
3602 I.setDesc(TII.get(Opcode: AArch64::DUPv8i8gpr));
3603 else if (VecTy == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8))
3604 I.setDesc(TII.get(Opcode: AArch64::DUPv16i8gpr));
3605 else if (VecTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16))
3606 I.setDesc(TII.get(Opcode: AArch64::DUPv4i16gpr));
3607 else if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16))
3608 I.setDesc(TII.get(Opcode: AArch64::DUPv8i16gpr));
3609 else
3610 return false;
3611 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3612 return true;
3613 }
3614 case TargetOpcode::G_BUILD_VECTOR:
3615 return selectBuildVector(I, MRI);
3616 case TargetOpcode::G_MERGE_VALUES:
3617 return selectMergeValues(I, MRI);
3618 case TargetOpcode::G_UNMERGE_VALUES:
3619 return selectUnmergeValues(I, MRI);
3620 case TargetOpcode::G_SHUFFLE_VECTOR:
3621 return selectShuffleVector(I, MRI);
3622 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3623 return selectExtractElt(I, MRI);
3624 case TargetOpcode::G_CONCAT_VECTORS:
3625 return selectConcatVectors(I, MRI);
3626 case TargetOpcode::G_JUMP_TABLE:
3627 return selectJumpTable(I, MRI);
3628 case TargetOpcode::G_MEMCPY:
3629 case TargetOpcode::G_MEMCPY_INLINE:
3630 case TargetOpcode::G_MEMMOVE:
3631 case TargetOpcode::G_MEMSET:
3632 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3633 return selectMOPS(I, MRI);
3634 }
3635
3636 return false;
3637}
3638
3639bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3640 MachineIRBuilderState OldMIBState = MIB.getState();
3641 bool Success = select(I);
3642 MIB.setState(OldMIBState);
3643 return Success;
3644}
3645
3646bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3647 MachineRegisterInfo &MRI) {
3648 unsigned Mopcode;
3649 switch (GI.getOpcode()) {
3650 case TargetOpcode::G_MEMCPY:
3651 case TargetOpcode::G_MEMCPY_INLINE:
3652 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3653 break;
3654 case TargetOpcode::G_MEMMOVE:
3655 Mopcode = AArch64::MOPSMemoryMovePseudo;
3656 break;
3657 case TargetOpcode::G_MEMSET:
3658 // For tagged memset see llvm.aarch64.mops.memset.tag
3659 Mopcode = AArch64::MOPSMemorySetPseudo;
3660 break;
3661 }
3662
3663 auto &DstPtr = GI.getOperand(i: 0);
3664 auto &SrcOrVal = GI.getOperand(i: 1);
3665 auto &Size = GI.getOperand(i: 2);
3666
3667 // Create copies of the registers that can be clobbered.
3668 const Register DstPtrCopy = MRI.cloneVirtualRegister(VReg: DstPtr.getReg());
3669 const Register SrcValCopy = MRI.cloneVirtualRegister(VReg: SrcOrVal.getReg());
3670 const Register SizeCopy = MRI.cloneVirtualRegister(VReg: Size.getReg());
3671
3672 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3673 const auto &SrcValRegClass =
3674 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3675
3676 // Constrain to specific registers
3677 RBI.constrainGenericRegister(Reg: DstPtrCopy, RC: AArch64::GPR64commonRegClass, MRI);
3678 RBI.constrainGenericRegister(Reg: SrcValCopy, RC: SrcValRegClass, MRI);
3679 RBI.constrainGenericRegister(Reg: SizeCopy, RC: AArch64::GPR64RegClass, MRI);
3680
3681 MIB.buildCopy(Res: DstPtrCopy, Op: DstPtr);
3682 MIB.buildCopy(Res: SrcValCopy, Op: SrcOrVal);
3683 MIB.buildCopy(Res: SizeCopy, Op: Size);
3684
3685 // New instruction uses the copied registers because it must update them.
3686 // The defs are not used since they don't exist in G_MEM*. They are still
3687 // tied.
3688 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3689 Register DefDstPtr = MRI.createVirtualRegister(RegClass: &AArch64::GPR64commonRegClass);
3690 Register DefSize = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
3691 if (IsSet) {
3692 MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSize},
3693 SrcOps: {DstPtrCopy, SizeCopy, SrcValCopy});
3694 } else {
3695 Register DefSrcPtr = MRI.createVirtualRegister(RegClass: &SrcValRegClass);
3696 MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSrcPtr, DefSize},
3697 SrcOps: {DstPtrCopy, SrcValCopy, SizeCopy});
3698 }
3699
3700 GI.eraseFromParent();
3701 return true;
3702}
3703
// Select G_BRJT (indirect branch through a jump-table entry). Emits either the
// hardened BR_JumpTable pseudo (expanded late to preserve the integrity of the
// intermediate values) or a JumpTableDest32 + BR sequence.
bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
                                            MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
  Register JTAddr = I.getOperand(i: 0).getReg();
  unsigned JTI = I.getOperand(i: 1).getIndex();
  Register Index = I.getOperand(i: 2).getReg();

  // Record that this table uses 4-byte entries with no PC-relative symbol.
  MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(Idx: JTI, Size: 4, PCRelSym: nullptr);

  // With aarch64-jump-table-hardening, we only expand the jump table dispatch
  // sequence later, to guarantee the integrity of the intermediate values.
  if (MF->getFunction().hasFnAttribute(Kind: "aarch64-jump-table-hardening")) {
    CodeModel::Model CM = TM.getCodeModel();
    if (STI.isTargetMachO()) {
      if (CM != CodeModel::Small && CM != CodeModel::Large)
        report_fatal_error(reason: "Unsupported code-model for hardened jump-table");
    } else {
      // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
      assert(STI.isTargetELF() &&
             "jump table hardening only supported on MachO/ELF");
      if (CM != CodeModel::Small)
        report_fatal_error(reason: "Unsupported code-model for hardened jump-table");
    }

    // The BR_JumpTable pseudo takes the index in the fixed register X16.
    MIB.buildCopy(Res: {AArch64::X16}, Op: I.getOperand(i: 2).getReg());
    MIB.buildInstr(Opcode: AArch64::BR_JumpTable)
        .addJumpTableIndex(Idx: I.getOperand(i: 1).getIndex());
    I.eraseFromParent();
    return true;
  }

  Register TargetReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
  Register ScratchReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64spRegClass);

  // JumpTableDest32 computes the branch target from the table address and the
  // index; it also defines a scratch register.
  auto JumpTableInst = MIB.buildInstr(Opc: AArch64::JumpTableDest32,
                                      DstOps: {TargetReg, ScratchReg}, SrcOps: {JTAddr, Index})
                           .addJumpTableIndex(Idx: JTI);
  // Save the jump table info.
  MIB.buildInstr(Opc: TargetOpcode::JUMP_TABLE_DEBUG_INFO, DstOps: {},
                 SrcOps: {static_cast<int64_t>(JTI)});
  // Build the indirect branch.
  MIB.buildInstr(Opc: AArch64::BR, DstOps: {}, SrcOps: {TargetReg});
  // Erase the G_BRJT first; constraining afterwards is safe since the new
  // instructions do not reference it.
  I.eraseFromParent();
  constrainSelectedInstRegOperands(I&: *JumpTableInst, TII, TRI, RBI);
  return true;
}
3750
3751bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3752 MachineRegisterInfo &MRI) {
3753 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3754 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3755
3756 Register DstReg = I.getOperand(i: 0).getReg();
3757 unsigned JTI = I.getOperand(i: 1).getIndex();
3758 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3759 auto MovMI =
3760 MIB.buildInstr(Opc: AArch64::MOVaddrJT, DstOps: {DstReg}, SrcOps: {})
3761 .addJumpTableIndex(Idx: JTI, TargetFlags: AArch64II::MO_PAGE)
3762 .addJumpTableIndex(Idx: JTI, TargetFlags: AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3763 I.eraseFromParent();
3764 constrainSelectedInstRegOperands(I&: *MovMI, TII, TRI, RBI);
3765 return true;
3766}
3767
// Select a TLS global-value access. Only the MachO TLV scheme is handled
// here: load the TLV descriptor address from the GOT, load the accessor
// function pointer from its first word, and call it with the descriptor in
// X0; the variable's address comes back in X0.
bool AArch64InstructionSelector::selectTLSGlobalValue(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  if (!STI.isTargetMachO())
    return false;
  MachineFunction &MF = *I.getParent()->getParent();
  // The access involves a call, so the frame must allow stack adjustment.
  MF.getFrameInfo().setAdjustsStack(true);

  const auto &GlobalOp = I.getOperand(i: 1);
  assert(GlobalOp.getOffset() == 0 &&
         "Shouldn't have an offset on TLS globals!");
  const GlobalValue &GV = *GlobalOp.getGlobal();

  // Address of the TLV descriptor, loaded via the GOT with the TLS flag.
  auto LoadGOT =
      MIB.buildInstr(Opc: AArch64::LOADgot, DstOps: {&AArch64::GPR64commonRegClass}, SrcOps: {})
          .addGlobalAddress(GV: &GV, Offset: 0, TargetFlags: AArch64II::MO_TLS);

  // Load the accessor function pointer from offset 0 of the descriptor.
  auto Load = MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {&AArch64::GPR64commonRegClass},
                             SrcOps: {LoadGOT.getReg(Idx: 0)})
                  .addImm(Val: 0);

  // The accessor takes the descriptor address as its argument in X0.
  MIB.buildCopy(Res: Register(AArch64::X0), Op: LoadGOT.getReg(Idx: 0));
  // TLS calls preserve all registers except those that absolutely must be
  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
  // silly).
  unsigned Opcode = getBLRCallOpcode(MF);

  // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
  if (MF.getFunction().hasFnAttribute(Kind: "ptrauth-calls")) {
    assert(Opcode == AArch64::BLR);
    Opcode = AArch64::BLRAAZ;
  }

  // Indirect call through the loaded pointer; X0 is both used (argument) and
  // defined (result), and the TLS-specific regmask keeps everything else.
  MIB.buildInstr(Opc: Opcode, DstOps: {}, SrcOps: {Load})
      .addUse(RegNo: AArch64::X0, Flags: RegState::Implicit)
      .addDef(RegNo: AArch64::X0, Flags: RegState::Implicit)
      .addRegMask(Mask: TRI.getTLSCallPreservedMask());

  // Copy the result out of X0 into the G_GLOBAL_VALUE's destination.
  MIB.buildCopy(Res: I.getOperand(i: 0).getReg(), Op: Register(AArch64::X0));
  RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(), RC: AArch64::GPR64RegClass,
                               MRI);
  I.eraseFromParent();
  return true;
}
3811
3812MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3813 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3814 MachineIRBuilder &MIRBuilder) const {
3815 auto Undef = MIRBuilder.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstRC}, SrcOps: {});
3816
3817 auto BuildFn = [&](unsigned SubregIndex) {
3818 auto Ins =
3819 MIRBuilder
3820 .buildInstr(Opc: TargetOpcode::INSERT_SUBREG, DstOps: {DstRC}, SrcOps: {Undef, Scalar})
3821 .addImm(Val: SubregIndex);
3822 constrainSelectedInstRegOperands(I&: *Undef, TII, TRI, RBI);
3823 constrainSelectedInstRegOperands(I&: *Ins, TII, TRI, RBI);
3824 return &*Ins;
3825 };
3826
3827 switch (EltSize) {
3828 case 8:
3829 return BuildFn(AArch64::bsub);
3830 case 16:
3831 return BuildFn(AArch64::hsub);
3832 case 32:
3833 return BuildFn(AArch64::ssub);
3834 case 64:
3835 return BuildFn(AArch64::dsub);
3836 default:
3837 return nullptr;
3838 }
3839}
3840
3841MachineInstr *
3842AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3843 MachineIRBuilder &MIB,
3844 MachineRegisterInfo &MRI) const {
3845 LLT DstTy = MRI.getType(Reg: DstReg);
3846 const TargetRegisterClass *RC =
3847 getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: SrcReg, MRI, TRI));
3848 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3849 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3850 return nullptr;
3851 }
3852 unsigned SubReg = 0;
3853 if (!getSubRegForClass(RC, TRI, SubReg))
3854 return nullptr;
3855 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3856 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3857 << DstTy.getSizeInBits() << "\n");
3858 return nullptr;
3859 }
3860 auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {})
3861 .addReg(RegNo: SrcReg, Flags: {}, SubReg);
3862 RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
3863 return Copy;
3864}
3865
// Select a two-operand G_MERGE_VALUES of scalars. Two forms are handled:
//  * s64 + s64 -> s128 via two lane inserts into an undef vector, and
//  * s32 + s32 -> s64 (GPR only) via SUBREG_TO_REG of each half + BFMXri.
bool AArch64InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
  const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
  const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
  const RegisterBank &RB = *RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI);

  // Only the two-source form is handled here.
  if (I.getNumOperands() != 3)
    return false;

  // Merging 2 s64s into an s128.
  if (DstTy == LLT::scalar(SizeInBits: 128)) {
    if (SrcTy.getSizeInBits() != 64)
      return false;
    Register DstReg = I.getOperand(i: 0).getReg();
    Register Src1Reg = I.getOperand(i: 1).getReg();
    Register Src2Reg = I.getOperand(i: 2).getReg();
    // Insert the low half into lane 0 of an undef value, then the high half
    // into lane 1 of the result of the first insert.
    auto Tmp = MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstTy}, SrcOps: {});
    MachineInstr *InsMI = emitLaneInsert(DstReg: std::nullopt, SrcReg: Tmp.getReg(Idx: 0), EltReg: Src1Reg,
                                         /* LaneIdx */ 0, RB, MIRBuilder&: MIB);
    if (!InsMI)
      return false;
    MachineInstr *Ins2MI = emitLaneInsert(DstReg, SrcReg: InsMI->getOperand(i: 0).getReg(),
                                          EltReg: Src2Reg, /* LaneIdx */ 1, RB, MIRBuilder&: MIB);
    if (!Ins2MI)
      return false;
    constrainSelectedInstRegOperands(I&: *InsMI, TII, TRI, RBI);
    constrainSelectedInstRegOperands(I&: *Ins2MI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }

  // The remaining path only handles GPR s32+s32 -> s64.
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
    return false;

  // Widen the first source into the low 32 bits of a 64-bit register.
  auto *DstRC = &AArch64::GPR64RegClass;
  Register SubToRegDef = MRI.createVirtualRegister(RegClass: DstRC);
  MachineInstr &SubRegMI = *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(),
                                    MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG))
                                .addDef(RegNo: SubToRegDef)
                                .addUse(RegNo: I.getOperand(i: 1).getReg())
                                .addImm(Val: AArch64::sub_32);
  Register SubToRegDef2 = MRI.createVirtualRegister(RegClass: DstRC);
  // Need to anyext the second scalar before we can use bfm
  MachineInstr &SubRegMI2 = *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(),
                                     MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG))
                                 .addDef(RegNo: SubToRegDef2)
                                 .addUse(RegNo: I.getOperand(i: 2).getReg())
                                 .addImm(Val: AArch64::sub_32);
  // BFMXri with immr=32, imms=31 moves bits [31:0] of the second value into
  // bits [63:32] of the first.
  MachineInstr &BFM =
      *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::BFMXri))
           .addDef(RegNo: I.getOperand(i: 0).getReg())
           .addUse(RegNo: SubToRegDef)
           .addUse(RegNo: SubToRegDef2)
           .addImm(Val: 32)
           .addImm(Val: 31);
  constrainSelectedInstRegOperands(I&: SubRegMI, TII, TRI, RBI);
  constrainSelectedInstRegOperands(I&: SubRegMI2, TII, TRI, RBI);
  constrainSelectedInstRegOperands(I&: BFM, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
3932
3933static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3934 const unsigned EltSize) {
3935 // Choose a lane copy opcode and subregister based off of the size of the
3936 // vector's elements.
3937 switch (EltSize) {
3938 case 8:
3939 CopyOpc = AArch64::DUPi8;
3940 ExtractSubReg = AArch64::bsub;
3941 break;
3942 case 16:
3943 CopyOpc = AArch64::DUPi16;
3944 ExtractSubReg = AArch64::hsub;
3945 break;
3946 case 32:
3947 CopyOpc = AArch64::DUPi32;
3948 ExtractSubReg = AArch64::ssub;
3949 break;
3950 case 64:
3951 CopyOpc = AArch64::DUPi64;
3952 ExtractSubReg = AArch64::dsub;
3953 break;
3954 default:
3955 // Unknown size, bail out.
3956 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3957 return false;
3958 }
3959 return true;
3960}
3961
// Emit code to extract lane \p LaneIdx of \p VecReg as a scalar of type
// \p ScalarTy into \p DstReg (a fresh virtual register is created when
// DstReg is std::nullopt). Lane 0 becomes a plain subregister COPY; other
// lanes use a DUPi lane copy, widening the source to 128 bits first if
// needed. Returns the final instruction, or nullptr on failure.
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
    std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
    Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: ScalarTy.getSizeInBits())) {
    LLVM_DEBUG(
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
    return nullptr;
  }

  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(Ty: ScalarTy, RB: DstRB, GetAllRegSet: true);
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
    return nullptr;
  }

  const RegisterBank &VecRB = *RBI.getRegBank(Reg: VecReg, MRI, TRI);
  const LLT &VecTy = MRI.getType(Reg: VecReg);
  const TargetRegisterClass *VecRC =
      getRegClassForTypeOnBank(Ty: VecTy, RB: VecRB, GetAllRegSet: true);
  if (!VecRC) {
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    return nullptr;
  }

  // The register that we're going to copy into.
  Register InsertReg = VecReg;
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(RegClass: DstRC);
  // If the lane index is 0, we just use a subregister COPY.
  if (LaneIdx == 0) {
    auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {*DstReg}, SrcOps: {})
                    .addReg(RegNo: VecReg, Flags: {}, SubReg: ExtractSubReg);
    RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI);
    return &*Copy;
  }

  // Lane copies require 128-bit wide registers. If we're dealing with an
  // unpacked vector, then we need to move up to that width. Insert an implicit
  // def and a subregister insert to get us there.
  if (VecTy.getSizeInBits() != 128) {
    MachineInstr *ScalarToVector = emitScalarToVector(
        EltSize: VecTy.getSizeInBits(), DstRC: &AArch64::FPR128RegClass, Scalar: VecReg, MIRBuilder);
    if (!ScalarToVector)
      return nullptr;
    InsertReg = ScalarToVector->getOperand(i: 0).getReg();
  }

  MachineInstr *LaneCopyMI =
      MIRBuilder.buildInstr(Opc: CopyOpc, DstOps: {*DstReg}, SrcOps: {InsertReg}).addImm(Val: LaneIdx);
  constrainSelectedInstRegOperands(I&: *LaneCopyMI, TII, TRI, RBI);

  // Make sure that we actually constrain the initial copy.
  RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI);
  return LaneCopyMI;
}
4021
bool AArch64InstructionSelector::selectExtractElt(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
         "unexpected opcode!");
  // Dst is the extracted scalar; Src is the vector being indexed.
  Register DstReg = I.getOperand(i: 0).getReg();
  const LLT NarrowTy = MRI.getType(Reg: DstReg);
  const Register SrcReg = I.getOperand(i: 1).getReg();
  const LLT WideTy = MRI.getType(Reg: SrcReg);
  (void)WideTy;
  assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
         "source register size too small!");
  assert(!NarrowTy.isVector() && "cannot extract vector into vector!");

  // Need the lane index to determine the correct copy opcode.
  MachineOperand &LaneIdxOp = I.getOperand(i: 2);
  assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");

  // This path only handles extracts whose destination lives on the FPR bank.
  if (RBI.getRegBank(Reg: DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
    return false;
  }

  // Find the index to extract from. The lane index must resolve to a constant
  // (looking through copies); bail out if it is a dynamic index.
  auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: LaneIdxOp.getReg(), MRI);
  if (!VRegAndVal)
    return false;
  unsigned LaneIdx = VRegAndVal->Value.getSExtValue();


  // Delegate the actual instruction emission to the shared helper; it returns
  // nullptr on failure.
  const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
  MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg,
                                               LaneIdx, MIRBuilder&: MIB);
  if (!Extract)
    return false;

  I.eraseFromParent();
  return true;
}
4060
4061bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4062 MachineInstr &I, MachineRegisterInfo &MRI) {
4063 unsigned NumElts = I.getNumOperands() - 1;
4064 Register SrcReg = I.getOperand(i: NumElts).getReg();
4065 const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
4066 const LLT SrcTy = MRI.getType(Reg: SrcReg);
4067
4068 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4069 if (SrcTy.getSizeInBits() > 128) {
4070 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4071 return false;
4072 }
4073
4074 // We implement a split vector operation by treating the sub-vectors as
4075 // scalars and extracting them.
4076 const RegisterBank &DstRB =
4077 *RBI.getRegBank(Reg: I.getOperand(i: 0).getReg(), MRI, TRI);
4078 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4079 Register Dst = I.getOperand(i: OpIdx).getReg();
4080 MachineInstr *Extract =
4081 emitExtractVectorElt(DstReg: Dst, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg, LaneIdx: OpIdx, MIRBuilder&: MIB);
4082 if (!Extract)
4083 return false;
4084 }
4085 I.eraseFromParent();
4086 return true;
4087}
4088
bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
                                                     MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "unexpected opcode");

  // TODO: Handle unmerging into GPRs and from scalars to scalars.
  // Both the first destination and the source must be on the FPR bank.
  if (RBI.getRegBank(Reg: I.getOperand(i: 0).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID ||
      RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI)->getID() !=
          AArch64::FPRRegBankID) {
    LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
                         "currently unsupported.\n");
    return false;
  }

  // The last operand is the vector source register, and every other operand is
  // a register to unpack into.
  unsigned NumElts = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(i: NumElts).getReg();
  const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
  const LLT WideTy = MRI.getType(Reg: SrcReg);

  assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
         "source register size too small!");

  // Vector destinations mean we are splitting a vector into sub-vectors;
  // handled by a dedicated routine.
  if (!NarrowTy.isScalar())
    return selectSplitVectorUnmerge(I, MRI);

  // Choose a lane copy opcode and subregister based off of the size of the
  // vector's elements.
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: NarrowTy.getSizeInBits()))
    return false;

  // Set up for the lane copies.
  MachineBasicBlock &MBB = *I.getParent();

  // Stores the registers we'll be copying from.
  SmallVector<Register, 4> InsertRegs;

  // We'll use the first register twice, so we only need NumElts-1 registers.
  unsigned NumInsertRegs = NumElts - 1;

  // If our elements fit into exactly 128 bits, then we can copy from the source
  // directly. Otherwise, we need to do a bit of setup with some subregister
  // inserts.
  if (NarrowTy.getSizeInBits() * NumElts == 128) {
    InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
  } else {
    // No. We have to perform subregister inserts. For each insert, create an
    // implicit def and a subregister insert, and save the register we create.
    // For scalar sources, treat as a pseudo-vector of NarrowTy elements.
    unsigned EltSize = WideTy.isVector() ? WideTy.getScalarSizeInBits()
                                         : NarrowTy.getSizeInBits();
    const TargetRegisterClass *RC = getRegClassForTypeOnBank(
        Ty: LLT::fixed_vector(NumElements: NumElts, ScalarSizeInBits: EltSize), RB: *RBI.getRegBank(Reg: SrcReg, MRI, TRI));
    unsigned SubReg = 0;
    bool Found = getSubRegForClass(RC, TRI, SubReg);
    (void)Found;
    assert(Found && "expected to find last operand's subeg idx");
    for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
      // Widen SrcReg to a 128-bit register: IMPLICIT_DEF of an FPR128
      // followed by an INSERT_SUBREG of the source into its low part.
      Register ImpDefReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass);
      MachineInstr &ImpDefMI =
          *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: TargetOpcode::IMPLICIT_DEF),
                   DestReg: ImpDefReg);

      // Now, create the subregister insert from SrcReg.
      Register InsertReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass);
      MachineInstr &InsMI =
          *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(),
                   MCID: TII.get(Opcode: TargetOpcode::INSERT_SUBREG), DestReg: InsertReg)
               .addUse(RegNo: ImpDefReg)
               .addUse(RegNo: SrcReg)
               .addImm(Val: SubReg);

      constrainSelectedInstRegOperands(I&: ImpDefMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(I&: InsMI, TII, TRI, RBI);

      // Save the register so that we can copy from it after.
      InsertRegs.push_back(Elt: InsertReg);
    }
  }

  // Now that we've created any necessary subregister inserts, we can
  // create the copies.
  //
  // Perform the first copy separately as a subregister copy.
  // Lane 0 is just the low subregister of the (possibly widened) source.
  Register CopyTo = I.getOperand(i: 0).getReg();
  auto FirstCopy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {CopyTo}, SrcOps: {})
                       .addReg(RegNo: InsertRegs[0], Flags: {}, SubReg: ExtractSubReg);
  constrainSelectedInstRegOperands(I&: *FirstCopy, TII, TRI, RBI);

  // Now, perform the remaining copies as vector lane copies.
  unsigned LaneIdx = 1;
  for (Register InsReg : InsertRegs) {
    Register CopyTo = I.getOperand(i: LaneIdx).getReg();
    MachineInstr &CopyInst =
        *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: CopyOpc), DestReg: CopyTo)
             .addUse(RegNo: InsReg)
             .addImm(Val: LaneIdx);
    constrainSelectedInstRegOperands(I&: CopyInst, TII, TRI, RBI);
    ++LaneIdx;
  }

  // Separately constrain the first copy's destination. Because of the
  // limitation in constrainOperandRegClass, we can't guarantee that this will
  // actually be constrained. So, do it ourselves using the second operand.
  const TargetRegisterClass *RC =
      MRI.getRegClassOrNull(Reg: I.getOperand(i: 1).getReg());
  if (!RC) {
    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
    return false;
  }

  RBI.constrainGenericRegister(Reg: CopyTo, RC: *RC, MRI);
  I.eraseFromParent();
  return true;
}
4208
4209bool AArch64InstructionSelector::selectConcatVectors(
4210 MachineInstr &I, MachineRegisterInfo &MRI) {
4211 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4212 "Unexpected opcode");
4213 Register Dst = I.getOperand(i: 0).getReg();
4214 Register Op1 = I.getOperand(i: 1).getReg();
4215 Register Op2 = I.getOperand(i: 2).getReg();
4216 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder&: MIB);
4217 if (!ConcatMI)
4218 return false;
4219 I.eraseFromParent();
4220 return true;
4221}
4222
4223unsigned
4224AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4225 MachineFunction &MF) const {
4226 Type *CPTy = CPVal->getType();
4227 Align Alignment = MF.getDataLayout().getPrefTypeAlign(Ty: CPTy);
4228
4229 MachineConstantPool *MCP = MF.getConstantPool();
4230 return MCP->getConstantPoolIndex(C: CPVal, Alignment);
4231}
4232
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  const TargetRegisterClass *RC;
  unsigned Opc;
  bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
  unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(Ty: CPVal->getType());
  // Pick an FPR register class and load opcode for the constant's store size.
  // The tiny code model can use a single PC-relative load-literal (LDR*l);
  // note there is no literal variant for the 2-byte case.
  switch (Size) {
  case 16:
    RC = &AArch64::FPR128RegClass;
    Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
    break;
  case 8:
    RC = &AArch64::FPR64RegClass;
    Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
    break;
  case 4:
    RC = &AArch64::FPR32RegClass;
    Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
    break;
  case 2:
    RC = &AArch64::FPR16RegClass;
    Opc = AArch64::LDRHui;
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType());
    return nullptr;
  }

  MachineInstr *LoadMI = nullptr;
  auto &MF = MIRBuilder.getMF();
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
  if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
    // Use load(literal) for tiny code model.
    LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {}).addConstantPoolIndex(Idx: CPIdx);
  } else {
    // Otherwise materialize the address with ADRP (page) and fold the page
    // offset into the load's immediate.
    auto Adrp =
        MIRBuilder.buildInstr(Opc: AArch64::ADRP, DstOps: {&AArch64::GPR64RegClass}, SrcOps: {})
            .addConstantPoolIndex(Idx: CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGE);

    LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {Adrp})
                   .addConstantPoolIndex(
                       Idx: CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC);

    constrainSelectedInstRegOperands(I&: *Adrp, TII, TRI, RBI);
  }

  // Attach a memory operand marking this as a load from the constant pool.
  MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
  LoadMI->addMemOperand(MF, MO: MF.getMachineMemOperand(PtrInfo,
                                                      F: MachineMemOperand::MOLoad,
                                                      Size, BaseAlignment: Align(Size)));
  constrainSelectedInstRegOperands(I&: *LoadMI, TII, TRI, RBI);
  return LoadMI;
}
4287
4288/// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
4289/// size and RB.
4290static std::pair<unsigned, unsigned>
4291getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4292 unsigned Opc, SubregIdx;
4293 if (RB.getID() == AArch64::GPRRegBankID) {
4294 if (EltSize == 8) {
4295 Opc = AArch64::INSvi8gpr;
4296 SubregIdx = AArch64::bsub;
4297 } else if (EltSize == 16) {
4298 Opc = AArch64::INSvi16gpr;
4299 SubregIdx = AArch64::ssub;
4300 } else if (EltSize == 32) {
4301 Opc = AArch64::INSvi32gpr;
4302 SubregIdx = AArch64::ssub;
4303 } else if (EltSize == 64) {
4304 Opc = AArch64::INSvi64gpr;
4305 SubregIdx = AArch64::dsub;
4306 } else {
4307 llvm_unreachable("invalid elt size!");
4308 }
4309 } else {
4310 if (EltSize == 8) {
4311 Opc = AArch64::INSvi8lane;
4312 SubregIdx = AArch64::bsub;
4313 } else if (EltSize == 16) {
4314 Opc = AArch64::INSvi16lane;
4315 SubregIdx = AArch64::hsub;
4316 } else if (EltSize == 32) {
4317 Opc = AArch64::INSvi32lane;
4318 SubregIdx = AArch64::ssub;
4319 } else if (EltSize == 64) {
4320 Opc = AArch64::INSvi64lane;
4321 SubregIdx = AArch64::dsub;
4322 } else {
4323 llvm_unreachable("invalid elt size!");
4324 }
4325 }
4326 return std::make_pair(x&: Opc, y&: SubregIdx);
4327}
4328
4329MachineInstr *AArch64InstructionSelector::emitInstr(
4330 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4331 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4332 const ComplexRendererFns &RenderFns) const {
4333 assert(Opcode && "Expected an opcode?");
4334 assert(!isPreISelGenericOpcode(Opcode) &&
4335 "Function should only be used to produce selected instructions!");
4336 auto MI = MIRBuilder.buildInstr(Opc: Opcode, DstOps, SrcOps);
4337 if (RenderFns)
4338 for (auto &Fn : *RenderFns)
4339 Fn(MI);
4340 constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI);
4341 return &*MI;
4342}
4343
MachineInstr *AArch64InstructionSelector::emitAddSub(
    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  // Emit an add/sub-family instruction, choosing the best encoding for RHS.
  // AddrModeAndSizeToOpcode rows (each indexed by Is32Bit):
  //   [0] ri  — positive arithmetic immediate
  //   [1] rs  — shifted register
  //   [2] rr  — plain register (fallback)
  //   [3] ri of the opposite operation — used to fold a negated immediate
  //   [4] rx  — extended register
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  auto Ty = MRI.getType(Reg: LHS.getReg());
  assert(!Ty.isVector() && "Expected a scalar or pointer?");
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
  bool Is32Bit = Size == 32;

  // INSTRri form with positive arithmetic immediate.
  if (auto Fns = selectArithImmed(Root&: RHS))
    return emitInstr(Opcode: AddrModeAndSizeToOpcode[0][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
                     MIRBuilder, RenderFns: Fns);

  // INSTRri form with negative arithmetic immediate.
  if (auto Fns = selectNegArithImmed(Root&: RHS))
    return emitInstr(Opcode: AddrModeAndSizeToOpcode[3][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
                     MIRBuilder, RenderFns: Fns);

  // INSTRrx form.
  if (auto Fns = selectArithExtendedRegister(Root&: RHS))
    return emitInstr(Opcode: AddrModeAndSizeToOpcode[4][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
                     MIRBuilder, RenderFns: Fns);

  // INSTRrs form.
  if (auto Fns = selectShiftedRegister(Root&: RHS))
    return emitInstr(Opcode: AddrModeAndSizeToOpcode[1][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
                     MIRBuilder, RenderFns: Fns);
  // Plain register-register form as the final fallback.
  return emitInstr(Opcode: AddrModeAndSizeToOpcode[2][Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS},
                   MIRBuilder);
}
4378
4379MachineInstr *
4380AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4381 MachineOperand &RHS,
4382 MachineIRBuilder &MIRBuilder) const {
4383 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4384 ._M_elems: {{AArch64::ADDXri, AArch64::ADDWri},
4385 {AArch64::ADDXrs, AArch64::ADDWrs},
4386 {AArch64::ADDXrr, AArch64::ADDWrr},
4387 {AArch64::SUBXri, AArch64::SUBWri},
4388 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4389 return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst: DefReg, LHS, RHS, MIRBuilder);
4390}
4391
4392MachineInstr *
4393AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4394 MachineOperand &RHS,
4395 MachineIRBuilder &MIRBuilder) const {
4396 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4397 ._M_elems: {{AArch64::ADDSXri, AArch64::ADDSWri},
4398 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4399 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4400 {AArch64::SUBSXri, AArch64::SUBSWri},
4401 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4402 return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder);
4403}
4404
4405MachineInstr *
4406AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4407 MachineOperand &RHS,
4408 MachineIRBuilder &MIRBuilder) const {
4409 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4410 ._M_elems: {{AArch64::SUBSXri, AArch64::SUBSWri},
4411 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4412 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4413 {AArch64::ADDSXri, AArch64::ADDSWri},
4414 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4415 return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder);
4416}
4417
4418MachineInstr *
4419AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4420 MachineOperand &RHS,
4421 MachineIRBuilder &MIRBuilder) const {
4422 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4423 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4424 bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32);
4425 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4426 return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder);
4427}
4428
4429MachineInstr *
4430AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4431 MachineOperand &RHS,
4432 MachineIRBuilder &MIRBuilder) const {
4433 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4434 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4435 bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32);
4436 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4437 return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder);
4438}
4439
4440MachineInstr *
4441AArch64InstructionSelector::emitCMP(MachineOperand &LHS, MachineOperand &RHS,
4442 MachineIRBuilder &MIRBuilder) const {
4443 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4444 bool Is32Bit = MRI.getType(Reg: LHS.getReg()).getSizeInBits() == 32;
4445 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4446 return emitSUBS(Dst: MRI.createVirtualRegister(RegClass: RC), LHS, RHS, MIRBuilder);
4447}
4448
4449MachineInstr *
4450AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4451 MachineIRBuilder &MIRBuilder) const {
4452 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4453 bool Is32Bit = (MRI.getType(Reg: LHS.getReg()).getSizeInBits() == 32);
4454 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4455 return emitADDS(Dst: MRI.createVirtualRegister(RegClass: RC), LHS, RHS, MIRBuilder);
4456}
4457
MachineInstr *
AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  // Emit a TST (flag-setting AND, ANDS) of LHS and RHS, preferring the
  // immediate form, then the shifted-register form, then plain rr.
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  LLT Ty = MRI.getType(Reg: LHS.getReg());
  unsigned RegSize = Ty.getSizeInBits();
  bool Is32Bit = (RegSize == 32);
  // Rows: {ri, rs, rr}; columns indexed by Is32Bit.
  const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
                                   {AArch64::ANDSXrs, AArch64::ANDSWrs},
                                   {AArch64::ANDSXrr, AArch64::ANDSWrr}};
  // ANDS needs a logical immediate for its immediate form. Check if we can
  // fold one in.
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI)) {
    int64_t Imm = ValAndVReg->Value.getSExtValue();

    if (AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize)) {
      // Logical immediates are stored in the instruction in an encoded form.
      auto TstMI = MIRBuilder.buildInstr(Opc: OpcTable[0][Is32Bit], DstOps: {Ty}, SrcOps: {LHS});
      TstMI.addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: Imm, regSize: RegSize));
      constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI);
      return &*TstMI;
    }
  }

  // Try to fold a shift into the second operand; fall back to plain rr.
  if (auto Fns = selectLogicalShiftedRegister(Root&: RHS))
    return emitInstr(Opcode: OpcTable[1][Is32Bit], DstOps: {Ty}, SrcOps: {LHS}, MIRBuilder, RenderFns: Fns);
  return emitInstr(Opcode: OpcTable[2][Is32Bit], DstOps: {Ty}, SrcOps: {LHS, RHS}, MIRBuilder);
}
4486
4487MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4488 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4489 MachineIRBuilder &MIRBuilder) const {
4490 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4491 assert(Predicate.isPredicate() && "Expected predicate?");
4492 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4493 LLT CmpTy = MRI.getType(Reg: LHS.getReg());
4494 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4495 unsigned Size = CmpTy.getSizeInBits();
4496 (void)Size;
4497 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4498 // Fold the compare into a cmn or tst if possible.
4499 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4500 return FoldCmp;
4501 return emitCMP(LHS, RHS, MIRBuilder);
4502}
4503
MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
    Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
  // Materialize the boolean result of an FP compare into Dst. Some FP
  // predicates need two AArch64 condition codes ORed together.
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
#ifndef NDEBUG
  LLT Ty = MRI.getType(Dst);
  assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
         "Expected a 32-bit scalar register?");
#endif
  const Register ZReg = AArch64::WZR;
  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(P: Pred, CondCode&: CC1, CondCode2&: CC2);
  // CSINC increments when its condition is false, so pass the inverted code
  // to get 1 when the original condition holds.
  auto InvCC1 = AArch64CC::getInvertedCondCode(Code: CC1);
  // Single-condition case: one CSINC suffices.
  if (CC2 == AArch64CC::AL)
    return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1,
                     MIRBuilder);
  // Two-condition case: compute each flag's boolean separately, then OR them.
  const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
  Register Def1Reg = MRI.createVirtualRegister(RegClass: RC);
  Register Def2Reg = MRI.createVirtualRegister(RegClass: RC);
  auto InvCC2 = AArch64CC::getInvertedCondCode(Code: CC2);
  emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1, MIRBuilder);
  emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC2, MIRBuilder);
  auto OrMI = MIRBuilder.buildInstr(Opc: AArch64::ORRWrr, DstOps: {Dst}, SrcOps: {Def1Reg, Def2Reg});
  constrainSelectedInstRegOperands(I&: *OrMI, TII, TRI, RBI);
  return &*OrMI;
}
4529
MachineInstr *AArch64InstructionSelector::emitFPCompare(
    Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
    std::optional<CmpInst::Predicate> Pred) const {
  // Emit a scalar FCMP of LHS and RHS; returns nullptr for vector operands.
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  LLT Ty = MRI.getType(Reg: LHS);
  if (Ty.isVector())
    return nullptr;
  unsigned OpSize = Ty.getSizeInBits();
  assert(OpSize == 16 || OpSize == 32 || OpSize == 64);

  // If this is a compare against +0.0, then we don't have
  // to explicitly materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(VReg: RHS, MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());

  // Equality compares are symmetric, so for those we may also swap the
  // operands to expose a +0.0 on the RHS.
  auto IsEqualityPred = [](CmpInst::Predicate P) {
    return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
           P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
  };
  if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
    // Try commutating the operands.
    const ConstantFP *LHSImm = getConstantFPVRegVal(VReg: LHS, MRI);
    if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
      ShouldUseImm = true;
      std::swap(a&: LHS, b&: RHS);
    }
  }
  // Rows: {register form, immediate (+0.0) form}; columns: {16, 32, 64}-bit.
  unsigned CmpOpcTbl[2][3] = {
      {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
      {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
  unsigned CmpOpc =
      CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];

  // Partially build the compare. Decide if we need to add a use for the
  // third operand based off whether or not we're comparing against 0.0.
  auto CmpMI = MIRBuilder.buildInstr(Opcode: CmpOpc).addUse(RegNo: LHS);
  CmpMI.setMIFlags(MachineInstr::NoFPExcept);
  if (!ShouldUseImm)
    CmpMI.addUse(RegNo: RHS);
  constrainSelectedInstRegOperands(I&: *CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}
4572
4573MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4574 std::optional<Register> Dst, Register Op1, Register Op2,
4575 MachineIRBuilder &MIRBuilder) const {
4576 // We implement a vector concat by:
4577 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4578 // 2. Insert the upper vector into the destination's upper element
4579 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4580 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4581
4582 const LLT Op1Ty = MRI.getType(Reg: Op1);
4583 const LLT Op2Ty = MRI.getType(Reg: Op2);
4584
4585 if (Op1Ty != Op2Ty) {
4586 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4587 return nullptr;
4588 }
4589 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4590
4591 if (Op1Ty.getSizeInBits() >= 128) {
4592 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4593 return nullptr;
4594 }
4595
4596 // At the moment we just support 64 bit vector concats.
4597 if (Op1Ty.getSizeInBits() != 64) {
4598 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
4599 return nullptr;
4600 }
4601
4602 const LLT ScalarTy = LLT::scalar(SizeInBits: Op1Ty.getSizeInBits());
4603 const RegisterBank &FPRBank = *RBI.getRegBank(Reg: Op1, MRI, TRI);
4604 const TargetRegisterClass *DstRC =
4605 getRegClassForTypeOnBank(Ty: Op1Ty.multiplyElements(Factor: 2), RB: FPRBank);
4606
4607 MachineInstr *WidenedOp1 =
4608 emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op1, MIRBuilder);
4609 MachineInstr *WidenedOp2 =
4610 emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op2, MIRBuilder);
4611 if (!WidenedOp1 || !WidenedOp2) {
4612 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4613 return nullptr;
4614 }
4615
4616 // Now do the insert of the upper element.
4617 unsigned InsertOpc, InsSubRegIdx;
4618 std::tie(args&: InsertOpc, args&: InsSubRegIdx) =
4619 getInsertVecEltOpInfo(RB: FPRBank, EltSize: ScalarTy.getSizeInBits());
4620
4621 if (!Dst)
4622 Dst = MRI.createVirtualRegister(RegClass: DstRC);
4623 auto InsElt =
4624 MIRBuilder
4625 .buildInstr(Opc: InsertOpc, DstOps: {*Dst}, SrcOps: {WidenedOp1->getOperand(i: 0).getReg()})
4626 .addImm(Val: 1) /* Lane index */
4627 .addUse(RegNo: WidenedOp2->getOperand(i: 0).getReg())
4628 .addImm(Val: 0);
4629 constrainSelectedInstRegOperands(I&: *InsElt, TII, TRI, RBI);
4630 return &*InsElt;
4631}
4632
4633MachineInstr *
4634AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4635 Register Src2, AArch64CC::CondCode Pred,
4636 MachineIRBuilder &MIRBuilder) const {
4637 auto &MRI = *MIRBuilder.getMRI();
4638 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg: Dst);
4639 // If we used a register class, then this won't necessarily have an LLT.
4640 // Compute the size based off whether or not we have a class or bank.
4641 unsigned Size;
4642 if (const auto *RC = dyn_cast<const TargetRegisterClass *>(Val: RegClassOrBank))
4643 Size = TRI.getRegSizeInBits(RC: *RC);
4644 else
4645 Size = MRI.getType(Reg: Dst).getSizeInBits();
4646 // Some opcodes use s1.
4647 assert(Size <= 64 && "Expected 64 bits or less only!");
4648 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4649 unsigned Opc = OpcTable[Size == 64];
4650 auto CSINC = MIRBuilder.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Src1, Src2}).addImm(Val: Pred);
4651 constrainSelectedInstRegOperands(I&: *CSINC, TII, TRI, RBI);
4652 return &*CSINC;
4653}
4654
MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
                                                      Register CarryReg) {
  // Transfer the vreg carry value in CarryReg into the NZCV C flag so the
  // following ADCS/SBCS can consume it. Returns the emitted flag-setting
  // instruction, or nullptr when the previous instruction already left the
  // correct carry in NZCV.
  MachineRegisterInfo *MRI = MIB.getMRI();
  unsigned Opcode = I.getOpcode();

  // If the instruction is a SUB, we need to negate the carry,
  // because borrowing is indicated by carry-flag == 0.
  bool NeedsNegatedCarry =
      (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);

  // If the previous instruction will already produce the correct carry, do not
  // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
  // generated during legalization of wide add/sub. This optimization depends on
  // these sequences not being interrupted by other instructions.
  // We have to select the previous instruction before the carry-using
  // instruction is deleted by the calling function, otherwise the previous
  // instruction might become dead and would get deleted.
  MachineInstr *SrcMI = MRI->getVRegDef(Reg: CarryReg);
  if (SrcMI == I.getPrevNode()) {
    if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(Val: SrcMI)) {
      bool ProducesNegatedCarry = CarrySrcMI->isSub();
      if (NeedsNegatedCarry == ProducesNegatedCarry &&
          CarrySrcMI->isUnsigned() &&
          CarrySrcMI->getCarryOutReg() == CarryReg &&
          selectAndRestoreState(I&: *SrcMI))
        return nullptr;
    }
  }

  // Result register of the flag-setting op below; only NZCV is of interest.
  Register DeadReg = MRI->createVirtualRegister(RegClass: &AArch64::GPR32RegClass);

  if (NeedsNegatedCarry) {
    // (0 - Carry) sets !C in NZCV when Carry == 1
    Register ZReg = AArch64::WZR;
    return emitInstr(Opcode: AArch64::SUBSWrr, DstOps: {DeadReg}, SrcOps: {ZReg, CarryReg}, MIRBuilder&: MIB);
  }

  // (Carry - 1) sets !C in NZCV when Carry == 0
  auto Fns = select12BitValueWithLeftShift(Immed: 1);
  return emitInstr(Opcode: AArch64::SUBSWri, DstOps: {DeadReg}, SrcOps: {CarryReg}, MIRBuilder&: MIB, RenderFns: Fns);
}
4696
bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
                                                  MachineRegisterInfo &MRI) {
  // Select a G_[SU]ADD[OE]/G_[SU]SUB[OE] instruction: emit the flag-setting
  // arithmetic, then materialize the overflow/carry bit if anything uses it.
  auto &CarryMI = cast<GAddSubCarryOut>(Val&: I);

  if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(Val: &I)) {
    // Set NZCV carry according to carry-in VReg.
    // (emitCarryIn may emit nothing when the previous instruction already
    // left the right carry in NZCV.)
    emitCarryIn(I, CarryReg: CarryInMI->getCarryInReg());
  }

  // Emit the operation and get the correct condition code.
  auto OpAndCC = emitOverflowOp(Opcode: I.getOpcode(), Dst: CarryMI.getDstReg(),
                                LHS&: CarryMI.getLHS(), RHS&: CarryMI.getRHS(), MIRBuilder&: MIB);

  Register CarryOutReg = CarryMI.getCarryOutReg();

  // Don't convert carry-out to VReg if it is never used
  if (!MRI.use_nodbg_empty(RegNo: CarryOutReg)) {
    // Now, put the overflow result in the register given by the first operand
    // to the overflow op. CSINC increments the result when the predicate is
    // false, so to get the increment when it's true, we need to use the
    // inverse. In this case, we want to increment when carry is set.
    Register ZReg = AArch64::WZR;
    emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
              Pred: getInvertedCondCode(Code: OpAndCC.second), MIRBuilder&: MIB);
  }

  I.eraseFromParent();
  return true;
}
4726
4727std::pair<MachineInstr *, AArch64CC::CondCode>
4728AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4729 MachineOperand &LHS,
4730 MachineOperand &RHS,
4731 MachineIRBuilder &MIRBuilder) const {
4732 switch (Opcode) {
4733 default:
4734 llvm_unreachable("Unexpected opcode!");
4735 case TargetOpcode::G_SADDO:
4736 return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4737 case TargetOpcode::G_UADDO:
4738 return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS);
4739 case TargetOpcode::G_SSUBO:
4740 return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4741 case TargetOpcode::G_USUBO:
4742 return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO);
4743 case TargetOpcode::G_SADDE:
4744 return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4745 case TargetOpcode::G_UADDE:
4746 return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS);
4747 case TargetOpcode::G_SSUBE:
4748 return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4749 case TargetOpcode::G_USUBE:
4750 return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO);
4751 }
4752}
4753
/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
/// expressed as a conjunction.
/// \param CanNegate Set to true if we can negate the whole sub-tree just by
///                  changing the conditions on the CMP tests.
///                  (this means we can call emitConjunctionRec() with
///                   Negate==true on this sub-tree)
/// \param MustBeFirst Set to true if this subtree needs to be negated and we
///                    cannot do the negation naturally. We are required to
///                    emit the subtree first in this case.
/// \param WillNegate Is true if are called when the result of this
///                   subexpression must be negated. This happens when the
///                   outer expression is an OR. We can use this fact to know
///                   that we have a double negation (or (or ...) ...) that
///                   can be implemented for free.
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
                               bool WillNegate, MachineRegisterInfo &MRI,
                               unsigned Depth = 0) {
  // Only fold values with a single (non-debug) use: the folded form no longer
  // produces a register another user could read.
  if (!MRI.hasOneNonDBGUse(RegNo: Val))
    return false;
  MachineInstr *ValDef = MRI.getVRegDef(Reg: Val);
  unsigned Opcode = ValDef->getOpcode();
  // A bare compare is a leaf: it can always be negated by inverting its
  // condition code, and never needs to be emitted first.
  if (isa<GAnyCmp>(Val: ValDef)) {
    CanNegate = true;
    MustBeFirst = false;
    return true;
  }
  // Protect against exponential runtime and stack overflow.
  if (Depth > 6)
    return false;
  if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
    bool IsOR = Opcode == TargetOpcode::G_OR;
    Register O0 = ValDef->getOperand(i: 1).getReg();
    Register O1 = ValDef->getOperand(i: 2).getReg();
    // Both sub-trees must themselves be expressible as conjunctions.
    bool CanNegateL;
    bool MustBeFirstL;
    if (!canEmitConjunction(Val: O0, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI, Depth: Depth + 1))
      return false;
    bool CanNegateR;
    bool MustBeFirstR;
    if (!canEmitConjunction(Val: O1, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI, Depth: Depth + 1))
      return false;

    // At most one sub-tree may demand to be emitted first.
    if (MustBeFirstL && MustBeFirstR)
      return false;

    if (IsOR) {
      // For an OR expression we need to be able to naturally negate at least
      // one side or we cannot do the transformation at all.
      if (!CanNegateL && !CanNegateR)
        return false;
      // If we the result of the OR will be negated and we can naturally negate
      // the leaves, then this sub-tree as a whole negates naturally.
      CanNegate = WillNegate && CanNegateL && CanNegateR;
      // If we cannot naturally negate the whole sub-tree, then this must be
      // emitted first.
      MustBeFirst = !CanNegate;
    } else {
      assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
      // We cannot naturally negate an AND operation.
      CanNegate = false;
      MustBeFirst = MustBeFirstL || MustBeFirstR;
    }
    return true;
  }
  return false;
}
4820
/// Emit a conditional comparison (CCMP/CCMN/FCCMP) comparing \p LHS and
/// \p RHS under condition \p CC. The comparison only takes effect when the
/// incoming flags satisfy \p Predicate; otherwise the flags are forced to a
/// value that fails \p OutCC.
MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
    Register LHS, Register RHS, CmpInst::Predicate CC,
    AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
    MachineIRBuilder &MIB) const {
  auto &MRI = *MIB.getMRI();
  LLT OpTy = MRI.getType(Reg: LHS);
  unsigned CCmpOpc;
  std::optional<ValueAndVReg> C;
  if (CmpInst::isIntPredicate(P: CC)) {
    // Integer compare: prefer an immediate form when RHS is a small constant.
    // Constants in [0, 31] use CCMP with an immediate; constants in [-31, -1]
    // use CCMN (compare-negative) with the absolute value; anything else
    // falls back to the register form.
    assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
    C = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
    if (!C || C->Value.sgt(RHS: 31) || C->Value.slt(RHS: -31))
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
    else if (C->Value.ule(RHS: 31))
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
    else
      CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
  } else {
    // Floating-point compare: pick the FCCMP variant matching the operand
    // width. fp16 requires the full FP16 feature.
    assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
           OpTy.getSizeInBits() == 64);
    switch (OpTy.getSizeInBits()) {
    case 16:
      assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
      CCmpOpc = AArch64::FCCMPHrr;
      break;
    case 32:
      CCmpOpc = AArch64::FCCMPSrr;
      break;
    case 64:
      CCmpOpc = AArch64::FCCMPDrr;
      break;
    default:
      return nullptr;
    }
  }
  // When Predicate does not hold, the ccmp writes NZCV directly; choose a
  // value that satisfies the *inverted* OutCC, i.e. makes OutCC fail.
  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(Code: OutCC);
  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(Code: InvOutCC);
  auto CCmp =
      MIB.buildInstr(Opc: CCmpOpc, DstOps: {}, SrcOps: {LHS});
  if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
    CCmp.addImm(Val: C->Value.getZExtValue());
  else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
    // CCMN compares against the negated constant, so encode |C|.
    CCmp.addImm(Val: C->Value.abs().getZExtValue());
  else
    CCmp.addReg(RegNo: RHS);
  CCmp.addImm(Val: NZCV).addImm(Val: Predicate);
  constrainSelectedInstRegOperands(I&: *CCmp, TII, TRI, RBI);
  return &*CCmp;
}
4870
/// Recursively emit the conditional-compare chain for the conjunction tree
/// rooted at \p Val. \p CCOp holds the def register of the previously emitted
/// flag-setting instruction in the chain (invalid register if none yet), and
/// \p Predicate the condition under which this node's compare takes effect.
/// On return, \p OutCC is the condition code testing the whole sub-tree.
MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
    Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
    AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
  // We're at a tree leaf, produce a conditional comparison operation.
  auto &MRI = *MIB.getMRI();
  MachineInstr *ValDef = MRI.getVRegDef(Reg: Val);
  unsigned Opcode = ValDef->getOpcode();
  if (auto *Cmp = dyn_cast<GAnyCmp>(Val: ValDef)) {
    Register LHS = Cmp->getLHSReg();
    Register RHS = Cmp->getRHSReg();
    CmpInst::Predicate CC = Cmp->getCond();
    // Negation of a leaf is done by inverting the compare's predicate.
    if (Negate)
      CC = CmpInst::getInversePredicate(pred: CC);
    if (isa<GICmp>(Val: Cmp)) {
      OutCC = changeICMPPredToAArch64CC(P: CC, RHS, MRI: MIB.getMRI());
    } else {
      // Handle special FP cases.
      AArch64CC::CondCode ExtraCC;
      changeFPCCToANDAArch64CC(CC, CondCode&: OutCC, CondCode2&: ExtraCC);
      // Some floating point conditions can't be tested with a single condition
      // code. Construct an additional comparison in this case.
      if (ExtraCC != AArch64CC::AL) {
        MachineInstr *ExtraCmp;
        if (!CCOp)
          ExtraCmp = emitFPCompare(LHS, RHS, MIRBuilder&: MIB, Pred: CC);
        else
          ExtraCmp =
              emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC: ExtraCC, MIB);
        // Chain the main compare below off this extra compare's flags.
        CCOp = ExtraCmp->getOperand(i: 0).getReg();
        Predicate = ExtraCC;
      }
    }

    // Produce a normal comparison if we are first in the chain
    if (!CCOp) {
      if (isa<GICmp>(Val: Cmp))
        return emitCMP(LHS&: Cmp->getOperand(i: 2), RHS&: Cmp->getOperand(i: 3), MIRBuilder&: MIB);
      return emitFPCompare(LHS: Cmp->getOperand(i: 2).getReg(),
                           RHS: Cmp->getOperand(i: 3).getReg(), MIRBuilder&: MIB);
    }
    // Otherwise produce a ccmp.
    return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
  }
  assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");

  bool IsOR = Opcode == TargetOpcode::G_OR;

  // Re-analyze both children; canEmitConjunction already validated the tree,
  // so the results are asserted rather than handled.
  Register LHS = ValDef->getOperand(i: 1).getReg();
  bool CanNegateL;
  bool MustBeFirstL;
  bool ValidL = canEmitConjunction(Val: LHS, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI);
  assert(ValidL && "Valid conjunction/disjunction tree");
  (void)ValidL;

  Register RHS = ValDef->getOperand(i: 2).getReg();
  bool CanNegateR;
  bool MustBeFirstR;
  bool ValidR = canEmitConjunction(Val: RHS, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI);
  assert(ValidR && "Valid conjunction/disjunction tree");
  (void)ValidR;

  // Swap sub-tree that must come first to the right side.
  if (MustBeFirstL) {
    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
    std::swap(a&: LHS, b&: RHS);
    std::swap(a&: CanNegateL, b&: CanNegateR);
    std::swap(a&: MustBeFirstL, b&: MustBeFirstR);
  }

  bool NegateR;
  bool NegateAfterR;
  bool NegateL;
  bool NegateAfterAll;
  if (Opcode == TargetOpcode::G_OR) {
    // An OR is emitted as a negated AND (De Morgan): negate the children
    // where possible, and the final result where not.
    // Swap the sub-tree that we can negate naturally to the left.
    if (!CanNegateL) {
      assert(CanNegateR && "at least one side must be negatable");
      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
      assert(!Negate);
      std::swap(a&: LHS, b&: RHS);
      NegateR = false;
      NegateAfterR = true;
    } else {
      // Negate the left sub-tree if possible, otherwise negate the result.
      NegateR = CanNegateR;
      NegateAfterR = !CanNegateR;
    }
    NegateL = true;
    NegateAfterAll = !Negate;
  } else {
    assert(Opcode == TargetOpcode::G_AND &&
           "Valid conjunction/disjunction tree");
    assert(!Negate && "Valid conjunction/disjunction tree");

    // Plain conjunction: nothing needs to be negated.
    NegateL = false;
    NegateR = false;
    NegateAfterR = false;
    NegateAfterAll = false;
  }

  // Emit sub-trees.
  // The right sub-tree is emitted first; its flags feed the left's compare.
  AArch64CC::CondCode RHSCC;
  MachineInstr *CmpR =
      emitConjunctionRec(Val: RHS, OutCC&: RHSCC, Negate: NegateR, CCOp, Predicate, MIB);
  if (NegateAfterR)
    RHSCC = AArch64CC::getInvertedCondCode(Code: RHSCC);
  MachineInstr *CmpL = emitConjunctionRec(
      Val: LHS, OutCC, Negate: NegateL, CCOp: CmpR->getOperand(i: 0).getReg(), Predicate: RHSCC, MIB);
  if (NegateAfterAll)
    OutCC = AArch64CC::getInvertedCondCode(Code: OutCC);
  return CmpL;
}
4983
4984MachineInstr *AArch64InstructionSelector::emitConjunction(
4985 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4986 bool DummyCanNegate;
4987 bool DummyMustBeFirst;
4988 if (!canEmitConjunction(Val, CanNegate&: DummyCanNegate, MustBeFirst&: DummyMustBeFirst, WillNegate: false,
4989 MRI&: *MIB.getMRI()))
4990 return nullptr;
4991 return emitConjunctionRec(Val, OutCC, Negate: false, CCOp: Register(), Predicate: AArch64CC::AL, MIB);
4992}
4993
4994bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4995 MachineInstr &CondMI) {
4996 AArch64CC::CondCode AArch64CC;
4997 MachineInstr *ConjMI = emitConjunction(Val: SelI.getCondReg(), OutCC&: AArch64CC, MIB);
4998 if (!ConjMI)
4999 return false;
5000
5001 emitSelect(Dst: SelI.getReg(Idx: 0), True: SelI.getTrueReg(), False: SelI.getFalseReg(), CC: AArch64CC, MIB);
5002 SelI.eraseFromParent();
5003 return true;
5004}
5005
/// Try to fold the compare (or conjunction of compares) feeding a G_SELECT
/// directly into the select, avoiding materializing the boolean condition.
bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  // We want to recognize this pattern:
  //
  // $z = G_FCMP pred, $x, $y
  // ...
  // $w = G_SELECT $z, $a, $b
  //
  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
  // some copies/truncs in between.)
  //
  // If we see this, then we can emit something like this:
  //
  // fcmp $x, $y
  // fcsel $w, $a, $b, pred
  //
  // Rather than emitting both of the rather long sequences in the standard
  // G_FCMP/G_SELECT select methods.

  // First, check if the condition is defined by a compare.
  MachineInstr *CondDef = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg());

  // We can only fold if all of the defs have one use.
  Register CondDefReg = CondDef->getOperand(i: 0).getReg();
  if (!MRI.hasOneNonDBGUse(RegNo: CondDefReg)) {
    // Unless it's another select: the folded compare can feed several
    // selects, so any additional user that is itself a G_SELECT is fine.
    for (const MachineInstr &UI : MRI.use_nodbg_instructions(Reg: CondDefReg)) {
      if (CondDef == &UI)
        continue;
      if (UI.getOpcode() != TargetOpcode::G_SELECT)
        return false;
    }
  }

  // Is the condition defined by a compare?
  unsigned CondOpc = CondDef->getOpcode();
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
    // Not a plain compare; it may still be an AND/OR conjunction of compares
    // that can be lowered as a conditional-compare chain.
    if (tryOptSelectConjunction(SelI&: I, CondMI&: *CondDef))
      return true;
    return false;
  }

  AArch64CC::CondCode CondCode;
  if (CondOpc == TargetOpcode::G_ICMP) {
    auto &PredOp = CondDef->getOperand(i: 1);
    // Note: PredOp is passed by reference and may be rewritten while emitting
    // the compare (e.g. if operands get swapped by a fold), so the predicate
    // must be read back only *after* this call.
    emitIntegerCompare(LHS&: CondDef->getOperand(i: 2), RHS&: CondDef->getOperand(i: 3), Predicate&: PredOp,
                       MIRBuilder&: MIB);
    auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
    CondCode =
        changeICMPPredToAArch64CC(P: Pred, RHS: CondDef->getOperand(i: 3).getReg(), MRI: &MRI);
  } else {
    // Get the condition code for the select.
    auto Pred =
        static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate());
    AArch64CC::CondCode CondCode2;
    changeFCMPPredToAArch64CC(P: Pred, CondCode, CondCode2);

    // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
    // instructions to emit the comparison.
    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
    // unnecessary.
    if (CondCode2 != AArch64CC::AL)
      return false;

    if (!emitFPCompare(LHS: CondDef->getOperand(i: 2).getReg(),
                       RHS: CondDef->getOperand(i: 3).getReg(), MIRBuilder&: MIB)) {
      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
      return false;
    }
  }

  // Emit the select.
  emitSelect(Dst: I.getOperand(i: 0).getReg(), True: I.getOperand(i: 2).getReg(),
             False: I.getOperand(i: 3).getReg(), CC: CondCode, MIB);
  I.eraseFromParent();
  return true;
}
5083
/// Try to fold the instruction defining one operand of a G_ICMP into the
/// compare itself, producing a flag-setting CMN or TST instead of a plain
/// CMP. Returns the emitted instruction, or nullptr if no fold applies.
/// May rewrite \p Predicate when the operands are swapped.
MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
         "Unexpected MachineOperand");
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // We want to find this sort of thing:
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
  // e.g:
  //
  // cmn z, y

  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
  MachineInstr *LHSDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI);
  MachineInstr *RHSDef = getDefIgnoringCopies(Reg: RHS.getReg(), MRI);
  auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());

  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // Produce this:
  //
  // cmn z, y
  if (isCMN(MaybeSub: RHSDef, Pred: P, MRI))
    return emitCMN(LHS, RHS&: RHSDef->getOperand(i: 2), MIRBuilder);

  // Same idea here, but with the LHS of the compare instead:
  //
  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP x, z
  //
  // Produce this:
  //
  // cmn y, z
  //
  // But be careful! We need to swap the predicate!
  if (isCMN(MaybeSub: LHSDef, Pred: P, MRI)) {
    // Equality predicates are symmetric in their operands, so only
    // non-equality predicates need the swap.
    if (!CmpInst::isEquality(pred: P)) {
      P = CmpInst::getSwappedPredicate(pred: P);
      Predicate = MachineOperand::CreatePredicate(Pred: P);
    }
    return emitCMN(LHS&: LHSDef->getOperand(i: 2), RHS, MIRBuilder);
  }

  // Given this:
  //
  // z = G_AND x, y
  // G_ICMP z, 0
  //
  // Produce this if the compare is signed (i.e. any predicate other than the
  // strictly-unsigned ones; equality also qualifies):
  //
  // tst x, y
  if (!CmpInst::isUnsigned(predicate: P) && LHSDef &&
      LHSDef->getOpcode() == TargetOpcode::G_AND) {
    // Make sure that the RHS is 0.
    auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return nullptr;

    return emitTST(LHS&: LHSDef->getOperand(i: 1),
                   RHS&: LHSDef->getOperand(i: 2), MIRBuilder);
  }

  return nullptr;
}
5156
/// Lower G_SHUFFLE_VECTOR as a TBL table lookup: materialize the shuffle
/// mask as a byte-index vector in the constant pool, then run the source
/// vector(s) through TBL1 (64-bit result) or TBL2 (128-bit result).
bool AArch64InstructionSelector::selectShuffleVector(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
  Register Src1Reg = I.getOperand(i: 1).getReg();
  Register Src2Reg = I.getOperand(i: 2).getReg();
  ArrayRef<int> Mask = I.getOperand(i: 3).getShuffleMask();

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  LLVMContext &Ctx = MF.getFunction().getContext();

  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;

  // TBL indexes individual bytes, so expand each element index in the mask
  // into the byte offsets of that element.
  SmallVector<Constant *, 64> CstIdxs;
  for (int Val : Mask) {
    // For now, any undef indexes we'll just assume to be 0. This should be
    // optimized in future, e.g. to select DUP etc.
    Val = Val < 0 ? 0 : Val;
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      unsigned Offset = Byte + Val * BytesPerElt;
      CstIdxs.emplace_back(Args: ConstantInt::get(Ty: Type::getInt8Ty(C&: Ctx), V: Offset));
    }
  }

  // Use a constant pool to load the index vector for TBL.
  Constant *CPVal = ConstantVector::get(V: CstIdxs);
  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder&: MIB);
  if (!IndexLoad) {
    LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
    return false;
  }

  if (DstTy.getSizeInBits() != 128) {
    assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
    // This case can be done with TBL1: concatenate both 64-bit sources into
    // one 128-bit table register.
    MachineInstr *Concat =
        emitVectorConcat(Dst: std::nullopt, Op1: Src1Reg, Op2: Src2Reg, MIRBuilder&: MIB);
    if (!Concat) {
      LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
      return false;
    }

    // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
    IndexLoad = emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass,
                                   Scalar: IndexLoad->getOperand(i: 0).getReg(), MIRBuilder&: MIB);

    auto TBL1 = MIB.buildInstr(
        Opc: AArch64::TBLv16i8One, DstOps: {&AArch64::FPR128RegClass},
        SrcOps: {Concat->getOperand(i: 0).getReg(), IndexLoad->getOperand(i: 0).getReg()});
    constrainSelectedInstRegOperands(I&: *TBL1, TII, TRI, RBI);

    // TBL1 produces a 128-bit value; copy out the low 64 bits (dsub) as the
    // final result.
    auto Copy =
        MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {})
            .addReg(RegNo: TBL1.getReg(Idx: 0), Flags: {}, SubReg: AArch64::dsub);
    RBI.constrainGenericRegister(Reg: Copy.getReg(Idx: 0), RC: AArch64::FPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }

  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
  // Q registers for regalloc.
  SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
  auto RegSeq = createQTuple(Regs, MIB);
  auto TBL2 = MIB.buildInstr(Opc: AArch64::TBLv16i8Two, DstOps: {I.getOperand(i: 0)},
                             SrcOps: {RegSeq, IndexLoad->getOperand(i: 0)});
  constrainSelectedInstRegOperands(I&: *TBL2, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
5226
5227MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5228 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5229 unsigned LaneIdx, const RegisterBank &RB,
5230 MachineIRBuilder &MIRBuilder) const {
5231 MachineInstr *InsElt = nullptr;
5232 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5233 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5234
5235 // Create a register to define with the insert if one wasn't passed in.
5236 if (!DstReg)
5237 DstReg = MRI.createVirtualRegister(RegClass: DstRC);
5238
5239 unsigned EltSize = MRI.getType(Reg: EltReg).getSizeInBits();
5240 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5241
5242 if (RB.getID() == AArch64::FPRRegBankID) {
5243 auto InsSub = emitScalarToVector(EltSize, DstRC, Scalar: EltReg, MIRBuilder);
5244 InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg})
5245 .addImm(Val: LaneIdx)
5246 .addUse(RegNo: InsSub->getOperand(i: 0).getReg())
5247 .addImm(Val: 0);
5248 } else {
5249 InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg})
5250 .addImm(Val: LaneIdx)
5251 .addUse(RegNo: EltReg);
5252 }
5253
5254 constrainSelectedInstRegOperands(I&: *InsElt, TII, TRI, RBI);
5255 return InsElt;
5256}
5257
/// Select SMOV/UMOV for a scalar G_SEXT/G_ZEXT/G_ANYEXT whose source is a
/// G_EXTRACT_VECTOR_ELT with a constant lane: the extract+extend pair folds
/// into a single lane-move instruction.
bool AArch64InstructionSelector::selectUSMovFromExtend(
    MachineInstr &MI, MachineRegisterInfo &MRI) {
  if (MI.getOpcode() != TargetOpcode::G_SEXT &&
      MI.getOpcode() != TargetOpcode::G_ZEXT &&
      MI.getOpcode() != TargetOpcode::G_ANYEXT)
    return false;
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
  const Register DefReg = MI.getOperand(i: 0).getReg();
  const LLT DstTy = MRI.getType(Reg: DefReg);
  unsigned DstSize = DstTy.getSizeInBits();

  if (DstSize != 32 && DstSize != 64)
    return false;

  // Only fire when the extend's source is an extract with a known lane index.
  MachineInstr *Extract = getOpcodeDef(Opcode: TargetOpcode::G_EXTRACT_VECTOR_ELT,
                                       Reg: MI.getOperand(i: 1).getReg(), MRI);
  int64_t Lane;
  if (!Extract || !mi_match(R: Extract->getOperand(i: 2).getReg(), MRI, P: m_ICst(Cst&: Lane)))
    return false;
  Register Src0 = Extract->getOperand(i: 1).getReg();

  const LLT VecTy = MRI.getType(Reg: Src0);
  if (VecTy.isScalableVector())
    return false;

  // The lane-move opcodes operate on a 128-bit vector register, so widen
  // narrower sources first.
  if (VecTy.getSizeInBits() != 128) {
    const MachineInstr *ScalarToVector = emitScalarToVector(
        EltSize: VecTy.getSizeInBits(), DstRC: &AArch64::FPR128RegClass, Scalar: Src0, MIRBuilder&: MIB);
    assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
    Src0 = ScalarToVector->getOperand(i: 0).getReg();
  }

  // Pick the SMOV/UMOV variant from destination size x element size.
  unsigned Opcode;
  if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
    Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
    Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
  else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
    Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
    Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
  else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
    Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
  else
    llvm_unreachable("Unexpected type combo for S/UMov!");

  // We may need to generate one of these, depending on the type and sign of the
  // input:
  //  DstReg = SMOV Src0, Lane;
  //  NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
  MachineInstr *ExtI = nullptr;
  if (DstSize == 64 && !IsSigned) {
    // Unsigned 64-bit result: UMOV into a 32-bit register and widen with
    // SUBREG_TO_REG rather than using a (nonexistent) 64-bit UMOV form.
    Register NewReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
    MIB.buildInstr(Opc: Opcode, DstOps: {NewReg}, SrcOps: {Src0}).addImm(Val: Lane);
    ExtI = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DefReg}, SrcOps: {})
               .addUse(RegNo: NewReg)
               .addImm(Val: AArch64::sub_32);
    RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
  } else
    ExtI = MIB.buildInstr(Opc: Opcode, DstOps: {DefReg}, SrcOps: {Src0}).addImm(Val: Lane);

  constrainSelectedInstRegOperands(I&: *ExtI, TII, TRI, RBI);
  MI.eraseFromParent();
  return true;
}
5323
5324MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5325 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5326 unsigned int Op;
5327 if (DstSize == 128) {
5328 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5329 return nullptr;
5330 Op = AArch64::MOVIv16b_ns;
5331 } else {
5332 Op = AArch64::MOVIv8b_ns;
5333 }
5334
5335 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5336
5337 if (AArch64_AM::isAdvSIMDModImmType9(Imm: Val)) {
5338 Val = AArch64_AM::encodeAdvSIMDModImmType9(Imm: Val);
5339 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5340 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5341 return &*Mov;
5342 }
5343 return nullptr;
5344}
5345
5346MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5347 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5348 bool Inv) {
5349
5350 unsigned int Op;
5351 if (DstSize == 128) {
5352 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5353 return nullptr;
5354 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5355 } else {
5356 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5357 }
5358
5359 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5360 uint64_t Shift;
5361
5362 if (AArch64_AM::isAdvSIMDModImmType5(Imm: Val)) {
5363 Val = AArch64_AM::encodeAdvSIMDModImmType5(Imm: Val);
5364 Shift = 0;
5365 } else if (AArch64_AM::isAdvSIMDModImmType6(Imm: Val)) {
5366 Val = AArch64_AM::encodeAdvSIMDModImmType6(Imm: Val);
5367 Shift = 8;
5368 } else
5369 return nullptr;
5370
5371 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5372 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5373 return &*Mov;
5374}
5375
5376MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5377 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5378 bool Inv) {
5379
5380 unsigned int Op;
5381 if (DstSize == 128) {
5382 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5383 return nullptr;
5384 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5385 } else {
5386 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5387 }
5388
5389 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5390 uint64_t Shift;
5391
5392 if ((AArch64_AM::isAdvSIMDModImmType1(Imm: Val))) {
5393 Val = AArch64_AM::encodeAdvSIMDModImmType1(Imm: Val);
5394 Shift = 0;
5395 } else if ((AArch64_AM::isAdvSIMDModImmType2(Imm: Val))) {
5396 Val = AArch64_AM::encodeAdvSIMDModImmType2(Imm: Val);
5397 Shift = 8;
5398 } else if ((AArch64_AM::isAdvSIMDModImmType3(Imm: Val))) {
5399 Val = AArch64_AM::encodeAdvSIMDModImmType3(Imm: Val);
5400 Shift = 16;
5401 } else if ((AArch64_AM::isAdvSIMDModImmType4(Imm: Val))) {
5402 Val = AArch64_AM::encodeAdvSIMDModImmType4(Imm: Val);
5403 Shift = 24;
5404 } else
5405 return nullptr;
5406
5407 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5408 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5409 return &*Mov;
5410}
5411
5412MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5413 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5414
5415 unsigned int Op;
5416 if (DstSize == 128) {
5417 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5418 return nullptr;
5419 Op = AArch64::MOVIv2d_ns;
5420 } else {
5421 Op = AArch64::MOVID;
5422 }
5423
5424 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5425 if (AArch64_AM::isAdvSIMDModImmType10(Imm: Val)) {
5426 Val = AArch64_AM::encodeAdvSIMDModImmType10(Imm: Val);
5427 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5428 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5429 return &*Mov;
5430 }
5431 return nullptr;
5432}
5433
5434MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5435 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5436 bool Inv) {
5437
5438 unsigned int Op;
5439 if (DstSize == 128) {
5440 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5441 return nullptr;
5442 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5443 } else {
5444 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5445 }
5446
5447 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5448 uint64_t Shift;
5449
5450 if (AArch64_AM::isAdvSIMDModImmType7(Imm: Val)) {
5451 Val = AArch64_AM::encodeAdvSIMDModImmType7(Imm: Val);
5452 Shift = 264;
5453 } else if (AArch64_AM::isAdvSIMDModImmType8(Imm: Val)) {
5454 Val = AArch64_AM::encodeAdvSIMDModImmType8(Imm: Val);
5455 Shift = 272;
5456 } else
5457 return nullptr;
5458
5459 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5460 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5461 return &*Mov;
5462}
5463
5464MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5465 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5466
5467 unsigned int Op;
5468 bool IsWide = false;
5469 if (DstSize == 128) {
5470 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5471 return nullptr;
5472 Op = AArch64::FMOVv4f32_ns;
5473 IsWide = true;
5474 } else {
5475 Op = AArch64::FMOVv2f32_ns;
5476 }
5477
5478 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5479
5480 if (AArch64_AM::isAdvSIMDModImmType11(Imm: Val)) {
5481 Val = AArch64_AM::encodeAdvSIMDModImmType11(Imm: Val);
5482 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Imm: Val)) {
5483 Val = AArch64_AM::encodeAdvSIMDModImmType12(Imm: Val);
5484 Op = AArch64::FMOVv2f64_ns;
5485 } else
5486 return nullptr;
5487
5488 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5489 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5490 return &*Mov;
5491}
5492
/// Select a pre/post-indexed extending load into the matching LDR*pre/post
/// instruction, then widen the loaded value into the destination register
/// (via SUBREG_TO_REG or a plain copy) as needed.
bool AArch64InstructionSelector::selectIndexedExtLoad(
    MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto &ExtLd = cast<GIndexedAnyExtLoad>(Val&: MI);
  Register Dst = ExtLd.getDstReg();
  Register WriteBack = ExtLd.getWritebackReg();
  Register Base = ExtLd.getBaseReg();
  Register Offset = ExtLd.getOffsetReg();
  LLT Ty = MRI.getType(Reg: Dst);
  assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
  unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
  bool IsPre = ExtLd.isPre();
  bool IsSExt = isa<GIndexedSExtLoad>(Val: ExtLd);
  // Subregister index used to insert the narrow load result into the wider
  // destination (0 means the result is copied as-is).
  unsigned InsertIntoSubReg = 0;
  bool IsDst64 = Ty.getSizeInBits() == 64;

  // ZExt/SExt should be on gpr but can handle extload and zextload of fpr, so
  // long as they are scalar.
  bool IsFPR = RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
  if ((IsSExt && IsFPR) || Ty.isVector())
    return false;

  unsigned Opc = 0;
  LLT NewLdDstTy;
  LLT s32 = LLT::scalar(SizeInBits: 32);
  LLT s64 = LLT::scalar(SizeInBits: 64);

  // Pick the opcode and the type the load instruction naturally produces,
  // keyed on memory size, signedness, and register bank.
  if (MemSizeBits == 8) {
    if (IsSExt) {
      if (IsDst64)
        Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
      NewLdDstTy = IsDst64 ? s64 : s32;
    } else if (IsFPR) {
      Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
      InsertIntoSubReg = AArch64::bsub;
      NewLdDstTy = LLT::scalar(SizeInBits: MemSizeBits);
    } else {
      Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
      NewLdDstTy = s32;
    }
  } else if (MemSizeBits == 16) {
    if (IsSExt) {
      if (IsDst64)
        Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
      NewLdDstTy = IsDst64 ? s64 : s32;
    } else if (IsFPR) {
      Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
      InsertIntoSubReg = AArch64::hsub;
      NewLdDstTy = LLT::scalar(SizeInBits: MemSizeBits);
    } else {
      Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
      NewLdDstTy = s32;
    }
  } else if (MemSizeBits == 32) {
    if (IsSExt) {
      Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
      NewLdDstTy = s64;
    } else if (IsFPR) {
      Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
      InsertIntoSubReg = AArch64::ssub;
      NewLdDstTy = LLT::scalar(SizeInBits: MemSizeBits);
    } else {
      Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
      NewLdDstTy = s32;
    }
  } else {
    llvm_unreachable("Unexpected size for indexed load");
  }

  // The indexed forms take a constant immediate offset.
  auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
  if (!Cst)
    return false; // Shouldn't happen, but just in case.

  auto LdMI = MIB.buildInstr(Opc, DstOps: {WriteBack, NewLdDstTy}, SrcOps: {Base})
                  .addImm(Val: Cst->getSExtValue());
  LdMI.cloneMemRefs(OtherMI: ExtLd);
  constrainSelectedInstRegOperands(I&: *LdMI, TII, TRI, RBI);
  // Make sure to select the load with the MemTy as the dest type, and then
  // insert into a larger reg if needed.
  if (InsertIntoSubReg) {
    // Generate a SUBREG_TO_REG.
    auto SubToReg = MIB.buildInstr(Opc: TargetOpcode::SUBREG_TO_REG, DstOps: {Dst}, SrcOps: {})
                        .addUse(RegNo: LdMI.getReg(Idx: 1))
                        .addImm(Val: InsertIntoSubReg);
    RBI.constrainGenericRegister(
        Reg: SubToReg.getReg(Idx: 0),
        RC: *getRegClassForTypeOnBank(Ty: MRI.getType(Reg: Dst),
                                   RB: *RBI.getRegBank(Reg: Dst, MRI, TRI)),
        MRI);
  } else {
    // Same width as the destination: a plain copy suffices.
    auto Copy = MIB.buildCopy(Res: Dst, Op: LdMI.getReg(Idx: 1));
    selectCopy(I&: *Copy, TII, MRI, TRI, RBI);
  }
  MI.eraseFromParent();

  return true;
}
5596
5597bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5598 MachineRegisterInfo &MRI) {
5599 auto &Ld = cast<GIndexedLoad>(Val&: MI);
5600 Register Dst = Ld.getDstReg();
5601 Register WriteBack = Ld.getWritebackReg();
5602 Register Base = Ld.getBaseReg();
5603 Register Offset = Ld.getOffsetReg();
5604 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5605 "Unexpected type for indexed load");
5606 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5607
5608 if (MemSize < MRI.getType(Reg: Dst).getSizeInBytes())
5609 return selectIndexedExtLoad(MI, MRI);
5610
5611 unsigned Opc = 0;
5612 if (Ld.isPre()) {
5613 static constexpr unsigned GPROpcodes[] = {
5614 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5615 AArch64::LDRXpre};
5616 static constexpr unsigned FPROpcodes[] = {
5617 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5618 AArch64::LDRQpre};
5619 Opc = (RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5620 ? FPROpcodes[Log2_32(Value: MemSize)]
5621 : GPROpcodes[Log2_32(Value: MemSize)];
5622 ;
5623 } else {
5624 static constexpr unsigned GPROpcodes[] = {
5625 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5626 AArch64::LDRXpost};
5627 static constexpr unsigned FPROpcodes[] = {
5628 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5629 AArch64::LDRDpost, AArch64::LDRQpost};
5630 Opc = (RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5631 ? FPROpcodes[Log2_32(Value: MemSize)]
5632 : GPROpcodes[Log2_32(Value: MemSize)];
5633 ;
5634 }
5635 auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5636 if (!Cst)
5637 return false; // Shouldn't happen, but just in case.
5638 auto LdMI =
5639 MIB.buildInstr(Opc, DstOps: {WriteBack, Dst}, SrcOps: {Base}).addImm(Val: Cst->getSExtValue());
5640 LdMI.cloneMemRefs(OtherMI: Ld);
5641 constrainSelectedInstRegOperands(I&: *LdMI, TII, TRI, RBI);
5642 MI.eraseFromParent();
5643 return true;
5644}
5645
5646bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5647 MachineRegisterInfo &MRI) {
5648 Register Dst = I.getWritebackReg();
5649 Register Val = I.getValueReg();
5650 Register Base = I.getBaseReg();
5651 Register Offset = I.getOffsetReg();
5652 assert(MRI.getType(Val).getSizeInBits() <= 128 &&
5653 "Unexpected type for indexed store");
5654
5655 LocationSize MemSize = I.getMMO().getSize();
5656 unsigned MemSizeInBytes = MemSize.getValue();
5657
5658 assert(MemSizeInBytes && MemSizeInBytes <= 16 &&
5659 "Unexpected indexed store size");
5660 unsigned MemSizeLog2 = Log2_32(Value: MemSizeInBytes);
5661
5662 unsigned Opc = 0;
5663 if (I.isPre()) {
5664 static constexpr unsigned GPROpcodes[] = {
5665 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5666 AArch64::STRXpre};
5667 static constexpr unsigned FPROpcodes[] = {
5668 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5669 AArch64::STRQpre};
5670
5671 if (RBI.getRegBank(Reg: Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5672 Opc = FPROpcodes[MemSizeLog2];
5673 else
5674 Opc = GPROpcodes[MemSizeLog2];
5675 } else {
5676 static constexpr unsigned GPROpcodes[] = {
5677 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5678 AArch64::STRXpost};
5679 static constexpr unsigned FPROpcodes[] = {
5680 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5681 AArch64::STRDpost, AArch64::STRQpost};
5682
5683 if (RBI.getRegBank(Reg: Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5684 Opc = FPROpcodes[MemSizeLog2];
5685 else
5686 Opc = GPROpcodes[MemSizeLog2];
5687 }
5688
5689 auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5690 if (!Cst)
5691 return false; // Shouldn't happen, but just in case.
5692 auto Str =
5693 MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Val, Base}).addImm(Val: Cst->getSExtValue());
5694 Str.cloneMemRefs(OtherMI: I);
5695 constrainSelectedInstRegOperands(I&: *Str, TII, TRI, RBI);
5696 I.eraseFromParent();
5697 return true;
5698}
5699
/// Materialize the vector constant \p CV into \p Dst (a 64- or 128-bit
/// vector register). Tries, in order: the all-zeros MOVI idiom, the AdvSIMD
/// modified-immediate encodings (plain, bit-inverted, and sign-flipped via
/// FNEG), and finally a constant-pool load. Returns the last instruction
/// that defines \p Dst, or nullptr if no form could be emitted.
MachineInstr *
AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
                                               MachineIRBuilder &MIRBuilder,
                                               MachineRegisterInfo &MRI) {
  LLT DstTy = MRI.getType(Reg: Dst);
  unsigned DstSize = DstTy.getSizeInBits();
  assert((DstSize == 64 || DstSize == 128) &&
         "Unexpected vector constant size");

  // All-zero constant: MOVIv2d_ns with immediate 0 clears the full register.
  if (CV->isNullValue()) {
    if (DstSize == 128) {
      auto Mov =
          MIRBuilder.buildInstr(Opc: AArch64::MOVIv2d_ns, DstOps: {Dst}, SrcOps: {}).addImm(Val: 0);
      constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
      return &*Mov;
    }

    // For a 64-bit zero, emit the 128-bit MOVI into a scratch FPR128 and
    // copy out the low D subregister (dsub).
    if (DstSize == 64) {
      auto Mov =
          MIRBuilder
              .buildInstr(Opc: AArch64::MOVIv2d_ns, DstOps: {&AArch64::FPR128RegClass}, SrcOps: {})
              .addImm(Val: 0);
      auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {Dst}, SrcOps: {})
                      .addReg(RegNo: Mov.getReg(Idx: 0), Flags: {}, SubReg: AArch64::dsub);
      RBI.constrainGenericRegister(Reg: Dst, RC: AArch64::FPR64RegClass, MRI);
      return &*Copy;
    }
  }

  // Splat constant: compute the full DstSize-wide bit pattern and try the
  // AdvSIMD modified-immediate encodings on it.
  if (Constant *SplatValue = CV->getSplatValue()) {
    // Get the splat element as raw bits (FP constants are bitcast).
    APInt SplatValueAsInt =
        isa<ConstantFP>(Val: SplatValue)
            ? cast<ConstantFP>(Val: SplatValue)->getValueAPF().bitcastToAPInt()
            : SplatValue->getUniqueInteger();
    APInt DefBits = APInt::getSplat(
        NewLen: DstSize, V: SplatValueAsInt.trunc(width: DstTy.getScalarSizeInBits()));
    // Try every modified-immediate form on the given bit pattern, first as-is
    // and then with all bits inverted (the MVNI-style encodings).
    auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
      MachineInstr *NewOp;
      bool Inv = false;
      if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) ||
          (NewOp =
               tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
          (NewOp =
               tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
          (NewOp =
               tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
          (NewOp = tryAdvSIMDModImm8(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) ||
          (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)))
        return NewOp;

      // Retry the invertible encodings with the complemented pattern.
      DefBits = ~DefBits;
      Inv = true;
      if ((NewOp =
               tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
          (NewOp =
               tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
          (NewOp = tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)))
        return NewOp;
      return nullptr;
    };

    if (auto *NewOp = TryMOVIWithBits(DefBits))
      return NewOp;

    // See if a fneg of the constant can be materialized with a MOVI, etc
    auto TryWithFNeg = [&](APInt DefBits, int NumBits,
                           unsigned NegOpc) -> MachineInstr * {
      // FNegate each sub-element of the constant: XOR the sign bit of every
      // NumBits-wide lane.
      APInt Neg = APInt::getHighBitsSet(numBits: NumBits, hiBitsSet: 1).zext(width: DstSize);
      APInt NegBits(DstSize, 0);
      unsigned NumElts = DstSize / NumBits;
      for (unsigned i = 0; i < NumElts; i++)
        NegBits |= Neg << (NumBits * i);
      NegBits = DefBits ^ NegBits;

      // Try to create the new constants with MOVI, and if so generate a fneg
      // for it. The MOVI is redirected into a fresh scratch register so the
      // FNEG can consume it and define the real Dst.
      if (auto *NewOp = TryMOVIWithBits(NegBits)) {
        Register NewDst = MRI.createVirtualRegister(
            RegClass: DstSize == 64 ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass);
        NewOp->getOperand(i: 0).setReg(NewDst);
        return MIRBuilder.buildInstr(Opc: NegOpc, DstOps: {Dst}, SrcOps: {NewDst});
      }
      return nullptr;
    };
    // Try sign-flipping at 32-, 64-, and (with full FP16) 16-bit lane widths.
    MachineInstr *R;
    if ((R = TryWithFNeg(DefBits, 32,
                         DstSize == 64 ? AArch64::FNEGv2f32
                                       : AArch64::FNEGv4f32)) ||
        (R = TryWithFNeg(DefBits, 64,
                         DstSize == 64 ? AArch64::FNEGDr
                                       : AArch64::FNEGv2f64)) ||
        (STI.hasFullFP16() &&
         (R = TryWithFNeg(DefBits, 16,
                          DstSize == 64 ? AArch64::FNEGv4f16
                                        : AArch64::FNEGv8f16))))
      return R;
  }

  // No immediate form worked; fall back to loading from the constant pool.
  auto *CPLoad = emitLoadFromConstantPool(CPVal: CV, MIRBuilder);
  if (!CPLoad) {
    LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
    return nullptr;
  }

  auto Copy = MIRBuilder.buildCopy(Res: Dst, Op: CPLoad->getOperand(i: 0));
  RBI.constrainGenericRegister(
      Reg: Dst, RC: *MRI.getRegClass(Reg: CPLoad->getOperand(i: 0).getReg()), MRI);
  return &*Copy;
}
5810
5811bool AArch64InstructionSelector::tryOptConstantBuildVec(
5812 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5813 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5814 unsigned DstSize = DstTy.getSizeInBits();
5815 assert(DstSize <= 128 && "Unexpected build_vec type!");
5816 if (DstSize < 32)
5817 return false;
5818 // Check if we're building a constant vector, in which case we want to
5819 // generate a constant pool load instead of a vector insert sequence.
5820 SmallVector<Constant *, 16> Csts;
5821 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5822 // Try to find G_CONSTANT or G_FCONSTANT
5823 auto *OpMI =
5824 getOpcodeDef(Opcode: TargetOpcode::G_CONSTANT, Reg: I.getOperand(i: Idx).getReg(), MRI);
5825 if (OpMI)
5826 Csts.emplace_back(
5827 Args: const_cast<ConstantInt *>(OpMI->getOperand(i: 1).getCImm()));
5828 else if ((OpMI = getOpcodeDef(Opcode: TargetOpcode::G_FCONSTANT,
5829 Reg: I.getOperand(i: Idx).getReg(), MRI)))
5830 Csts.emplace_back(
5831 Args: const_cast<ConstantFP *>(OpMI->getOperand(i: 1).getFPImm()));
5832 else
5833 return false;
5834 }
5835 Constant *CV = ConstantVector::get(V: Csts);
5836 if (!emitConstantVector(Dst: I.getOperand(i: 0).getReg(), CV, MIRBuilder&: MIB, MRI))
5837 return false;
5838 I.eraseFromParent();
5839 return true;
5840}
5841
5842bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5843 MachineInstr &I, MachineRegisterInfo &MRI) {
5844 // Given:
5845 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5846 //
5847 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5848 Register Dst = I.getOperand(i: 0).getReg();
5849 Register EltReg = I.getOperand(i: 1).getReg();
5850 LLT EltTy = MRI.getType(Reg: EltReg);
5851 // If the index isn't on the same bank as its elements, then this can't be a
5852 // SUBREG_TO_REG.
5853 const RegisterBank &EltRB = *RBI.getRegBank(Reg: EltReg, MRI, TRI);
5854 const RegisterBank &DstRB = *RBI.getRegBank(Reg: Dst, MRI, TRI);
5855 if (EltRB != DstRB)
5856 return false;
5857 if (any_of(Range: drop_begin(RangeOrContainer: I.operands(), N: 2), P: [&MRI](const MachineOperand &Op) {
5858 return !getOpcodeDef(Opcode: TargetOpcode::G_IMPLICIT_DEF, Reg: Op.getReg(), MRI);
5859 }))
5860 return false;
5861 unsigned SubReg;
5862 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(Ty: EltTy, RB: EltRB);
5863 if (!EltRC)
5864 return false;
5865 const TargetRegisterClass *DstRC =
5866 getRegClassForTypeOnBank(Ty: MRI.getType(Reg: Dst), RB: DstRB);
5867 if (!DstRC)
5868 return false;
5869 if (!getSubRegForClass(RC: EltRC, TRI, SubReg))
5870 return false;
5871 auto SubregToReg = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {Dst}, SrcOps: {})
5872 .addUse(RegNo: EltReg)
5873 .addImm(Val: SubReg);
5874 I.eraseFromParent();
5875 constrainSelectedInstRegOperands(I&: *SubregToReg, TII, TRI, RBI);
5876 return RBI.constrainGenericRegister(Reg: Dst, RC: *DstRC, MRI);
5877}
5878
/// Select a G_BUILD_VECTOR. First tries the constant-vector and
/// single-element (SUBREG_TO_REG) fast paths, then falls back to a
/// scalar-to-vector move followed by a sequence of lane inserts.
bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
                                                   MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // Until we port more of the optimized selections, for now just use a vector
  // insert sequence.
  const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
  const LLT EltTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
  unsigned EltSize = EltTy.getSizeInBits();

  // Fast path: all elements constant.
  if (tryOptConstantBuildVec(I, DstTy, MRI))
    return true;
  // Fast path: only the first element is defined, the rest are undef.
  if (tryOptBuildVecToSubregToReg(I, MRI))
    return true;

  if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
    return false; // Don't support all element types yet.
  const RegisterBank &RB = *RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI);

  // Build in a full 128-bit register and narrow at the end if needed; the
  // lane-insert instructions operate on Q registers.
  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineInstr *ScalarToVec =
      emitScalarToVector(EltSize: DstTy.getElementType().getSizeInBits(), DstRC,
                         Scalar: I.getOperand(i: 1).getReg(), MIRBuilder&: MIB);
  if (!ScalarToVec)
    return false;

  Register DstVec = ScalarToVec->getOperand(i: 0).getReg();
  unsigned DstSize = DstTy.getSizeInBits();

  // Keep track of the last MI we inserted. Later on, we might be able to save
  // a copy using it.
  MachineInstr *PrevMI = ScalarToVec;
  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
    // Note that if we don't do a subregister copy, we can end up making an
    // extra register.
    Register OpReg = I.getOperand(i).getReg();
    // Do not emit inserts for undefs
    if (!getOpcodeDef<GImplicitDef>(Reg: OpReg, MRI)) {
      PrevMI = &*emitLaneInsert(DstReg: std::nullopt, SrcReg: DstVec, EltReg: OpReg, LaneIdx: i - 1, RB, MIRBuilder&: MIB);
      DstVec = PrevMI->getOperand(i: 0).getReg();
    }
  }

  // If DstTy's size in bits is less than 128, then emit a subregister copy
  // from DstVec to the last register we've defined.
  if (DstSize < 128) {
    // Force this to be FPR using the destination vector.
    const TargetRegisterClass *RC =
        getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: DstVec, MRI, TRI));
    if (!RC)
      return false;
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
      return false;
    }

    unsigned SubReg = 0;
    if (!getSubRegForClass(RC, TRI, SubReg))
      return false;
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
                        << "\n");
      return false;
    }

    Register Reg = MRI.createVirtualRegister(RegClass: RC);
    Register DstReg = I.getOperand(i: 0).getReg();

    // Copy the low subregister of the 128-bit build result into the real
    // destination.
    MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {}).addReg(RegNo: DstVec, Flags: {}, SubReg);
    // NOTE(review): operand 1 of I is redirected to a fresh unused vreg
    // before I is erased — presumably to detach the use of the element
    // register; confirm against MRI use-list expectations.
    MachineOperand &RegOp = I.getOperand(i: 1);
    RegOp.setReg(Reg);
    RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
  } else {
    // We either have a vector with all elements (except the first one) undef or
    // at least one non-undef non-first element. In the first case, we need to
    // constrain the output register ourselves as we may have generated an
    // INSERT_SUBREG operation which is a generic operation for which the
    // output regclass cannot be automatically chosen.
    //
    // In the second case, there is no need to do this as it may generate an
    // instruction like INSvi32gpr where the regclass can be automatically
    // chosen.
    //
    // Also, we save a copy by re-using the destination register on the final
    // insert.
    PrevMI->getOperand(i: 0).setReg(I.getOperand(i: 0).getReg());
    constrainSelectedInstRegOperands(I&: *PrevMI, TII, TRI, RBI);

    Register DstReg = PrevMI->getOperand(i: 0).getReg();
    if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
      const TargetRegisterClass *RC =
          getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: DstVec, MRI, TRI));
      RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
    }
  }

  I.eraseFromParent();
  return true;
}
5977
5978bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5979 unsigned NumVecs,
5980 MachineInstr &I) {
5981 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5982 assert(Opc && "Expected an opcode?");
5983 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5984 auto &MRI = *MIB.getMRI();
5985 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
5986 unsigned Size = Ty.getSizeInBits();
5987 assert((Size == 64 || Size == 128) &&
5988 "Destination must be 64 bits or 128 bits?");
5989 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5990 auto Ptr = I.getOperand(i: I.getNumOperands() - 1).getReg();
5991 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5992 auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {Ptr});
5993 Load.cloneMemRefs(OtherMI: I);
5994 constrainSelectedInstRegOperands(I&: *Load, TII, TRI, RBI);
5995 Register SelectedLoadDst = Load->getOperand(i: 0).getReg();
5996 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5997 auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: Idx)}, SrcOps: {})
5998 .addReg(RegNo: SelectedLoadDst, Flags: {}, SubReg: SubReg + Idx);
5999 // Emit the subreg copies and immediately select them.
6000 // FIXME: We should refactor our copy code into an emitCopy helper and
6001 // clean up uses of this pattern elsewhere in the selector.
6002 selectCopy(I&: *Vec, TII, MRI, TRI, RBI);
6003 }
6004 return true;
6005}
6006
/// Select a single-lane multi-vector NEON load (LD2/3/4 lane forms). The
/// lane instructions operate on Q-register tuples, so 64-bit sources are
/// widened to 128 bits first and the results narrowed back afterwards.
bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
    unsigned Opc, unsigned NumVecs, MachineInstr &I) {
  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
  assert(Opc && "Expected an opcode?");
  assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
  auto &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
  // 64-bit destinations need the widen/narrow round-trip described above.
  bool Narrow = Ty.getSizeInBits() == 64;

  // Source vector operands follow the NumVecs defs and the intrinsic ID.
  auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
  SmallVector<Register, 4> Regs(NumVecs);
  std::transform(first: FirstSrcRegIt, last: FirstSrcRegIt + NumVecs, result: Regs.begin(),
                 unary_op: [](auto MO) { return MO.getReg(); });

  if (Narrow) {
    // Widen each 64-bit source into the low half of an FPR128 register and
    // double the element count of the tracked type to match.
    transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) {
      return emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass, Scalar: Reg, MIRBuilder&: MIB)
          ->getOperand(i: 0)
          .getReg();
    });
    Ty = Ty.multiplyElements(Factor: 2);
  }

  Register Tuple = createQTuple(Regs, MIB);
  // The lane index must be a compile-time constant.
  auto LaneNo = getIConstantVRegVal(VReg: (FirstSrcRegIt + NumVecs)->getReg(), MRI);
  if (!LaneNo)
    return false;

  Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
  auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {})
                  .addReg(RegNo: Tuple)
                  .addImm(Val: LaneNo->getZExtValue())
                  .addReg(RegNo: Ptr);
  Load.cloneMemRefs(OtherMI: I);
  constrainSelectedInstRegOperands(I&: *Load, TII, TRI, RBI);
  Register SelectedLoadDst = Load->getOperand(i: 0).getReg();
  unsigned SubReg = AArch64::qsub0;
  // Copy each Q subregister of the tuple out; in the Narrow case copy into a
  // scratch FPR128 first and then extract the low 64 bits into the real
  // destination.
  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
    auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY,
                              DstOps: {Narrow ? DstOp(&AArch64::FPR128RegClass)
                                      : DstOp(I.getOperand(i: Idx).getReg())},
                              SrcOps: {})
                   .addReg(RegNo: SelectedLoadDst, Flags: {}, SubReg: SubReg + Idx);
    Register WideReg = Vec.getReg(Idx: 0);
    // Emit the subreg copies and immediately select them.
    selectCopy(I&: *Vec, TII, MRI, TRI, RBI);
    if (Narrow &&
        !emitNarrowVector(DstReg: I.getOperand(i: Idx).getReg(), SrcReg: WideReg, MIB, MRI))
      return false;
  }
  return true;
}
6059
6060void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6061 unsigned NumVecs,
6062 unsigned Opc) {
6063 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6064 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6065 Register Ptr = I.getOperand(i: 1 + NumVecs).getReg();
6066
6067 SmallVector<Register, 2> Regs(NumVecs);
6068 std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs,
6069 result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); });
6070
6071 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6072 : createDTuple(Regs, MIB);
6073 auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {Tuple, Ptr});
6074 Store.cloneMemRefs(OtherMI: I);
6075 constrainSelectedInstRegOperands(I&: *Store, TII, TRI, RBI);
6076}
6077
6078bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6079 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6080 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6081 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6082 bool Narrow = Ty.getSizeInBits() == 64;
6083
6084 SmallVector<Register, 2> Regs(NumVecs);
6085 std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs,
6086 result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); });
6087
6088 if (Narrow)
6089 transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) {
6090 return emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass, Scalar: Reg, MIRBuilder&: MIB)
6091 ->getOperand(i: 0)
6092 .getReg();
6093 });
6094
6095 Register Tuple = createQTuple(Regs, MIB);
6096
6097 auto LaneNo = getIConstantVRegVal(VReg: I.getOperand(i: 1 + NumVecs).getReg(), MRI);
6098 if (!LaneNo)
6099 return false;
6100 Register Ptr = I.getOperand(i: 1 + NumVecs + 1).getReg();
6101 auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {})
6102 .addReg(RegNo: Tuple)
6103 .addImm(Val: LaneNo->getZExtValue())
6104 .addReg(RegNo: Ptr);
6105 Store.cloneMemRefs(OtherMI: I);
6106 constrainSelectedInstRegOperands(I&: *Store, TII, TRI, RBI);
6107 return true;
6108}
6109
6110bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6111 MachineInstr &I, MachineRegisterInfo &MRI) {
6112 // Find the intrinsic ID.
6113 unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID();
6114
6115 const LLT S8 = LLT::scalar(SizeInBits: 8);
6116 const LLT S16 = LLT::scalar(SizeInBits: 16);
6117 const LLT S32 = LLT::scalar(SizeInBits: 32);
6118 const LLT S64 = LLT::scalar(SizeInBits: 64);
6119 const LLT P0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
6120 // Select the instruction.
6121 switch (IntrinID) {
6122 default:
6123 return false;
6124 case Intrinsic::aarch64_ldxp:
6125 case Intrinsic::aarch64_ldaxp: {
6126 auto NewI = MIB.buildInstr(
6127 Opc: IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6128 DstOps: {I.getOperand(i: 0).getReg(), I.getOperand(i: 1).getReg()},
6129 SrcOps: {I.getOperand(i: 3)});
6130 NewI.cloneMemRefs(OtherMI: I);
6131 constrainSelectedInstRegOperands(I&: *NewI, TII, TRI, RBI);
6132 break;
6133 }
6134 case Intrinsic::aarch64_neon_ld1x2: {
6135 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6136 unsigned Opc = 0;
6137 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6138 Opc = AArch64::LD1Twov8b;
6139 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6140 Opc = AArch64::LD1Twov16b;
6141 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6142 Opc = AArch64::LD1Twov4h;
6143 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6144 Opc = AArch64::LD1Twov8h;
6145 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6146 Opc = AArch64::LD1Twov2s;
6147 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6148 Opc = AArch64::LD1Twov4s;
6149 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6150 Opc = AArch64::LD1Twov2d;
6151 else if (Ty == S64 || Ty == P0)
6152 Opc = AArch64::LD1Twov1d;
6153 else
6154 llvm_unreachable("Unexpected type for ld1x2!");
6155 selectVectorLoadIntrinsic(Opc, NumVecs: 2, I);
6156 break;
6157 }
6158 case Intrinsic::aarch64_neon_ld1x3: {
6159 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6160 unsigned Opc = 0;
6161 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6162 Opc = AArch64::LD1Threev8b;
6163 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6164 Opc = AArch64::LD1Threev16b;
6165 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6166 Opc = AArch64::LD1Threev4h;
6167 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6168 Opc = AArch64::LD1Threev8h;
6169 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6170 Opc = AArch64::LD1Threev2s;
6171 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6172 Opc = AArch64::LD1Threev4s;
6173 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6174 Opc = AArch64::LD1Threev2d;
6175 else if (Ty == S64 || Ty == P0)
6176 Opc = AArch64::LD1Threev1d;
6177 else
6178 llvm_unreachable("Unexpected type for ld1x3!");
6179 selectVectorLoadIntrinsic(Opc, NumVecs: 3, I);
6180 break;
6181 }
6182 case Intrinsic::aarch64_neon_ld1x4: {
6183 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6184 unsigned Opc = 0;
6185 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6186 Opc = AArch64::LD1Fourv8b;
6187 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6188 Opc = AArch64::LD1Fourv16b;
6189 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6190 Opc = AArch64::LD1Fourv4h;
6191 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6192 Opc = AArch64::LD1Fourv8h;
6193 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6194 Opc = AArch64::LD1Fourv2s;
6195 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6196 Opc = AArch64::LD1Fourv4s;
6197 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6198 Opc = AArch64::LD1Fourv2d;
6199 else if (Ty == S64 || Ty == P0)
6200 Opc = AArch64::LD1Fourv1d;
6201 else
6202 llvm_unreachable("Unexpected type for ld1x4!");
6203 selectVectorLoadIntrinsic(Opc, NumVecs: 4, I);
6204 break;
6205 }
6206 case Intrinsic::aarch64_neon_ld2: {
6207 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6208 unsigned Opc = 0;
6209 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6210 Opc = AArch64::LD2Twov8b;
6211 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6212 Opc = AArch64::LD2Twov16b;
6213 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6214 Opc = AArch64::LD2Twov4h;
6215 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6216 Opc = AArch64::LD2Twov8h;
6217 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6218 Opc = AArch64::LD2Twov2s;
6219 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6220 Opc = AArch64::LD2Twov4s;
6221 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6222 Opc = AArch64::LD2Twov2d;
6223 else if (Ty == S64 || Ty == P0)
6224 Opc = AArch64::LD1Twov1d;
6225 else
6226 llvm_unreachable("Unexpected type for ld2!");
6227 selectVectorLoadIntrinsic(Opc, NumVecs: 2, I);
6228 break;
6229 }
6230 case Intrinsic::aarch64_neon_ld2lane: {
6231 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6232 unsigned Opc;
6233 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6234 Opc = AArch64::LD2i8;
6235 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6236 Opc = AArch64::LD2i16;
6237 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6238 Opc = AArch64::LD2i32;
6239 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6240 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6241 Opc = AArch64::LD2i64;
6242 else
6243 llvm_unreachable("Unexpected type for st2lane!");
6244 if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 2, I))
6245 return false;
6246 break;
6247 }
6248 case Intrinsic::aarch64_neon_ld2r: {
6249 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6250 unsigned Opc = 0;
6251 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6252 Opc = AArch64::LD2Rv8b;
6253 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6254 Opc = AArch64::LD2Rv16b;
6255 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6256 Opc = AArch64::LD2Rv4h;
6257 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6258 Opc = AArch64::LD2Rv8h;
6259 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6260 Opc = AArch64::LD2Rv2s;
6261 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6262 Opc = AArch64::LD2Rv4s;
6263 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6264 Opc = AArch64::LD2Rv2d;
6265 else if (Ty == S64 || Ty == P0)
6266 Opc = AArch64::LD2Rv1d;
6267 else
6268 llvm_unreachable("Unexpected type for ld2r!");
6269 selectVectorLoadIntrinsic(Opc, NumVecs: 2, I);
6270 break;
6271 }
6272 case Intrinsic::aarch64_neon_ld3: {
6273 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6274 unsigned Opc = 0;
6275 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6276 Opc = AArch64::LD3Threev8b;
6277 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6278 Opc = AArch64::LD3Threev16b;
6279 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6280 Opc = AArch64::LD3Threev4h;
6281 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6282 Opc = AArch64::LD3Threev8h;
6283 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6284 Opc = AArch64::LD3Threev2s;
6285 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6286 Opc = AArch64::LD3Threev4s;
6287 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6288 Opc = AArch64::LD3Threev2d;
6289 else if (Ty == S64 || Ty == P0)
6290 Opc = AArch64::LD1Threev1d;
6291 else
6292 llvm_unreachable("Unexpected type for ld3!");
6293 selectVectorLoadIntrinsic(Opc, NumVecs: 3, I);
6294 break;
6295 }
6296 case Intrinsic::aarch64_neon_ld3lane: {
6297 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6298 unsigned Opc;
6299 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6300 Opc = AArch64::LD3i8;
6301 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6302 Opc = AArch64::LD3i16;
6303 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6304 Opc = AArch64::LD3i32;
6305 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6306 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6307 Opc = AArch64::LD3i64;
6308 else
6309 llvm_unreachable("Unexpected type for st3lane!");
6310 if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 3, I))
6311 return false;
6312 break;
6313 }
6314 case Intrinsic::aarch64_neon_ld3r: {
6315 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6316 unsigned Opc = 0;
6317 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6318 Opc = AArch64::LD3Rv8b;
6319 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6320 Opc = AArch64::LD3Rv16b;
6321 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6322 Opc = AArch64::LD3Rv4h;
6323 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6324 Opc = AArch64::LD3Rv8h;
6325 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6326 Opc = AArch64::LD3Rv2s;
6327 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6328 Opc = AArch64::LD3Rv4s;
6329 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6330 Opc = AArch64::LD3Rv2d;
6331 else if (Ty == S64 || Ty == P0)
6332 Opc = AArch64::LD3Rv1d;
6333 else
6334 llvm_unreachable("Unexpected type for ld3r!");
6335 selectVectorLoadIntrinsic(Opc, NumVecs: 3, I);
6336 break;
6337 }
6338 case Intrinsic::aarch64_neon_ld4: {
6339 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6340 unsigned Opc = 0;
6341 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6342 Opc = AArch64::LD4Fourv8b;
6343 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6344 Opc = AArch64::LD4Fourv16b;
6345 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6346 Opc = AArch64::LD4Fourv4h;
6347 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6348 Opc = AArch64::LD4Fourv8h;
6349 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6350 Opc = AArch64::LD4Fourv2s;
6351 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6352 Opc = AArch64::LD4Fourv4s;
6353 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6354 Opc = AArch64::LD4Fourv2d;
6355 else if (Ty == S64 || Ty == P0)
6356 Opc = AArch64::LD1Fourv1d;
6357 else
6358 llvm_unreachable("Unexpected type for ld4!");
6359 selectVectorLoadIntrinsic(Opc, NumVecs: 4, I);
6360 break;
6361 }
6362 case Intrinsic::aarch64_neon_ld4lane: {
6363 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6364 unsigned Opc;
6365 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6366 Opc = AArch64::LD4i8;
6367 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6368 Opc = AArch64::LD4i16;
6369 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6370 Opc = AArch64::LD4i32;
6371 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6372 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6373 Opc = AArch64::LD4i64;
6374 else
6375 llvm_unreachable("Unexpected type for st4lane!");
6376 if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 4, I))
6377 return false;
6378 break;
6379 }
6380 case Intrinsic::aarch64_neon_ld4r: {
6381 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6382 unsigned Opc = 0;
6383 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6384 Opc = AArch64::LD4Rv8b;
6385 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6386 Opc = AArch64::LD4Rv16b;
6387 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6388 Opc = AArch64::LD4Rv4h;
6389 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6390 Opc = AArch64::LD4Rv8h;
6391 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6392 Opc = AArch64::LD4Rv2s;
6393 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6394 Opc = AArch64::LD4Rv4s;
6395 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6396 Opc = AArch64::LD4Rv2d;
6397 else if (Ty == S64 || Ty == P0)
6398 Opc = AArch64::LD4Rv1d;
6399 else
6400 llvm_unreachable("Unexpected type for ld4r!");
6401 selectVectorLoadIntrinsic(Opc, NumVecs: 4, I);
6402 break;
6403 }
6404 case Intrinsic::aarch64_neon_st1x2: {
6405 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6406 unsigned Opc;
6407 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6408 Opc = AArch64::ST1Twov8b;
6409 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6410 Opc = AArch64::ST1Twov16b;
6411 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6412 Opc = AArch64::ST1Twov4h;
6413 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6414 Opc = AArch64::ST1Twov8h;
6415 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6416 Opc = AArch64::ST1Twov2s;
6417 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6418 Opc = AArch64::ST1Twov4s;
6419 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6420 Opc = AArch64::ST1Twov2d;
6421 else if (Ty == S64 || Ty == P0)
6422 Opc = AArch64::ST1Twov1d;
6423 else
6424 llvm_unreachable("Unexpected type for st1x2!");
6425 selectVectorStoreIntrinsic(I, NumVecs: 2, Opc);
6426 break;
6427 }
6428 case Intrinsic::aarch64_neon_st1x3: {
6429 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6430 unsigned Opc;
6431 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6432 Opc = AArch64::ST1Threev8b;
6433 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6434 Opc = AArch64::ST1Threev16b;
6435 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6436 Opc = AArch64::ST1Threev4h;
6437 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6438 Opc = AArch64::ST1Threev8h;
6439 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6440 Opc = AArch64::ST1Threev2s;
6441 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6442 Opc = AArch64::ST1Threev4s;
6443 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6444 Opc = AArch64::ST1Threev2d;
6445 else if (Ty == S64 || Ty == P0)
6446 Opc = AArch64::ST1Threev1d;
6447 else
6448 llvm_unreachable("Unexpected type for st1x3!");
6449 selectVectorStoreIntrinsic(I, NumVecs: 3, Opc);
6450 break;
6451 }
6452 case Intrinsic::aarch64_neon_st1x4: {
6453 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6454 unsigned Opc;
6455 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6456 Opc = AArch64::ST1Fourv8b;
6457 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6458 Opc = AArch64::ST1Fourv16b;
6459 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6460 Opc = AArch64::ST1Fourv4h;
6461 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6462 Opc = AArch64::ST1Fourv8h;
6463 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6464 Opc = AArch64::ST1Fourv2s;
6465 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6466 Opc = AArch64::ST1Fourv4s;
6467 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6468 Opc = AArch64::ST1Fourv2d;
6469 else if (Ty == S64 || Ty == P0)
6470 Opc = AArch64::ST1Fourv1d;
6471 else
6472 llvm_unreachable("Unexpected type for st1x4!");
6473 selectVectorStoreIntrinsic(I, NumVecs: 4, Opc);
6474 break;
6475 }
6476 case Intrinsic::aarch64_neon_st2: {
6477 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6478 unsigned Opc;
6479 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6480 Opc = AArch64::ST2Twov8b;
6481 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6482 Opc = AArch64::ST2Twov16b;
6483 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6484 Opc = AArch64::ST2Twov4h;
6485 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6486 Opc = AArch64::ST2Twov8h;
6487 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6488 Opc = AArch64::ST2Twov2s;
6489 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6490 Opc = AArch64::ST2Twov4s;
6491 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6492 Opc = AArch64::ST2Twov2d;
6493 else if (Ty == S64 || Ty == P0)
6494 Opc = AArch64::ST1Twov1d;
6495 else
6496 llvm_unreachable("Unexpected type for st2!");
6497 selectVectorStoreIntrinsic(I, NumVecs: 2, Opc);
6498 break;
6499 }
6500 case Intrinsic::aarch64_neon_st3: {
6501 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6502 unsigned Opc;
6503 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6504 Opc = AArch64::ST3Threev8b;
6505 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6506 Opc = AArch64::ST3Threev16b;
6507 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6508 Opc = AArch64::ST3Threev4h;
6509 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6510 Opc = AArch64::ST3Threev8h;
6511 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6512 Opc = AArch64::ST3Threev2s;
6513 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6514 Opc = AArch64::ST3Threev4s;
6515 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6516 Opc = AArch64::ST3Threev2d;
6517 else if (Ty == S64 || Ty == P0)
6518 Opc = AArch64::ST1Threev1d;
6519 else
6520 llvm_unreachable("Unexpected type for st3!");
6521 selectVectorStoreIntrinsic(I, NumVecs: 3, Opc);
6522 break;
6523 }
6524 case Intrinsic::aarch64_neon_st4: {
6525 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6526 unsigned Opc;
6527 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6528 Opc = AArch64::ST4Fourv8b;
6529 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6530 Opc = AArch64::ST4Fourv16b;
6531 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6532 Opc = AArch64::ST4Fourv4h;
6533 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6534 Opc = AArch64::ST4Fourv8h;
6535 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6536 Opc = AArch64::ST4Fourv2s;
6537 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6538 Opc = AArch64::ST4Fourv4s;
6539 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6540 Opc = AArch64::ST4Fourv2d;
6541 else if (Ty == S64 || Ty == P0)
6542 Opc = AArch64::ST1Fourv1d;
6543 else
6544 llvm_unreachable("Unexpected type for st4!");
6545 selectVectorStoreIntrinsic(I, NumVecs: 4, Opc);
6546 break;
6547 }
6548 case Intrinsic::aarch64_neon_st2lane: {
6549 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6550 unsigned Opc;
6551 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6552 Opc = AArch64::ST2i8;
6553 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6554 Opc = AArch64::ST2i16;
6555 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6556 Opc = AArch64::ST2i32;
6557 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6558 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6559 Opc = AArch64::ST2i64;
6560 else
6561 llvm_unreachable("Unexpected type for st2lane!");
6562 if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 2, Opc))
6563 return false;
6564 break;
6565 }
6566 case Intrinsic::aarch64_neon_st3lane: {
6567 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6568 unsigned Opc;
6569 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6570 Opc = AArch64::ST3i8;
6571 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6572 Opc = AArch64::ST3i16;
6573 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6574 Opc = AArch64::ST3i32;
6575 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6576 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6577 Opc = AArch64::ST3i64;
6578 else
6579 llvm_unreachable("Unexpected type for st3lane!");
6580 if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 3, Opc))
6581 return false;
6582 break;
6583 }
6584 case Intrinsic::aarch64_neon_st4lane: {
6585 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6586 unsigned Opc;
6587 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6588 Opc = AArch64::ST4i8;
6589 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6590 Opc = AArch64::ST4i16;
6591 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6592 Opc = AArch64::ST4i32;
6593 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6594 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6595 Opc = AArch64::ST4i64;
6596 else
6597 llvm_unreachable("Unexpected type for st4lane!");
6598 if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 4, Opc))
6599 return false;
6600 break;
6601 }
6602 case Intrinsic::aarch64_mops_memset_tag: {
6603 // Transform
6604 // %dst:gpr(p0) = \
6605 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6606 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6607 // where %dst is updated, into
6608 // %Rd:GPR64common, %Rn:GPR64) = \
6609 // MOPSMemorySetTaggingPseudo \
6610 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6611 // where Rd and Rn are tied.
6612 // It is expected that %val has been extended to s64 in legalization.
6613 // Note that the order of the size/value operands are swapped.
6614
6615 Register DstDef = I.getOperand(i: 0).getReg();
6616 // I.getOperand(1) is the intrinsic function
6617 Register DstUse = I.getOperand(i: 2).getReg();
6618 Register ValUse = I.getOperand(i: 3).getReg();
6619 Register SizeUse = I.getOperand(i: 4).getReg();
6620
6621 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6622 // Therefore an additional virtual register is required for the updated size
6623 // operand. This value is not accessible via the semantics of the intrinsic.
6624 Register SizeDef = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64));
6625
6626 auto Memset = MIB.buildInstr(Opc: AArch64::MOPSMemorySetTaggingPseudo,
6627 DstOps: {DstDef, SizeDef}, SrcOps: {DstUse, SizeUse, ValUse});
6628 Memset.cloneMemRefs(OtherMI: I);
6629 constrainSelectedInstRegOperands(I&: *Memset, TII, TRI, RBI);
6630 break;
6631 }
6632 case Intrinsic::ptrauth_resign_load_relative: {
6633 Register DstReg = I.getOperand(i: 0).getReg();
6634 Register ValReg = I.getOperand(i: 2).getReg();
6635 uint64_t AUTKey = I.getOperand(i: 3).getImm();
6636 Register AUTDisc = I.getOperand(i: 4).getReg();
6637 uint64_t PACKey = I.getOperand(i: 5).getImm();
6638 Register PACDisc = I.getOperand(i: 6).getReg();
6639 int64_t Addend = I.getOperand(i: 7).getImm();
6640
6641 Register AUTAddrDisc = AUTDisc;
6642 uint16_t AUTConstDiscC = 0;
6643 std::tie(args&: AUTConstDiscC, args&: AUTAddrDisc) =
6644 extractPtrauthBlendDiscriminators(Disc: AUTDisc, MRI);
6645
6646 Register PACAddrDisc = PACDisc;
6647 uint16_t PACConstDiscC = 0;
6648 std::tie(args&: PACConstDiscC, args&: PACAddrDisc) =
6649 extractPtrauthBlendDiscriminators(Disc: PACDisc, MRI);
6650
6651 MIB.buildCopy(Res: {AArch64::X16}, Op: {ValReg});
6652
6653 MIB.buildInstr(Opcode: AArch64::AUTRELLOADPAC)
6654 .addImm(Val: AUTKey)
6655 .addImm(Val: AUTConstDiscC)
6656 .addUse(RegNo: AUTAddrDisc)
6657 .addImm(Val: PACKey)
6658 .addImm(Val: PACConstDiscC)
6659 .addUse(RegNo: PACAddrDisc)
6660 .addImm(Val: Addend)
6661 .constrainAllUses(TII, TRI, RBI);
6662 MIB.buildCopy(Res: {DstReg}, Op: Register(AArch64::X16));
6663
6664 RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
6665 I.eraseFromParent();
6666 return true;
6667 }
6668 }
6669
6670 I.eraseFromParent();
6671 return true;
6672}
6673
/// Select an AArch64-specific G_INTRINSIC (side-effect-free form).
/// Returns true and erases \p I when the intrinsic is handled here;
/// returns false so other selection paths can try it.
bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID();

  switch (IntrinID) {
  default:
    break;
  case Intrinsic::ptrauth_resign: {
    // Operands: (def, intrinsic-id, value, AUT key, AUT discriminator,
    //            PAC key, PAC discriminator).
    Register DstReg = I.getOperand(i: 0).getReg();
    Register ValReg = I.getOperand(i: 2).getReg();
    uint64_t AUTKey = I.getOperand(i: 3).getImm();
    Register AUTDisc = I.getOperand(i: 4).getReg();
    uint64_t PACKey = I.getOperand(i: 5).getImm();
    Register PACDisc = I.getOperand(i: 6).getReg();

    // Split each discriminator into its constant (blend) part and its
    // address part, if the value was produced by a discriminator blend.
    Register AUTAddrDisc = AUTDisc;
    uint16_t AUTConstDiscC = 0;
    std::tie(args&: AUTConstDiscC, args&: AUTAddrDisc) =
        extractPtrauthBlendDiscriminators(Disc: AUTDisc, MRI);

    Register PACAddrDisc = PACDisc;
    uint16_t PACConstDiscC = 0;
    std::tie(args&: PACConstDiscC, args&: PACAddrDisc) =
        extractPtrauthBlendDiscriminators(Disc: PACDisc, MRI);

    // The AUTPAC pseudo takes its input and produces its result in X16.
    // X17 is marked defined here, presumably as scratch for the late
    // expansion -- NOTE(review): confirm against the AUTPAC expansion.
    MIB.buildCopy(Res: {AArch64::X16}, Op: {ValReg});
    MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
    MIB.buildInstr(Opcode: AArch64::AUTPAC)
        .addImm(Val: AUTKey)
        .addImm(Val: AUTConstDiscC)
        .addUse(RegNo: AUTAddrDisc)
        .addImm(Val: PACKey)
        .addImm(Val: PACConstDiscC)
        .addUse(RegNo: PACAddrDisc)
        .constrainAllUses(TII, TRI, RBI);
    MIB.buildCopy(Res: {DstReg}, Op: Register(AArch64::X16));

    RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }
  case Intrinsic::ptrauth_auth: {
    // Operands: (def, intrinsic-id, value, AUT key, AUT discriminator).
    Register DstReg = I.getOperand(i: 0).getReg();
    Register ValReg = I.getOperand(i: 2).getReg();
    uint64_t AUTKey = I.getOperand(i: 3).getImm();
    Register AUTDisc = I.getOperand(i: 4).getReg();

    Register AUTAddrDisc = AUTDisc;
    uint16_t AUTConstDiscC = 0;
    std::tie(args&: AUTConstDiscC, args&: AUTAddrDisc) =
        extractPtrauthBlendDiscriminators(Disc: AUTDisc, MRI);

    if (STI.isX16X17Safer()) {
      // Subtarget reports X16/X17 as the safer home for this operation:
      // pin the value to X16 and use the X16/X17-based pseudo.
      MIB.buildCopy(Res: {AArch64::X16}, Op: {ValReg});
      MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
      MIB.buildInstr(Opcode: AArch64::AUTx16x17)
          .addImm(Val: AUTKey)
          .addImm(Val: AUTConstDiscC)
          .addUse(RegNo: AUTAddrDisc)
          .constrainAllUses(TII, TRI, RBI);
      MIB.buildCopy(Res: {DstReg}, Op: Register(AArch64::X16));
    } else {
      // Register-allocated form; requires an extra scratch register as a
      // second def.
      Register ScratchReg =
          MRI.createVirtualRegister(RegClass: &AArch64::GPR64commonRegClass);
      MIB.buildInstr(Opcode: AArch64::AUTxMxN)
          .addDef(RegNo: DstReg)
          .addDef(RegNo: ScratchReg)
          .addUse(RegNo: ValReg)
          .addImm(Val: AUTKey)
          .addImm(Val: AUTConstDiscC)
          .addUse(RegNo: AUTAddrDisc)
          .constrainAllUses(TII, TRI, RBI);
    }

    RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }
  case Intrinsic::frameaddress:
  case Intrinsic::returnaddress: {
    MachineFunction &MF = *I.getParent()->getParent();
    MachineFrameInfo &MFI = MF.getFrameInfo();

    unsigned Depth = I.getOperand(i: 2).getImm();
    Register DstReg = I.getOperand(i: 0).getReg();
    RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);

    // Fast path: llvm.returnaddress(0) reads the current function's own
    // return address, cached as a live-in copy of LR.
    if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
      if (!MFReturnAddr) {
        // Insert the copy from LR/X30 into the entry block, before it can be
        // clobbered by anything.
        MFI.setReturnAddressIsTaken(true);
        MFReturnAddr = getFunctionLiveInPhysReg(
            MF, TII, PhysReg: AArch64::LR, RC: AArch64::GPR64RegClass, DL: I.getDebugLoc());
      }

      // Strip any pointer-authentication signature before exposing the
      // address to the user.
      if (STI.hasPAuth()) {
        MIB.buildInstr(Opc: AArch64::XPACI, DstOps: {DstReg}, SrcOps: {MFReturnAddr});
      } else {
        // Without PAuth, only the hint-space XPACLRI is available; it
        // operates implicitly on LR.
        MIB.buildCopy(Res: {Register(AArch64::LR)}, Op: {MFReturnAddr});
        MIB.buildInstr(Opcode: AArch64::XPACLRI);
        MIB.buildCopy(Res: {DstReg}, Op: {Register(AArch64::LR)});
      }

      I.eraseFromParent();
      return true;
    }

    // Walk up the chain of saved frame pointers Depth times: each parent
    // frame pointer is loaded from [FrameAddr, #0].
    MFI.setFrameAddressIsTaken(true);
    Register FrameAddr(AArch64::FP);
    while (Depth--) {
      Register NextFrame = MRI.createVirtualRegister(RegClass: &AArch64::GPR64spRegClass);
      auto Ldr =
          MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {NextFrame}, SrcOps: {FrameAddr}).addImm(Val: 0);
      constrainSelectedInstRegOperands(I&: *Ldr, TII, TRI, RBI);
      FrameAddr = NextFrame;
    }

    if (IntrinID == Intrinsic::frameaddress)
      MIB.buildCopy(Res: {DstReg}, Op: {FrameAddr});
    else {
      MFI.setReturnAddressIsTaken(true);

      // The saved return address sits next to the saved FP: LDRXui with
      // immediate 1 loads from [FrameAddr, #8] (unsigned scaled offset).
      if (STI.hasPAuth()) {
        Register TmpReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
        MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {TmpReg}, SrcOps: {FrameAddr}).addImm(Val: 1);
        MIB.buildInstr(Opc: AArch64::XPACI, DstOps: {DstReg}, SrcOps: {TmpReg});
      } else {
        MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {Register(AArch64::LR)}, SrcOps: {FrameAddr})
            .addImm(Val: 1);
        MIB.buildInstr(Opcode: AArch64::XPACLRI);
        MIB.buildCopy(Res: {DstReg}, Op: {Register(AArch64::LR)});
      }
    }

    I.eraseFromParent();
    return true;
  }
  // NEON table lookups: TBL-flavoured intrinsics select from the table
  // only; TBX-flavoured ones (isExt) additionally take a fallback vector.
  // The 64-bit vs 128-bit opcode is chosen inside SelectTable.
  case Intrinsic::aarch64_neon_tbl2:
    SelectTable(I, MRI, NumVecs: 2, Opc1: AArch64::TBLv8i8Two, Opc2: AArch64::TBLv16i8Two, isExt: false);
    return true;
  case Intrinsic::aarch64_neon_tbl3:
    SelectTable(I, MRI, NumVecs: 3, Opc1: AArch64::TBLv8i8Three, Opc2: AArch64::TBLv16i8Three,
                isExt: false);
    return true;
  case Intrinsic::aarch64_neon_tbl4:
    SelectTable(I, MRI, NumVecs: 4, Opc1: AArch64::TBLv8i8Four, Opc2: AArch64::TBLv16i8Four, isExt: false);
    return true;
  case Intrinsic::aarch64_neon_tbx2:
    SelectTable(I, MRI, NumVecs: 2, Opc1: AArch64::TBXv8i8Two, Opc2: AArch64::TBXv16i8Two, isExt: true);
    return true;
  case Intrinsic::aarch64_neon_tbx3:
    SelectTable(I, MRI, NumVecs: 3, Opc1: AArch64::TBXv8i8Three, Opc2: AArch64::TBXv16i8Three, isExt: true);
    return true;
  case Intrinsic::aarch64_neon_tbx4:
    SelectTable(I, MRI, NumVecs: 4, Opc1: AArch64::TBXv8i8Four, Opc2: AArch64::TBXv16i8Four, isExt: true);
    return true;
  case Intrinsic::swift_async_context_addr:
    // The Swift async context slot is addressed at FP - 8.
    auto Sub = MIB.buildInstr(Opc: AArch64::SUBXri, DstOps: {I.getOperand(i: 0).getReg()},
                              SrcOps: {Register(AArch64::FP)})
                   .addImm(Val: 8)
                   .addImm(Val: 0);
    constrainSelectedInstRegOperands(I&: *Sub, TII, TRI, RBI);

    MF->getFrameInfo().setFrameAddressIsTaken(true);
    MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
    I.eraseFromParent();
    return true;
  }
  return false;
}
6845
6846// G_PTRAUTH_GLOBAL_VALUE lowering
6847//
6848// We have 3 lowering alternatives to choose from:
6849// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6850// If the GV doesn't need a GOT load (i.e., is locally defined)
6851// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6852//
6853// - LOADgotPAC: similar to LOADgot, with added PAC.
6854// If the GV needs a GOT load, materialize the pointer using the usual
6855// GOT adrp+ldr, +pac. Pointers in GOT are assumed to be not signed, the GOT
6856// section is assumed to be read-only (for example, via relro mechanism). See
6857// LowerMOVaddrPAC.
6858//
6859// - LOADauthptrstatic: similar to LOADgot, but use a
6860// special stub slot instead of a GOT slot.
6861// Load a signed pointer for symbol 'sym' from a stub slot named
6862// 'sym$auth_ptr$key$disc' filled by dynamic linker during relocation
6863// resolving. This usually lowers to adrp+ldr, but also emits an entry into
6864// .data with an
6865// @AUTH relocation. See LowerLOADauthptrstatic.
6866//
// All 3 are pseudos that are expanded late to longer sequences: this lets us
// provide integrity guarantees on the to-be-signed intermediate values.
6869//
6870// LOADauthptrstatic is undesirable because it requires a large section filled
6871// with often similarly-signed pointers, making it a good harvesting target.
6872// Thus, it's only used for ptrauth references to extern_weak to avoid null
6873// checks.
6874
bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  // Operands: (def, pointer to sign, key, address discriminator,
  //            constant discriminator).
  Register DefReg = I.getOperand(i: 0).getReg();
  Register Addr = I.getOperand(i: 1).getReg();
  uint64_t Key = I.getOperand(i: 2).getImm();
  Register AddrDisc = I.getOperand(i: 3).getReg();
  uint64_t Disc = I.getOperand(i: 4).getImm();
  int64_t Offset = 0;

  if (Key > AArch64PACKey::LAST)
    report_fatal_error(reason: "key in ptrauth global out of range [0, " +
                       Twine((int)AArch64PACKey::LAST) + "]");

  // Blend only works if the integer discriminator is 16-bit wide.
  if (!isUInt<16>(x: Disc))
    report_fatal_error(
        reason: "constant discriminator in ptrauth global out of range [0, 0xffff]");

  // Choosing between 3 lowering alternatives is target-specific.
  if (!STI.isTargetELF() && !STI.isTargetMachO())
    report_fatal_error(reason: "ptrauth global lowering only supported on MachO/ELF");

  if (!MRI.hasOneDef(RegNo: Addr))
    return false;

  // First match any offset we take from the real global.
  // Peel a (G_PTR_ADD base, G_CONSTANT) pair off Addr so the pseudo can be
  // fed a single global+offset operand; bail if the MIR doesn't match.
  const MachineInstr *DefMI = &*MRI.def_instr_begin(RegNo: Addr);
  if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
    Register OffsetReg = DefMI->getOperand(i: 2).getReg();
    if (!MRI.hasOneDef(RegNo: OffsetReg))
      return false;
    const MachineInstr &OffsetMI = *MRI.def_instr_begin(RegNo: OffsetReg);
    if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
      return false;

    Addr = DefMI->getOperand(i: 1).getReg();
    if (!MRI.hasOneDef(RegNo: Addr))
      return false;

    DefMI = &*MRI.def_instr_begin(RegNo: Addr);
    Offset = OffsetMI.getOperand(i: 1).getCImm()->getSExtValue();
  }

  // We should be left with a genuine unauthenticated GlobalValue.
  const GlobalValue *GV;
  if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
    GV = DefMI->getOperand(i: 1).getGlobal();
    Offset += DefMI->getOperand(i: 1).getOffset();
  } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
    // G_ADD_LOW keeps the global in operand 2.
    GV = DefMI->getOperand(i: 2).getGlobal();
    Offset += DefMI->getOperand(i: 2).getOffset();
  } else {
    return false;
  }

  MachineIRBuilder MIB(I);

  // Classify the reference to determine whether it needs a GOT load.
  unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
  const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
  assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
         "unsupported non-GOT op flags on ptrauth global reference");
  assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
         "unsupported non-GOT reference to weak ptrauth global");

  // A constant-zero address discriminator means "no address diversity";
  // it is rendered as XZR below.
  std::optional<APInt> AddrDiscVal = getIConstantVRegVal(VReg: AddrDisc, MRI);
  bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;

  // Non-extern_weak:
  // - No GOT load needed -> MOVaddrPAC
  // - GOT load for non-extern_weak -> LOADgotPAC
  // Note that we disallow extern_weak refs to avoid null checks later.
  if (!GV->hasExternalWeakLinkage()) {
    // Both pseudos deliver their result in X16; X17 is also marked
    // defined, presumably as scratch for the late expansion.
    MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X16}, SrcOps: {});
    MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
    MIB.buildInstr(Opcode: NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
        .addGlobalAddress(GV, Offset)
        .addImm(Val: Key)
        .addReg(RegNo: HasAddrDisc ? AddrDisc : AArch64::XZR)
        .addImm(Val: Disc)
        .constrainAllUses(TII, TRI, RBI);
    MIB.buildCopy(Res: DefReg, Op: Register(AArch64::X16));
    RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }

  // extern_weak -> LOADauthptrstatic

  // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
  // offset alone as a pointer if the symbol wasn't available, which would
  // probably break null checks in users. Ptrauth complicates things further:
  // error out.
  if (Offset != 0)
    report_fatal_error(
        reason: "unsupported non-zero offset in weak ptrauth global reference");

  if (HasAddrDisc)
    report_fatal_error(reason: "unsupported weak addr-div ptrauth global");

  MIB.buildInstr(Opc: AArch64::LOADauthptrstatic, DstOps: {DefReg}, SrcOps: {})
      .addGlobalAddress(GV, Offset)
      .addImm(Val: Key)
      .addImm(Val: Disc);
  RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);

  I.eraseFromParent();
  return true;
}
6984
6985void AArch64InstructionSelector::SelectTable(MachineInstr &I,
6986 MachineRegisterInfo &MRI,
6987 unsigned NumVec, unsigned Opc1,
6988 unsigned Opc2, bool isExt) {
6989 Register DstReg = I.getOperand(i: 0).getReg();
6990 unsigned Opc = MRI.getType(Reg: DstReg) == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8) ? Opc1 : Opc2;
6991
6992 // Create the REG_SEQUENCE
6993 SmallVector<Register, 4> Regs;
6994 for (unsigned i = 0; i < NumVec; i++)
6995 Regs.push_back(Elt: I.getOperand(i: i + 2 + isExt).getReg());
6996 Register RegSeq = createQTuple(Regs, MIB);
6997
6998 Register IdxReg = I.getOperand(i: 2 + NumVec + isExt).getReg();
6999 MachineInstrBuilder Instr;
7000 if (isExt) {
7001 Register Reg = I.getOperand(i: 2).getReg();
7002 Instr = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Reg, RegSeq, IdxReg});
7003 } else
7004 Instr = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {RegSeq, IdxReg});
7005 constrainSelectedInstRegOperands(I&: *Instr, TII, TRI, RBI);
7006 I.eraseFromParent();
7007}
7008
7009InstructionSelector::ComplexRendererFns
7010AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
7011 auto MaybeImmed = getImmedFromMO(Root);
7012 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
7013 return std::nullopt;
7014 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
7015 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
7016}
7017
7018InstructionSelector::ComplexRendererFns
7019AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
7020 auto MaybeImmed = getImmedFromMO(Root);
7021 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
7022 return std::nullopt;
7023 uint64_t Enc = 31 - *MaybeImmed;
7024 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
7025}
7026
7027InstructionSelector::ComplexRendererFns
7028AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
7029 auto MaybeImmed = getImmedFromMO(Root);
7030 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7031 return std::nullopt;
7032 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
7033 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
7034}
7035
7036InstructionSelector::ComplexRendererFns
7037AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
7038 auto MaybeImmed = getImmedFromMO(Root);
7039 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7040 return std::nullopt;
7041 uint64_t Enc = 63 - *MaybeImmed;
7042 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
7043}
7044
7045/// Helper to select an immediate value that can be represented as a 12-bit
7046/// value shifted left by either 0 or 12. If it is possible to do so, return
7047/// the immediate and shift value. If not, return std::nullopt.
7048///
7049/// Used by selectArithImmed and selectNegArithImmed.
7050InstructionSelector::ComplexRendererFns
7051AArch64InstructionSelector::select12BitValueWithLeftShift(
7052 uint64_t Immed) const {
7053 unsigned ShiftAmt;
7054 if (Immed >> 12 == 0) {
7055 ShiftAmt = 0;
7056 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
7057 ShiftAmt = 12;
7058 Immed = Immed >> 12;
7059 } else
7060 return std::nullopt;
7061
7062 unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt);
7063 return {{
7064 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Immed); },
7065 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShVal); },
7066 }};
7067}
7068
7069/// SelectArithImmed - Select an immediate value that can be represented as
7070/// a 12-bit value shifted left by either 0 or 12. If so, return true with
7071/// Val set to the 12-bit value and Shift set to the shifter operand.
7072InstructionSelector::ComplexRendererFns
7073AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
7074 // This function is called from the addsub_shifted_imm ComplexPattern,
7075 // which lists [imm] as the list of opcode it's interested in, however
7076 // we still need to check whether the operand is actually an immediate
7077 // here because the ComplexPattern opcode list is only used in
7078 // root-level opcode matching.
7079 auto MaybeImmed = getImmedFromMO(Root);
7080 if (MaybeImmed == std::nullopt)
7081 return std::nullopt;
7082 return select12BitValueWithLeftShift(Immed: *MaybeImmed);
7083}
7084
7085/// SelectNegArithImmed - As above, but negates the value before trying to
7086/// select it.
7087InstructionSelector::ComplexRendererFns
7088AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
7089 // We need a register here, because we need to know if we have a 64 or 32
7090 // bit immediate.
7091 if (!Root.isReg())
7092 return std::nullopt;
7093 auto MaybeImmed = getImmedFromMO(Root);
7094 if (MaybeImmed == std::nullopt)
7095 return std::nullopt;
7096 uint64_t Immed = *MaybeImmed;
7097
7098 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
7099 // have the opposite effect on the C flag, so this pattern mustn't match under
7100 // those circumstances.
7101 if (Immed == 0)
7102 return std::nullopt;
7103
7104 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
7105 // the root.
7106 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7107 if (MRI.getType(Reg: Root.getReg()).getSizeInBits() == 32)
7108 Immed = ~((uint32_t)Immed) + 1;
7109 else
7110 Immed = ~Immed + 1ULL;
7111
7112 if (Immed & 0xFFFFFFFFFF000000ULL)
7113 return std::nullopt;
7114
7115 Immed &= 0xFFFFFFULL;
7116 return select12BitValueWithLeftShift(Immed);
7117}
7118
7119/// Checks if we are sure that folding MI into load/store addressing mode is
7120/// beneficial or not.
7121///
7122/// Returns:
7123/// - true if folding MI would be beneficial.
7124/// - false if folding MI would be bad.
7125/// - std::nullopt if it is not sure whether folding MI is beneficial.
7126///
7127/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
7128///
7129/// %13:gpr(s64) = G_CONSTANT i64 1
7130/// %8:gpr(s64) = G_SHL %6, %13(s64)
7131/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
7132/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
7133std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7134 const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7135 if (MI.getOpcode() == AArch64::G_SHL) {
7136 // Address operands with shifts are free, except for running on subtargets
7137 // with AddrLSLSlow14.
7138 if (const auto ValAndVeg = getIConstantVRegValWithLookThrough(
7139 VReg: MI.getOperand(i: 2).getReg(), MRI)) {
7140 const APInt ShiftVal = ValAndVeg->Value;
7141
7142 // Don't fold if we know this will be slow.
7143 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7144 }
7145 }
7146 return std::nullopt;
7147}
7148
7149/// Return true if it is worth folding MI into an extended register. That is,
7150/// if it's safe to pull it into the addressing mode of a load or store as a
7151/// shift.
7152/// \p IsAddrOperand whether the def of MI is used as an address operand
7153/// (e.g. feeding into an LDR/STR).
7154bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7155 const MachineInstr &MI, const MachineRegisterInfo &MRI,
7156 bool IsAddrOperand) const {
7157
7158 // Always fold if there is one use, or if we're optimizing for size.
7159 Register DefReg = MI.getOperand(i: 0).getReg();
7160 if (MRI.hasOneNonDBGUse(RegNo: DefReg) ||
7161 MI.getParent()->getParent()->getFunction().hasOptSize())
7162 return true;
7163
7164 if (IsAddrOperand) {
7165 // If we are already sure that folding MI is good or bad, return the result.
7166 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7167 return *Worth;
7168
7169 // Fold G_PTR_ADD if its offset operand can be folded
7170 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7171 MachineInstr *OffsetInst =
7172 getDefIgnoringCopies(Reg: MI.getOperand(i: 2).getReg(), MRI);
7173
7174 // Note, we already know G_PTR_ADD is used by at least two instructions.
7175 // If we are also sure about whether folding is beneficial or not,
7176 // return the result.
7177 if (const auto Worth = isWorthFoldingIntoAddrMode(MI: *OffsetInst, MRI))
7178 return *Worth;
7179 }
7180 }
7181
7182 // FIXME: Consider checking HasALULSLFast as appropriate.
7183
7184 // We have a fastpath, so folding a shift in and potentially computing it
7185 // many times may be beneficial. Check if this is only used in memory ops.
7186 // If it is, then we should fold.
7187 return all_of(Range: MRI.use_nodbg_instructions(Reg: DefReg),
7188 P: [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7189}
7190
7191static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
7192 switch (Type) {
7193 case AArch64_AM::SXTB:
7194 case AArch64_AM::SXTH:
7195 case AArch64_AM::SXTW:
7196 return true;
7197 default:
7198 return false;
7199 }
7200}
7201
/// Try to fold a shifted (and optionally extended) offset into a register
/// addressing mode, producing renderers for [Base, Offset, {lsl|{s,u}xtw}
/// #shift].
///
/// \p Root is the address operand being matched, \p Base the G_PTR_ADD's
/// pointer operand, and \p Offset its offset operand. \p SizeInBytes is the
/// memory access size; its log2 is the only legal shift amount. If
/// \p WantsExt is true, an extend feeding the shift (or a G_ZEXT wrapping
/// it) may also be folded.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
    unsigned SizeInBytes, bool WantsExt) const {
  assert(Base.isReg() && "Expected base to be a register operand");
  assert(Offset.isReg() && "Expected offset to be a register operand");

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  MachineInstr *OffsetInst = MRI.getVRegDef(Reg: Offset.getReg());

  // The offset must come from a G_SHL or a G_MUL-by-power-of-two.
  unsigned OffsetOpc = OffsetInst->getOpcode();
  bool LookedThroughZExt = false;
  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
    // Try to look through a ZEXT.
    if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
      return std::nullopt;

    OffsetInst = MRI.getVRegDef(Reg: OffsetInst->getOperand(i: 1).getReg());
    OffsetOpc = OffsetInst->getOpcode();
    LookedThroughZExt = true;

    if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
      return std::nullopt;
  }
  // Make sure that the memory op is a valid size.
  // (A 1-byte access has shift 0, which this addressing form doesn't cover.)
  int64_t LegalShiftVal = Log2_32(Value: SizeInBytes);
  if (LegalShiftVal == 0)
    return std::nullopt;
  if (!isWorthFoldingIntoExtendedReg(MI: *OffsetInst, MRI, IsAddrOperand: true))
    return std::nullopt;

  // Now, try to find the specific G_CONSTANT. Start by assuming that the
  // register we will offset is the LHS, and the register containing the
  // constant is the RHS.
  Register OffsetReg = OffsetInst->getOperand(i: 1).getReg();
  Register ConstantReg = OffsetInst->getOperand(i: 2).getReg();
  auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
  if (!ValAndVReg) {
    // We didn't get a constant on the RHS. If the opcode is a shift, then
    // we're done.
    if (OffsetOpc == TargetOpcode::G_SHL)
      return std::nullopt;

    // If we have a G_MUL, we can use either register. Try looking at the RHS.
    std::swap(a&: OffsetReg, b&: ConstantReg);
    ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
    if (!ValAndVReg)
      return std::nullopt;
  }

  // The value must fit into 3 bits, and must be positive. Make sure that is
  // true.
  int64_t ImmVal = ValAndVReg->Value.getSExtValue();

  // Since we're going to pull this into a shift, the constant value must be
  // a power of 2. If we got a multiply, then we need to check this.
  if (OffsetOpc == TargetOpcode::G_MUL) {
    if (!llvm::has_single_bit<uint32_t>(Value: ImmVal))
      return std::nullopt;

    // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
    ImmVal = Log2_32(Value: ImmVal);
  }

  if ((ImmVal & 0x7) != ImmVal)
    return std::nullopt;

  // We are only allowed to shift by LegalShiftVal. This shift value is built
  // into the instruction, so we can't just use whatever we want.
  if (ImmVal != LegalShiftVal)
    return std::nullopt;

  unsigned SignExtend = 0;
  if (WantsExt) {
    // Check if the offset is defined by an extend, unless we looked through a
    // G_ZEXT earlier.
    if (!LookedThroughZExt) {
      MachineInstr *ExtInst = getDefIgnoringCopies(Reg: OffsetReg, MRI);
      auto Ext = getExtendTypeForInst(MI&: *ExtInst, MRI, IsLoadStore: true);
      if (Ext == AArch64_AM::InvalidShiftExtend)
        return std::nullopt;

      SignExtend = isSignExtendShiftType(Type: Ext) ? 1 : 0;
      // We only support SXTW for signed extension here.
      if (SignExtend && Ext != AArch64_AM::SXTW)
        return std::nullopt;
      OffsetReg = ExtInst->getOperand(i: 1).getReg();
    }

    // Need a 32-bit wide register here.
    MachineIRBuilder MIB(*MRI.getVRegDef(Reg: Root.getReg()));
    OffsetReg = moveScalarRegClass(Reg: OffsetReg, RC: AArch64::GPR32RegClass, MIB);
  }

  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
  // offset. Signify that we are shifting by setting the shift flag to 1.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: Base.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: OffsetReg); },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are both
             // added to the instruction.
             MIB.addImm(Val: SignExtend);
             MIB.addImm(Val: 1);
           }}};
}
7307
7308/// This is used for computing addresses like this:
7309///
7310/// ldr x1, [x2, x3, lsl #3]
7311///
7312/// Where x2 is the base register, and x3 is an offset register. The shift-left
7313/// is a constant value specific to this load instruction. That is, we'll never
7314/// see anything other than a 3 here (which corresponds to the size of the
7315/// element being loaded.)
7316InstructionSelector::ComplexRendererFns
7317AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7318 MachineOperand &Root, unsigned SizeInBytes) const {
7319 if (!Root.isReg())
7320 return std::nullopt;
7321 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7322
7323 // We want to find something like this:
7324 //
7325 // val = G_CONSTANT LegalShiftVal
7326 // shift = G_SHL off_reg val
7327 // ptr = G_PTR_ADD base_reg shift
7328 // x = G_LOAD ptr
7329 //
7330 // And fold it into this addressing mode:
7331 //
7332 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7333
7334 // Check if we can find the G_PTR_ADD.
7335 MachineInstr *PtrAdd =
7336 getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
7337 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI: *PtrAdd, MRI, IsAddrOperand: true))
7338 return std::nullopt;
7339
7340 // Now, try to match an opcode which will match our specific offset.
7341 // We want a G_SHL or a G_MUL.
7342 MachineInstr *OffsetInst =
7343 getDefIgnoringCopies(Reg: PtrAdd->getOperand(i: 2).getReg(), MRI);
7344 return selectExtendedSHL(Root, Base&: PtrAdd->getOperand(i: 1),
7345 Offset&: OffsetInst->getOperand(i: 0), SizeInBytes,
7346 /*WantsExt=*/false);
7347}
7348
7349/// This is used for computing addresses like this:
7350///
7351/// ldr x1, [x2, x3]
7352///
7353/// Where x2 is the base register, and x3 is an offset register.
7354///
7355/// When possible (or profitable) to fold a G_PTR_ADD into the address
7356/// calculation, this will do so. Otherwise, it will return std::nullopt.
7357InstructionSelector::ComplexRendererFns
7358AArch64InstructionSelector::selectAddrModeRegisterOffset(
7359 MachineOperand &Root) const {
7360 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7361
7362 // We need a GEP.
7363 MachineInstr *Gep = MRI.getVRegDef(Reg: Root.getReg());
7364 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7365 return std::nullopt;
7366
7367 // If this is used more than once, let's not bother folding.
7368 // TODO: Check if they are memory ops. If they are, then we can still fold
7369 // without having to recompute anything.
7370 if (!MRI.hasOneNonDBGUse(RegNo: Gep->getOperand(i: 0).getReg()))
7371 return std::nullopt;
7372
7373 // Base is the GEP's LHS, offset is its RHS.
7374 return {{[=](MachineInstrBuilder &MIB) {
7375 MIB.addUse(RegNo: Gep->getOperand(i: 1).getReg());
7376 },
7377 [=](MachineInstrBuilder &MIB) {
7378 MIB.addUse(RegNo: Gep->getOperand(i: 2).getReg());
7379 },
7380 [=](MachineInstrBuilder &MIB) {
7381 // Need to add both immediates here to make sure that they are both
7382 // added to the instruction.
7383 MIB.addImm(Val: 0);
7384 MIB.addImm(Val: 0);
7385 }}};
7386}
7387
/// This is intended to be equivalent to selectAddrModeXRO in
/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (!Root.isReg())
    return std::nullopt;
  MachineInstr *PtrAdd =
      getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
  if (!PtrAdd)
    return std::nullopt;

  // Check for an immediates which cannot be encoded in the [base + imm]
  // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
  // end up with code like:
  //
  // mov x0, wide
  // add x1 base, x0
  // ldr x2, [x1, x0]
  //
  // In this situation, we can use the [base, xreg] addressing mode to save an
  // add/sub:
  //
  // mov x0, wide
  // ldr x2, [base, x0]
  auto ValAndVReg =
      getIConstantVRegValWithLookThrough(VReg: PtrAdd->getOperand(i: 2).getReg(), MRI);
  if (ValAndVReg) {
    unsigned Scale = Log2_32(Value: SizeInBytes);
    int64_t ImmOff = ValAndVReg->Value.getSExtValue();

    // Skip immediates that can be selected in the load/store addressing
    // mode, i.e. aligned, non-negative, and within the scaled unsigned
    // 12-bit immediate range.
    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
        ImmOff < (0x1000 << Scale))
      return std::nullopt;

    // Helper lambda to decide whether or not it is preferable to emit an add.
    auto isPreferredADD = [](int64_t ImmOff) {
      // Constants in [0x0, 0xfff] can be encoded in an add.
      if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
        return true;

      // Can it be encoded in an add lsl #12?
      if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
        return false;

      // It can be encoded in an add lsl #12, but we may not want to. If it is
      // possible to select this as a single movz, then prefer that. A single
      // movz is faster than an add with a shift.
      return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
             (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
    };

    // If the immediate can be encoded in a single add/sub, then bail out.
    // (Negated immediates are covered by sub.)
    if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return std::nullopt;
  }

  // Try to fold shifts into the addressing mode.
  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
  if (AddrModeFns)
    return AddrModeFns;

  // If that doesn't work, see if it's possible to fold in registers from
  // a GEP.
  return selectAddrModeRegisterOffset(Root);
}
7457
/// This is used for computing addresses like this:
///
/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
///
/// Where we have a 64-bit base register, a 32-bit offset register, and an
/// extend (which may or may not be signed).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // The address must come from a G_PTR_ADD that is worth folding.
  MachineInstr *PtrAdd =
      getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI: *PtrAdd, MRI, IsAddrOperand: true))
    return std::nullopt;

  MachineOperand &LHS = PtrAdd->getOperand(i: 1);
  MachineOperand &RHS = PtrAdd->getOperand(i: 2);
  MachineInstr *OffsetInst = getDefIgnoringCopies(Reg: RHS.getReg(), MRI);

  // The first case is the same as selectAddrModeXRO, except we need an extend.
  // In this case, we try to find a shift and extend, and fold them into the
  // addressing mode.
  //
  // E.g.
  //
  // off_reg = G_Z/S/ANYEXT ext_reg
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // In this case we can get a load like this:
  //
  // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
  auto ExtendedShl = selectExtendedSHL(Root, Base&: LHS, Offset&: OffsetInst->getOperand(i: 0),
                                       SizeInBytes, /*WantsExt=*/true);
  if (ExtendedShl)
    return ExtendedShl;

  // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
  //
  // e.g.
  // ldr something, [base_reg, ext_reg, sxtw]
  if (!isWorthFoldingIntoExtendedReg(MI: *OffsetInst, MRI, IsAddrOperand: true))
    return std::nullopt;

  // Check if this is an extend. We'll get an extend type if it is.
  AArch64_AM::ShiftExtendType Ext =
      getExtendTypeForInst(MI&: *OffsetInst, MRI, /*IsLoadStore=*/true);
  if (Ext == AArch64_AM::InvalidShiftExtend)
    return std::nullopt;

  // Need a 32-bit wide register.
  MachineIRBuilder MIB(*PtrAdd);
  Register ExtReg = moveScalarRegClass(Reg: OffsetInst->getOperand(i: 1).getReg(),
                                       RC: AArch64::GPR32RegClass, MIB);
  unsigned SignExtend = Ext == AArch64_AM::SXTW;

  // Base is LHS, offset is ExtReg. The shift flag (second immediate) is 0
  // because no shift was folded in this case.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: LHS.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(Val: SignExtend);
             MIB.addImm(Val: 0);
           }}};
}
7525
7526/// Select a "register plus unscaled signed 9-bit immediate" address. This
7527/// should only match when there is an offset that is not valid for a scaled
7528/// immediate addressing mode. The "Size" argument is the size in bytes of the
7529/// memory reference, which is needed here to know what is valid for a scaled
7530/// immediate.
7531InstructionSelector::ComplexRendererFns
7532AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7533 unsigned Size) const {
7534 MachineRegisterInfo &MRI =
7535 Root.getParent()->getParent()->getParent()->getRegInfo();
7536
7537 if (!Root.isReg())
7538 return std::nullopt;
7539
7540 if (!isBaseWithConstantOffset(Root, MRI))
7541 return std::nullopt;
7542
7543 MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg());
7544
7545 MachineOperand &OffImm = RootDef->getOperand(i: 2);
7546 if (!OffImm.isReg())
7547 return std::nullopt;
7548 MachineInstr *RHS = MRI.getVRegDef(Reg: OffImm.getReg());
7549 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7550 return std::nullopt;
7551 int64_t RHSC;
7552 MachineOperand &RHSOp1 = RHS->getOperand(i: 1);
7553 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7554 return std::nullopt;
7555 RHSC = RHSOp1.getCImm()->getSExtValue();
7556
7557 if (RHSC >= -256 && RHSC < 256) {
7558 MachineOperand &Base = RootDef->getOperand(i: 1);
7559 return {{
7560 [=](MachineInstrBuilder &MIB) { MIB.add(MO: Base); },
7561 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC); },
7562 }};
7563 }
7564 return std::nullopt;
7565}
7566
/// Try to fold a G_ADD_LOW fed by an ADRP into the immediate operand of a
/// scaled load/store, producing renderers for the ADRP result register and a
/// :lo12:-style global-address operand.
///
/// \p RootDef is the instruction defining the address, \p Size the memory
/// access size in bytes. Returns std::nullopt if the pair cannot be folded
/// (wrong opcodes, misaligned offset, thread-local global, or insufficient
/// global alignment for the scaled addressing form).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
                                                 unsigned Size,
                                                 MachineRegisterInfo &MRI) const {
  if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
    return std::nullopt;
  MachineInstr &Adrp = *MRI.getVRegDef(Reg: RootDef.getOperand(i: 1).getReg());
  if (Adrp.getOpcode() != AArch64::ADRP)
    return std::nullopt;

  // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
  // The folded offset is scaled by the access size, so it must be a multiple
  // of Size.
  auto Offset = Adrp.getOperand(i: 1).getOffset();
  if (Offset % Size != 0)
    return std::nullopt;

  // TLS accesses use a different sequence; don't fold those.
  auto GV = Adrp.getOperand(i: 1).getGlobal();
  if (GV->isThreadLocal())
    return std::nullopt;

  // The global must be at least Size-aligned for the scaled :lo12: offset to
  // be valid.
  auto &MF = *RootDef.getParent()->getParent();
  if (GV->getPointerAlignment(DL: MF.getDataLayout()) < Size)
    return std::nullopt;

  unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM: MF.getTarget());
  MachineIRBuilder MIRBuilder(RootDef);
  Register AdrpReg = Adrp.getOperand(i: 0).getReg();
  // Render the page-offset (PAGEOFF) half of the address; MO_NC suppresses
  // overflow checking on the relocation.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: AdrpReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addGlobalAddress(GV, Offset,
                                  TargetFlags: OpFlags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
           }}};
}
7600
/// Select a "register plus scaled unsigned 12-bit immediate" address. The
/// "Size" argument is the size in bytes of the memory reference, which
/// determines the scale.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  MachineFunction &MF = *Root.getParent()->getParent()->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (!Root.isReg())
    return std::nullopt;

  // A plain frame index is selected as [fi, #0].
  MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg());
  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(MO: RootDef->getOperand(i: 1)); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); },
    }};
  }

  CodeModel::Model CM = MF.getTarget().getCodeModel();
  // Check if we can fold in the ADD of small code model ADRP + ADD address.
  // HACK: ld64 on Darwin doesn't support relocations on PRFM, so we can't fold
  // globals into the offset.
  MachineInstr *RootParent = Root.getParent();
  if (CM == CodeModel::Small &&
      !(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH &&
        STI.isTargetDarwin())) {
    auto OpFns = tryFoldAddLowIntoImm(RootDef&: *RootDef, Size, MRI);
    if (OpFns)
      return OpFns;
  }

  if (isBaseWithConstantOffset(Root, MRI)) {
    MachineOperand &LHS = RootDef->getOperand(i: 1);
    MachineOperand &RHS = RootDef->getOperand(i: 2);
    MachineInstr *LHSDef = MRI.getVRegDef(Reg: LHS.getReg());
    MachineInstr *RHSDef = MRI.getVRegDef(Reg: RHS.getReg());

    // NOTE(review): this relies on isBaseWithConstantOffset guaranteeing the
    // RHS def is a G_CONSTANT carrying a CImm — confirm against its
    // implementation.
    int64_t RHSC = (int64_t)RHSDef->getOperand(i: 1).getCImm()->getZExtValue();
    unsigned Scale = Log2_32(Value: Size);
    // Offset must be Size-aligned, non-negative, and within the scaled
    // unsigned 12-bit range.
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
      if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(MO: LHSDef->getOperand(i: 1)); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); },
        }};

      return {{
          [=](MachineInstrBuilder &MIB) { MIB.add(MO: LHS); },
          [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); },
      }};
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  // (Returning nullopt here is deliberate: it lets the unscaled pattern
  // match instead.)
  if (selectAddrModeUnscaled(Root, Size))
    return std::nullopt;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(MO: Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); },
  }};
}
7666
7667/// Given a shift instruction, return the correct shift type for that
7668/// instruction.
7669static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
7670 switch (MI.getOpcode()) {
7671 default:
7672 return AArch64_AM::InvalidShiftExtend;
7673 case TargetOpcode::G_SHL:
7674 return AArch64_AM::LSL;
7675 case TargetOpcode::G_LSHR:
7676 return AArch64_AM::LSR;
7677 case TargetOpcode::G_ASHR:
7678 return AArch64_AM::ASR;
7679 case TargetOpcode::G_ROTR:
7680 return AArch64_AM::ROR;
7681 }
7682}
7683
7684/// Select a "shifted register" operand. If the value is not shifted, set the
7685/// shift operand to a default value of "lsl 0".
7686InstructionSelector::ComplexRendererFns
7687AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7688 bool AllowROR) const {
7689 if (!Root.isReg())
7690 return std::nullopt;
7691 MachineRegisterInfo &MRI =
7692 Root.getParent()->getParent()->getParent()->getRegInfo();
7693
7694 // Check if the operand is defined by an instruction which corresponds to
7695 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7696 MachineInstr *ShiftInst = MRI.getVRegDef(Reg: Root.getReg());
7697 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(MI&: *ShiftInst);
7698 if (ShType == AArch64_AM::InvalidShiftExtend)
7699 return std::nullopt;
7700 if (ShType == AArch64_AM::ROR && !AllowROR)
7701 return std::nullopt;
7702 if (!isWorthFoldingIntoExtendedReg(MI: *ShiftInst, MRI, IsAddrOperand: false))
7703 return std::nullopt;
7704
7705 // Need an immediate on the RHS.
7706 MachineOperand &ShiftRHS = ShiftInst->getOperand(i: 2);
7707 auto Immed = getImmedFromMO(Root: ShiftRHS);
7708 if (!Immed)
7709 return std::nullopt;
7710
7711 // We have something that we can fold. Fold in the shift's LHS and RHS into
7712 // the instruction.
7713 MachineOperand &ShiftLHS = ShiftInst->getOperand(i: 1);
7714 Register ShiftReg = ShiftLHS.getReg();
7715
7716 unsigned NumBits = MRI.getType(Reg: ShiftReg).getSizeInBits();
7717 unsigned Val = *Immed & (NumBits - 1);
7718 unsigned ShiftVal = AArch64_AM::getShifterImm(ST: ShType, Imm: Val);
7719
7720 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ShiftReg); },
7721 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShiftVal); }}};
7722}
7723
/// Classify \p MI as an extend usable in an extended-register operand.
///
/// Recognizes G_SEXT/G_SEXT_INREG (-> SXTB/SXTH/SXTW), G_ZEXT/G_ANYEXT
/// (-> UXTB/UXTH/UXTW), and G_AND with a 0xFF/0xFFFF/0xFFFFFFFF mask
/// (treated as a zero-extend). Returns InvalidShiftExtend otherwise.
/// When \p IsLoadStore is true, byte and halfword extends are rejected,
/// leaving only the word-sized extends (SXTW/UXTW).
AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
  unsigned Opc = MI.getOpcode();

  // Handle explicit extend instructions first.
  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
    unsigned Size;
    if (Opc == TargetOpcode::G_SEXT)
      Size = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
    else
      Size = MI.getOperand(i: 2).getImm();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
    case 16:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
    case 32:
      return AArch64_AM::SXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
    unsigned Size = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
    case 16:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
    case 32:
      return AArch64_AM::UXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  // Don't have an explicit extend. Try to handle a G_AND with a constant mask
  // on the RHS.
  if (Opc != TargetOpcode::G_AND)
    return AArch64_AM::InvalidShiftExtend;

  std::optional<uint64_t> MaybeAndMask = getImmedFromMO(Root: MI.getOperand(i: 2));
  if (!MaybeAndMask)
    return AArch64_AM::InvalidShiftExtend;
  uint64_t AndMask = *MaybeAndMask;
  switch (AndMask) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case 0xFF:
    return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
  case 0xFFFF:
    return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
  case 0xFFFFFFFF:
    return AArch64_AM::UXTW;
  }
}
7783
7784Register AArch64InstructionSelector::moveScalarRegClass(
7785 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7786 MachineRegisterInfo &MRI = *MIB.getMRI();
7787 auto Ty = MRI.getType(Reg);
7788 assert(!Ty.isVector() && "Expected scalars only!");
7789 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7790 return Reg;
7791
7792 // Create a copy and immediately select it.
7793 // FIXME: We should have an emitCopy function?
7794 auto Copy = MIB.buildCopy(Res: {&RC}, Op: {Reg});
7795 selectCopy(I&: *Copy, TII, MRI, TRI, RBI);
7796 return Copy.getReg(Idx: 0);
7797}
7798
/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
    MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  uint64_t ShiftVal = 0;
  Register ExtReg;
  AArch64_AM::ShiftExtendType Ext;
  MachineInstr *RootDef = getDefIgnoringCopies(Reg: Root.getReg(), MRI);
  if (!RootDef)
    return std::nullopt;

  if (!isWorthFoldingIntoExtendedReg(MI: *RootDef, MRI, IsAddrOperand: false))
    return std::nullopt;

  // Check if we can fold a shift and an extend.
  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
    // Look for a constant on the RHS of the shift.
    MachineOperand &RHS = RootDef->getOperand(i: 2);
    std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(Root: RHS);
    if (!MaybeShiftVal)
      return std::nullopt;
    ShiftVal = *MaybeShiftVal;
    // The extended-register form only encodes shift amounts 0-4.
    if (ShiftVal > 4)
      return std::nullopt;
    // Look for a valid extend instruction on the LHS of the shift.
    MachineOperand &LHS = RootDef->getOperand(i: 1);
    MachineInstr *ExtDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI);
    if (!ExtDef)
      return std::nullopt;
    Ext = getExtendTypeForInst(MI&: *ExtDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return std::nullopt;
    ExtReg = ExtDef->getOperand(i: 1).getReg();
  } else {
    // Didn't get a shift. Try just folding an extend.
    Ext = getExtendTypeForInst(MI&: *RootDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return std::nullopt;
    ExtReg = RootDef->getOperand(i: 1).getReg();

    // If we have a 32 bit instruction which zeroes out the high half of a
    // register, we get an implicit zero extend for free. Check if we have one.
    // FIXME: We actually emit the extend right now even though we don't have
    // to.
    // (Bail out so a cheaper pattern without the explicit extend can match.)
    if (Ext == AArch64_AM::UXTW && MRI.getType(Reg: ExtReg).getSizeInBits() == 32) {
      MachineInstr *ExtInst = MRI.getVRegDef(Reg: ExtReg);
      if (isDef32(MI: *ExtInst))
        return std::nullopt;
    }
  }

  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
  // copy.
  MachineIRBuilder MIB(*RootDef);
  ExtReg = moveScalarRegClass(Reg: ExtReg, RC: AArch64::GPR32RegClass, MIB);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(Val: getArithExtendImm(ET: Ext, Imm: ShiftVal));
           }}};
}
7866
/// Select the register holding the high half of a 128-bit vector value.
///
/// Looks through copies (and, on little-endian targets, G_BITCASTs) and
/// matches either:
///  - the second result of a G_UNMERGE_VALUES (the high-half register), or
///  - a G_EXTRACT_VECTOR_ELT of lane 1 from a v2s64 source.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  // Walk through bitcasts; on little-endian a bitcast doesn't move the high
  // lane, so the match below remains valid.
  auto Extract = getDefSrcRegIgnoringCopies(Reg: Root.getReg(), MRI);
  while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
         STI.isLittleEndian())
    Extract =
        getDefSrcRegIgnoringCopies(Reg: Extract->MI->getOperand(i: 1).getReg(), MRI);
  if (!Extract)
    return std::nullopt;

  if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
    // Root must be the unmerge's *second* result, i.e. the high half.
    if (Extract->Reg == Extract->MI->getOperand(i: 1).getReg()) {
      Register ExtReg = Extract->MI->getOperand(i: 2).getReg();
      return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}};
    }
  }
  if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
    LLT SrcTy = MRI.getType(Reg: Extract->MI->getOperand(i: 1).getReg());
    auto LaneIdx = getIConstantVRegValWithLookThrough(
        VReg: Extract->MI->getOperand(i: 2).getReg(), MRI);
    // Only lane 1 of a <2 x s64> is the high half.
    if (LaneIdx && SrcTy == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) &&
        LaneIdx->Value.getSExtValue() == 1) {
      Register ExtReg = Extract->MI->getOperand(i: 1).getReg();
      return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}};
    }
  }

  return std::nullopt;
}
7901
/// Match a vector fixed-point-conversion operand: a G_DUP of a constant whose
/// bit pattern, reinterpreted at the element width as an IEEE float, encodes
/// a valid fixed-point scale. On success, renders the number of fractional
/// bits (FBits) as an immediate.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectCVTFixedPointVecBase(
    const MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  const MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  // The operand must be a splat (G_DUP) of a constant.
  MachineInstr *Dup = getDefIgnoringCopies(Reg: Root.getReg(), MRI);
  if (Dup->getOpcode() != AArch64::G_DUP)
    return std::nullopt;
  std::optional<ValueAndVReg> CstVal =
      getAnyConstantVRegValWithLookThrough(VReg: Dup->getOperand(i: 1).getReg(), MRI);
  if (!CstVal)
    return std::nullopt;

  // Reinterpret the constant's bits as a float of the element width.
  unsigned RegWidth = MRI.getType(Reg: Root.getReg()).getScalarSizeInBits();
  APFloat FVal(0.0);
  switch (RegWidth) {
  case 16:
    FVal = APFloat(APFloat::IEEEhalf(), CstVal->Value);
    break;
  case 32:
    FVal = APFloat(APFloat::IEEEsingle(), CstVal->Value);
    break;
  case 64:
    FVal = APFloat(APFloat::IEEEdouble(), CstVal->Value);
    break;
  default:
    return std::nullopt;
  };
  // NOTE(review): CheckFixedPointOperandConstant is assumed to return the
  // fractional-bit count (0 meaning "not a fixed-point constant") — confirm
  // against its definition.
  if (unsigned FBits = CheckFixedPointOperandConstant(FVal, RegWidth,
                                                      /*isReciprocal*/ false))
    return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: FBits); }}};

  return std::nullopt;
}
7939
/// Complex-pattern entry point: forwards to the shared base implementation
/// (which is also reused by renderFixedPointXForm — see the FIXME there).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectCVTFixedPointVec(MachineOperand &Root) const {
  return selectCVTFixedPointVecBase(Root);
}
7944
/// Render the fixed-point fractional-bit immediate for MI's operand 2, as
/// computed by selectCVTFixedPointVecBase.
void AArch64InstructionSelector::renderFixedPointXForm(MachineInstrBuilder &MIB,
                                                       const MachineInstr &MI,
                                                       int OpIdx) const {
  // FIXME: This is only needed to satisfy the type checking in tablegen, and
  // should be able to reuse the Renderers already calculated by
  // selectCVTFixedPointVecBase.
  InstructionSelector::ComplexRendererFns Renderer =
      selectCVTFixedPointVecBase(Root: MI.getOperand(i: 2));
  assert((Renderer && Renderer->size() == 1) &&
         "Expected selectCVTFixedPointVec to provide a function\n");
  // Invoke the single renderer to append the immediate to MIB.
  (Renderer->front())(MIB);
}
7957
7958void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7959 const MachineInstr &MI,
7960 int OpIdx) const {
7961 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7962 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7963 "Expected G_CONSTANT");
7964 std::optional<int64_t> CstVal =
7965 getIConstantVRegSExtVal(VReg: MI.getOperand(i: 0).getReg(), MRI);
7966 assert(CstVal && "Expected constant value");
7967 MIB.addImm(Val: *CstVal);
7968}
7969
7970void AArch64InstructionSelector::renderLogicalImm32(
7971 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7972 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7973 "Expected G_CONSTANT");
7974 uint64_t CstVal = I.getOperand(i: 1).getCImm()->getZExtValue();
7975 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(imm: CstVal, regSize: 32);
7976 MIB.addImm(Val: Enc);
7977}
7978
7979void AArch64InstructionSelector::renderLogicalImm64(
7980 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7981 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7982 "Expected G_CONSTANT");
7983 uint64_t CstVal = I.getOperand(i: 1).getCImm()->getZExtValue();
7984 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(imm: CstVal, regSize: 64);
7985 MIB.addImm(Val: Enc);
7986}
7987
7988void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7989 const MachineInstr &MI,
7990 int OpIdx) const {
7991 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7992 "Expected G_UBSANTRAP");
7993 MIB.addImm(Val: MI.getOperand(i: 0).getImm() | ('U' << 8));
7994}
7995
7996void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7997 const MachineInstr &MI,
7998 int OpIdx) const {
7999 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
8000 "Expected G_FCONSTANT");
8001 MIB.addImm(
8002 Val: AArch64_AM::getFP16Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF()));
8003}
8004
8005void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
8006 const MachineInstr &MI,
8007 int OpIdx) const {
8008 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
8009 "Expected G_FCONSTANT");
8010 MIB.addImm(
8011 Val: AArch64_AM::getFP32Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF()));
8012}
8013
8014void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
8015 const MachineInstr &MI,
8016 int OpIdx) const {
8017 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
8018 "Expected G_FCONSTANT");
8019 MIB.addImm(
8020 Val: AArch64_AM::getFP64Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF()));
8021}
8022
8023void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
8024 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
8025 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
8026 "Expected G_FCONSTANT");
8027 MIB.addImm(Val: AArch64_AM::encodeAdvSIMDModImmType4(Imm: MI.getOperand(i: 1)
8028 .getFPImm()
8029 ->getValueAPF()
8030 .bitcastToAPInt()
8031 .getZExtValue()));
8032}
8033
8034bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
8035 const MachineInstr &MI, unsigned NumBytes) const {
8036 if (!MI.mayLoadOrStore())
8037 return false;
8038 assert(MI.hasOneMemOperand() &&
8039 "Expected load/store to have only one mem op!");
8040 return (*MI.memoperands_begin())->getSize() == NumBytes;
8041}
8042
8043bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
8044 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
8045 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits() != 32)
8046 return false;
8047
8048 // Only return true if we know the operation will zero-out the high half of
8049 // the 64-bit register. Truncates can be subregister copies, which don't
8050 // zero out the high bits. Copies and other copy-like instructions can be
8051 // fed by truncates, or could be lowered as subregister copies.
8052 switch (MI.getOpcode()) {
8053 default:
8054 return true;
8055 case TargetOpcode::COPY:
8056 case TargetOpcode::G_BITCAST:
8057 case TargetOpcode::G_TRUNC:
8058 case TargetOpcode::G_PHI:
8059 return false;
8060 }
8061}
8062
8063
// Perform fixups on the given PHI instruction's operands to force them all
// to be the same as the destination regbank.
//
// Each mismatched use is rerouted through a cross-bank COPY inserted in the
// block that defines the incoming value (after any PHIs there), and the PHI
// operand is rewritten to read the copy instead.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
                            const AArch64RegisterBankInfo &RBI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
  Register DstReg = MI.getOperand(i: 0).getReg();
  const RegisterBank *DstRB = MRI.getRegBankOrNull(Reg: DstReg);
  assert(DstRB && "Expected PHI dst to have regbank assigned");
  MachineIRBuilder MIB(MI);

  // Go through each operand and ensure it has the same regbank.
  // (drop_begin skips operand 0, the def.)
  for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands())) {
    if (!MO.isReg())
      continue;
    Register OpReg = MO.getReg();
    const RegisterBank *RB = MRI.getRegBankOrNull(Reg: OpReg);
    if (RB != DstRB) {
      // Insert a cross-bank copy.
      auto *OpDef = MRI.getVRegDef(Reg: OpReg);
      const LLT &Ty = MRI.getType(Reg: OpReg);
      MachineBasicBlock &OpDefBB = *OpDef->getParent();

      // Any instruction we insert must appear after all PHIs in the block
      // for the block to be valid MIR.
      MachineBasicBlock::iterator InsertPt = std::next(x: OpDef->getIterator());
      if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
        InsertPt = OpDefBB.getFirstNonPHI();
      MIB.setInsertPt(MBB&: *OpDef->getParent(), II: InsertPt);
      auto Copy = MIB.buildCopy(Res: Ty, Op: OpReg);
      // The copy's result lives on the destination bank; point the PHI
      // operand at it.
      MRI.setRegBank(Reg: Copy.getReg(Idx: 0), RegBank: *DstRB);
      MO.setReg(Copy.getReg(Idx: 0));
    }
  }
}
8098
/// Pre-selection pass over all G_PHIs in MF: where a narrow (<32b) scalar
/// PHI has operands split across the GPR and FPR banks, insert cross-bank
/// copies (via fixupPHIOpBanks) so every operand matches the def's bank.
void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  // We're looking for PHIs, build a list so we don't invalidate iterators.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<MachineInstr *, 32> Phis;
  for (auto &BB : MF) {
    for (auto &MI : BB) {
      if (MI.getOpcode() == TargetOpcode::G_PHI)
        Phis.emplace_back(Args: &MI);
    }
  }

  for (auto *MI : Phis) {
    // We need to do some work here if the operand types are < 16 bit and they
    // are split across fpr/gpr banks. Since all types <32b on gpr
    // end up being assigned gpr32 regclasses, we can end up with PHIs here
    // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
    // be selecting heterogenous regbanks for operands if possible, but we
    // still need to be able to deal with it here.
    //
    // To fix this, if we have a gpr-bank operand < 32b in size and at least
    // one other operand is on the fpr bank, then we add cross-bank copies
    // to homogenize the operand banks. For simplicity the bank that we choose
    // to settle on is whatever bank the def operand has. For example:
    //
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
    // =>
    // %bb2:
    //   ...
    //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
    //   ...
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
    bool HasGPROp = false, HasFPROp = false;
    // Scan the use operands (drop_begin skips the def); bail out of the scan
    // on anything that isn't a narrow scalar with an assigned bank.
    for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands())) {
      if (!MO.isReg())
        continue;
      const LLT &Ty = MRI.getType(Reg: MO.getReg());
      if (!Ty.isValid() || !Ty.isScalar())
        break;
      if (Ty.getSizeInBits() >= 32)
        break;
      const RegisterBank *RB = MRI.getRegBankOrNull(Reg: MO.getReg());
      // If for some reason we don't have a regbank yet. Don't try anything.
      if (!RB)
        break;

      if (RB->getID() == AArch64::GPRRegBankID)
        HasGPROp = true;
      else
        HasFPROp = true;
    }
    // We have heterogenous regbanks, need to fixup.
    if (HasGPROp && HasFPROp)
      fixupPHIOpBanks(MI&: *MI, MRI, RBI);
  }
}
8156
namespace llvm {
/// Factory entry point: construct the GlobalISel instruction selector for
/// the given AArch64 target machine, subtarget, and register bank info.
/// The caller owns the returned selector.
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 const AArch64Subtarget &Subtarget,
                                 const AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
} // namespace llvm
8165