1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "MCTargetDesc/AArch64AddressingModes.h"
22#include "MCTargetDesc/AArch64MCTargetDesc.h"
23#include "llvm/BinaryFormat/Dwarf.h"
24#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
25#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
26#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
27#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
28#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
29#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
30#include "llvm/CodeGen/GlobalISel/Utils.h"
31#include "llvm/CodeGen/MachineBasicBlock.h"
32#include "llvm/CodeGen/MachineConstantPool.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
34#include "llvm/CodeGen/MachineFunction.h"
35#include "llvm/CodeGen/MachineInstr.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineMemOperand.h"
38#include "llvm/CodeGen/MachineOperand.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/TargetOpcodes.h"
41#include "llvm/CodeGen/TargetRegisterInfo.h"
42#include "llvm/IR/Constants.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/Instructions.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/raw_ostream.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59class BlockFrequencyInfo;
60class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelValueTracking *VT,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
    InstructionSelector::setupMF(MF, VT, CoverageInfo, PSI, BFI);
    MIB.setMF(MF);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    MFReturnAddr = Register();

    processPHIs(MF);
  }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
118 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
  /// Helper to narrow a vector that was widened by emitScalarToVector.
  /// Copies the lowest part of a 128-bit or 64-bit vector to a 64-bit or
  /// 32-bit vector, respectively.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI);
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195 MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
200 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
201 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222 MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
241 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251 MachineOperand &Predicate,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred if given is the intended predicate to use.
256 MachineInstr *
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
260 MachineInstr *
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 4> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
300 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
301 MachineIRBuilder &MIRBuilder) const;
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
303 MachineOperand &RHS,
304 MachineIRBuilder &MIRBuilder) const;
305 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
306 MachineIRBuilder &MIRBuilder) const;
307 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
308 MachineIRBuilder &MIRBuilder) const;
309 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
310 MachineIRBuilder &MIRBuilder) const;
311 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
312 MachineIRBuilder &MIRBuilder) const;
313 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
314 MachineIRBuilder &MIRBuilder) const;
315 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
316 MachineIRBuilder &MIRBuilder) const;
317 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
318 AArch64CC::CondCode CC,
319 MachineIRBuilder &MIRBuilder) const;
320 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
321 const RegisterBank &DstRB, LLT ScalarTy,
322 Register VecReg, unsigned LaneIdx,
323 MachineIRBuilder &MIRBuilder) const;
324 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
325 AArch64CC::CondCode Pred,
326 MachineIRBuilder &MIRBuilder) const;
327 /// Emit a CSet for a FP compare.
328 ///
329 /// \p Dst is expected to be a 32-bit scalar register.
330 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
331 MachineIRBuilder &MIRBuilder) const;
332
333 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
334 /// Might elide the instruction if the previous instruction already sets NZCV
335 /// correctly.
336 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
337
338 /// Emit the overflow op for \p Opcode.
339 ///
340 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
341 /// G_USUBO, etc.
342 std::pair<MachineInstr *, AArch64CC::CondCode>
343 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
344 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
345
346 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
347
348 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
349 /// In some cases this is even possible with OR operations in the expression.
350 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
351 MachineIRBuilder &MIB) const;
352 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
353 CmpInst::Predicate CC,
354 AArch64CC::CondCode Predicate,
355 AArch64CC::CondCode OutCC,
356 MachineIRBuilder &MIB) const;
357 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
358 bool Negate, Register CCOp,
359 AArch64CC::CondCode Predicate,
360 MachineIRBuilder &MIB) const;
361
362 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
363 /// \p IsNegative is true if the test should be "not zero".
364 /// This will also optimize the test bit instruction when possible.
365 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
366 MachineBasicBlock *DstMBB,
367 MachineIRBuilder &MIB) const;
368
369 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
370 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
371 MachineBasicBlock *DestMBB,
372 MachineIRBuilder &MIB) const;
373
374 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
375 // We use these manually instead of using the importer since it doesn't
376 // support SDNodeXForm.
377 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
379 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
381
382 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
383 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
384 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
385
386 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
387 unsigned Size) const;
388
  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }
404
405 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
406 /// from complex pattern matchers like selectAddrModeIndexed().
407 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
408 MachineRegisterInfo &MRI) const;
409
410 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
411 unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }
416
417 std::optional<bool>
418 isWorthFoldingIntoAddrMode(MachineInstr &MI,
419 const MachineRegisterInfo &MRI) const;
420
421 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
422 const MachineRegisterInfo &MRI,
423 bool IsAddrOperand) const;
424 ComplexRendererFns
425 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
426 unsigned SizeInBytes) const;
427
  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
  /// or not a shift + extend should be folded into an addressing mode. Returns
  /// std::nullopt when this is not profitable or possible.
431 ComplexRendererFns
432 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
433 MachineOperand &Offset, unsigned SizeInBytes,
434 bool WantsExt) const;
435 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
436 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
437 unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }
442
443 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
444 unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }
449
450 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
451 bool AllowROR = false) const;
452
453 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
454 return selectShiftedRegister(Root);
455 }
456
  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root, true);
  }
460
461 /// Given an extend instruction, determine the correct shift-extend type for
462 /// that instruction.
463 ///
464 /// If the instruction is going to be used in a load or store, pass
465 /// \p IsLoadStore = true.
466 AArch64_AM::ShiftExtendType
467 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
468 bool IsLoadStore = false) const;
469
470 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
471 ///
472 /// \returns Either \p Reg if no change was necessary, or the new register
473 /// created by moving \p Reg.
474 ///
475 /// Note: This uses emitCopy right now.
476 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
477 MachineIRBuilder &MIB) const;
478
479 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
480
481 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
482
483 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
484 int OpIdx = -1) const;
485 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
486 int OpIdx = -1) const;
487 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
488 int OpIdx = -1) const;
489 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
490 int OpIdx) const;
491 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx = -1) const;
493 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
494 int OpIdx = -1) const;
495 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
496 int OpIdx = -1) const;
497 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
498 const MachineInstr &MI,
499 int OpIdx = -1) const;
500
501 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
502 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
503
504 // Optimization methods.
505 bool tryOptSelect(GSelect &Sel);
506 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
507 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
508 MachineOperand &Predicate,
509 MachineIRBuilder &MIRBuilder) const;
510
511 /// Return true if \p MI is a load or store of \p NumBytes bytes.
512 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
513
514 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
515 /// register zeroed out. In other words, the result of MI has been explicitly
516 /// zero extended.
517 bool isDef32(const MachineInstr &MI) const;
518
519 const AArch64TargetMachine &TM;
520 const AArch64Subtarget &STI;
521 const AArch64InstrInfo &TII;
522 const AArch64RegisterInfo &TRI;
523 const AArch64RegisterBankInfo &RBI;
524
525 bool ProduceNonFlagSettingCondBr = false;
526
527 // Some cached values used during selection.
528 // We use LR as a live-in register, and we keep track of it here as it can be
529 // clobbered by calls.
530 Register MFReturnAddr;
531
532 MachineIRBuilder MIB;
533
534#define GET_GLOBALISEL_PREDICATES_DECL
535#include "AArch64GenGlobalISel.inc"
536#undef GET_GLOBALISEL_PREDICATES_DECL
537
538// We declare the temporaries used by selectImpl() in the class to minimize the
539// cost of constructing placeholder values.
540#define GET_GLOBALISEL_TEMPORARIES_DECL
541#include "AArch64GenGlobalISel.inc"
542#undef GET_GLOBALISEL_TEMPORARIES_DECL
543};
544
545} // end anonymous namespace
546
547#define GET_GLOBALISEL_IMPL
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_IMPL
550
551AArch64InstructionSelector::AArch64InstructionSelector(
552 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
553 const AArch64RegisterBankInfo &RBI)
554 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
555 RBI(RBI),
556#define GET_GLOBALISEL_PREDICATES_INIT
557#include "AArch64GenGlobalISel.inc"
558#undef GET_GLOBALISEL_PREDICATES_INIT
559#define GET_GLOBALISEL_TEMPORARIES_INIT
560#include "AArch64GenGlobalISel.inc"
561#undef GET_GLOBALISEL_TEMPORARIES_INIT
562{
563}
564
565// FIXME: This should be target-independent, inferred from the types declared
566// for each class in the bank.
567//
568/// Given a register bank, and a type, return the smallest register class that
569/// can represent that combination.
570static const TargetRegisterClass *
571getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
572 bool GetAllRegSet = false) {
573 if (RB.getID() == AArch64::GPRRegBankID) {
574 if (Ty.getSizeInBits() <= 32)
575 return GetAllRegSet ? &AArch64::GPR32allRegClass
576 : &AArch64::GPR32RegClass;
577 if (Ty.getSizeInBits() == 64)
578 return GetAllRegSet ? &AArch64::GPR64allRegClass
579 : &AArch64::GPR64RegClass;
580 if (Ty.getSizeInBits() == 128)
581 return &AArch64::XSeqPairsClassRegClass;
582 return nullptr;
583 }
584
585 if (RB.getID() == AArch64::FPRRegBankID) {
586 switch (Ty.getSizeInBits()) {
587 case 8:
588 return &AArch64::FPR8RegClass;
589 case 16:
590 return &AArch64::FPR16RegClass;
591 case 32:
592 return &AArch64::FPR32RegClass;
593 case 64:
594 return &AArch64::FPR64RegClass;
595 case 128:
596 return &AArch64::FPR128RegClass;
597 }
598 return nullptr;
599 }
600
601 return nullptr;
602}
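
// Illustrative only (a sketch, not code used here): the mapping above means
// that, for example, a 64-bit scalar on the GPR bank resolves to GPR64 (or
// GPR64all when GetAllRegSet is true), while a 64-bit value on the FPR bank
// resolves to FPR64:
//   getRegClassForTypeOnBank(LLT::scalar(64), GPRBank); // &AArch64::GPR64RegClass
//   getRegClassForTypeOnBank(LLT::scalar(64), FPRBank); // &AArch64::FPR64RegClass
// where GPRBank/FPRBank stand for the corresponding RegisterBank objects.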
603
604/// Given a register bank, and size in bits, return the smallest register class
605/// that can represent that combination.
606static const TargetRegisterClass *
607getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
608 bool GetAllRegSet = false) {
609 if (SizeInBits.isScalable()) {
610 assert(RB.getID() == AArch64::FPRRegBankID &&
611 "Expected FPR regbank for scalable type size");
612 return &AArch64::ZPRRegClass;
613 }
614
615 unsigned RegBankID = RB.getID();
616
617 if (RegBankID == AArch64::GPRRegBankID) {
618 assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
619 if (SizeInBits <= 32)
620 return GetAllRegSet ? &AArch64::GPR32allRegClass
621 : &AArch64::GPR32RegClass;
622 if (SizeInBits == 64)
623 return GetAllRegSet ? &AArch64::GPR64allRegClass
624 : &AArch64::GPR64RegClass;
625 if (SizeInBits == 128)
626 return &AArch64::XSeqPairsClassRegClass;
627 }
628
629 if (RegBankID == AArch64::FPRRegBankID) {
630 if (SizeInBits.isScalable()) {
631 assert(SizeInBits == TypeSize::getScalable(128) &&
632 "Unexpected scalable register size");
633 return &AArch64::ZPRRegClass;
634 }
635
636 switch (SizeInBits) {
637 default:
638 return nullptr;
639 case 8:
640 return &AArch64::FPR8RegClass;
641 case 16:
642 return &AArch64::FPR16RegClass;
643 case 32:
644 return &AArch64::FPR32RegClass;
645 case 64:
646 return &AArch64::FPR64RegClass;
647 case 128:
648 return &AArch64::FPR128RegClass;
649 }
650 }
651
652 return nullptr;
653}
654
655/// Returns the correct subregister to use for a given register class.
656static bool getSubRegForClass(const TargetRegisterClass *RC,
657 const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
659 case 8:
660 SubReg = AArch64::bsub;
661 break;
662 case 16:
663 SubReg = AArch64::hsub;
664 break;
665 case 32:
666 if (RC != &AArch64::FPR32RegClass)
667 SubReg = AArch64::sub_32;
668 else
669 SubReg = AArch64::ssub;
670 break;
671 case 64:
672 SubReg = AArch64::dsub;
673 break;
674 default:
675 LLVM_DEBUG(
676 dbgs() << "Couldn't find appropriate subregister for register class.");
677 return false;
678 }
679
680 return true;
681}
682
683/// Returns the minimum size the given register bank can hold.
684static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
685 switch (RB.getID()) {
686 case AArch64::GPRRegBankID:
687 return 32;
688 case AArch64::FPRRegBankID:
689 return 8;
690 default:
691 llvm_unreachable("Tried to get minimum size for unknown register bank.");
692 }
693}
694
695/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
696/// Helper function for functions like createDTuple and createQTuple.
697///
698/// \p RegClassIDs - The list of register class IDs available for some tuple of
699/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
700/// expected to contain between 2 and 4 tuple classes.
701///
702/// \p SubRegs - The list of subregister classes associated with each register
703/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
704/// subregister class. The index of each subregister class is expected to
705/// correspond with the index of each register class.
706///
707/// \returns Either the destination register of REG_SEQUENCE instruction that
708/// was created, or the 0th element of \p Regs if \p Regs contains a single
709/// element.
710static Register createTuple(ArrayRef<Register> Regs,
711 const unsigned RegClassIDs[],
712 const unsigned SubRegs[], MachineIRBuilder &MIB) {
  unsigned NumRegs = Regs.size();
  if (NumRegs == 1)
    return Regs[0];
  assert(NumRegs >= 2 && NumRegs <= 4 &&
         "Only support between two and 4 registers in a tuple!");
  const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
  auto RegSequence =
      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
    RegSequence.addUse(Regs[I]);
    RegSequence.addImm(SubRegs[I]);
  }
  return RegSequence.getReg(0);
727}
728
729/// Create a tuple of D-registers using the registers in \p Regs.
730static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
731 static const unsigned RegClassIDs[] = {
732 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
733 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
734 AArch64::dsub2, AArch64::dsub3};
735 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
736}
737
738/// Create a tuple of Q-registers using the registers in \p Regs.
739static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
740 static const unsigned RegClassIDs[] = {
741 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
742 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
743 AArch64::qsub2, AArch64::qsub3};
744 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
745}
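
// A sketch of the generated MIR (virtual register names are hypothetical):
// for two Q-register inputs, createQTuple(Regs, MIB) emits roughly
//   %tuple:qq = REG_SEQUENCE %q0, %subreg.qsub0, %q1, %subreg.qsub1
// and returns %tuple, while a single-element Regs is returned unchanged.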
746
747static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
748 auto &MI = *Root.getParent();
749 auto &MBB = *MI.getParent();
750 auto &MF = *MBB.getParent();
751 auto &MRI = MF.getRegInfo();
752 uint64_t Immed;
753 if (Root.isImm())
754 Immed = Root.getImm();
755 else if (Root.isCImm())
756 Immed = Root.getCImm()->getZExtValue();
757 else if (Root.isReg()) {
    auto ValAndVReg =
        getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
760 if (!ValAndVReg)
761 return std::nullopt;
762 Immed = ValAndVReg->Value.getSExtValue();
763 } else
764 return std::nullopt;
765 return Immed;
766}
767
/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - its operands are not all in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
774static bool unsupportedBinOp(const MachineInstr &I,
775 const AArch64RegisterBankInfo &RBI,
776 const MachineRegisterInfo &MRI,
777 const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
779 if (!Ty.isValid()) {
780 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
781 return true;
782 }
783
784 const RegisterBank *PrevOpBank = nullptr;
785 for (auto &MO : I.operands()) {
786 // FIXME: Support non-register operands.
787 if (!MO.isReg()) {
788 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
789 return true;
790 }
791
792 // FIXME: Can generic operations have physical registers operands? If
793 // so, this will need to be taught about that, and we'll need to get the
794 // bank out of the minimal class for the register.
795 // Either way, this needs to be documented (and possibly verified).
796 if (!MO.getReg().isVirtual()) {
797 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
798 return true;
799 }
800
    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
802 if (!OpBank) {
803 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
804 return true;
805 }
806
807 if (PrevOpBank && OpBank != PrevOpBank) {
808 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
809 return true;
810 }
811 PrevOpBank = OpBank;
812 }
813 return false;
814}
815
816/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
817/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
818/// and of size \p OpSize.
819/// \returns \p GenericOpc if the combination is unsupported.
820static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
821 unsigned OpSize) {
822 switch (RegBankID) {
823 case AArch64::GPRRegBankID:
824 if (OpSize == 32) {
825 switch (GenericOpc) {
826 case TargetOpcode::G_SHL:
827 return AArch64::LSLVWr;
828 case TargetOpcode::G_LSHR:
829 return AArch64::LSRVWr;
830 case TargetOpcode::G_ASHR:
831 return AArch64::ASRVWr;
832 default:
833 return GenericOpc;
834 }
835 } else if (OpSize == 64) {
836 switch (GenericOpc) {
837 case TargetOpcode::G_PTR_ADD:
838 return AArch64::ADDXrr;
839 case TargetOpcode::G_SHL:
840 return AArch64::LSLVXr;
841 case TargetOpcode::G_LSHR:
842 return AArch64::LSRVXr;
843 case TargetOpcode::G_ASHR:
844 return AArch64::ASRVXr;
845 default:
846 return GenericOpc;
847 }
848 }
849 break;
850 case AArch64::FPRRegBankID:
851 switch (OpSize) {
852 case 32:
853 switch (GenericOpc) {
854 case TargetOpcode::G_FADD:
855 return AArch64::FADDSrr;
856 case TargetOpcode::G_FSUB:
857 return AArch64::FSUBSrr;
858 case TargetOpcode::G_FMUL:
859 return AArch64::FMULSrr;
860 case TargetOpcode::G_FDIV:
861 return AArch64::FDIVSrr;
862 default:
863 return GenericOpc;
864 }
865 case 64:
866 switch (GenericOpc) {
867 case TargetOpcode::G_FADD:
868 return AArch64::FADDDrr;
869 case TargetOpcode::G_FSUB:
870 return AArch64::FSUBDrr;
871 case TargetOpcode::G_FMUL:
872 return AArch64::FMULDrr;
873 case TargetOpcode::G_FDIV:
874 return AArch64::FDIVDrr;
875 case TargetOpcode::G_OR:
876 return AArch64::ORRv8i8;
877 default:
878 return GenericOpc;
879 }
880 }
881 break;
882 }
883 return GenericOpc;
884}
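
// For example, selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 64)
// yields AArch64::LSLVXr, and selectBinaryOp(TargetOpcode::G_FADD,
// AArch64::FPRRegBankID, 32) yields AArch64::FADDSrr; any combination not
// listed above simply returns the generic opcode unchanged.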
885
886/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
887/// appropriate for the (value) register bank \p RegBankID and of memory access
888/// size \p OpSize. This returns the variant with the base+unsigned-immediate
889/// addressing mode (e.g., LDRXui).
890/// \returns \p GenericOpc if the combination is unsupported.
891static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
892 unsigned OpSize) {
893 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
894 switch (RegBankID) {
895 case AArch64::GPRRegBankID:
896 switch (OpSize) {
897 case 8:
898 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
899 case 16:
900 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
901 case 32:
902 return isStore ? AArch64::STRWui : AArch64::LDRWui;
903 case 64:
904 return isStore ? AArch64::STRXui : AArch64::LDRXui;
905 }
906 break;
907 case AArch64::FPRRegBankID:
908 switch (OpSize) {
909 case 8:
910 return isStore ? AArch64::STRBui : AArch64::LDRBui;
911 case 16:
912 return isStore ? AArch64::STRHui : AArch64::LDRHui;
913 case 32:
914 return isStore ? AArch64::STRSui : AArch64::LDRSui;
915 case 64:
916 return isStore ? AArch64::STRDui : AArch64::LDRDui;
917 case 128:
918 return isStore ? AArch64::STRQui : AArch64::LDRQui;
919 }
920 break;
921 }
922 return GenericOpc;
923}
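
// For example, a 64-bit G_LOAD on the GPR bank maps to AArch64::LDRXui and a
// 128-bit G_STORE on the FPR bank maps to AArch64::STRQui; unsupported
// combinations fall through and return the generic opcode.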
924
925/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
926/// to \p *To.
927///
928/// E.g "To = COPY SrcReg:SubReg"
929static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
930 const RegisterBankInfo &RBI, Register SrcReg,
931 const TargetRegisterClass *To, unsigned SubReg) {
932 assert(SrcReg.isValid() && "Expected a valid source register?");
933 assert(To && "Destination register class cannot be null");
934 assert(SubReg && "Expected a valid subregister");
935
936 MachineIRBuilder MIB(I);
  auto SubRegCopy =
      MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!I.getOperand(0).getReg().isPhysical())
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
946
947 return true;
948}
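
// A before/after sketch (virtual register names hypothetical): for a copy
// where only the low 32 bits of a 64-bit source are needed,
//   %dst = COPY %src
// is rewritten so that I copies from a fresh subregister copy instead:
//   %lo = COPY %src.sub_32
//   %dst = COPY %lo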
949
950/// Helper function to get the source and destination register classes for a
951/// copy. Returns a std::pair containing the source register class for the
952/// copy, and the destination register class for the copy. If a register class
953/// cannot be determined, then it will be nullptr.
954static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
955getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
956 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
957 const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
965
966 // Special casing for cross-bank copies of s1s. We can technically represent
967 // a 1-bit value with any size of register. The minimum size for a GPR is 32
968 // bits. So, we need to put the FPR on 32 bits as well.
969 //
970 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
971 // then we can pull it into the helpers that get the appropriate class for a
972 // register bank. Or make a new helper that carries along some constraint
973 // information.
  if (SrcRegBank != DstRegBank &&
      (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
    SrcSize = DstSize = TypeSize::getFixed(32);

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
980}
981
982// FIXME: We need some sort of API in RBI/TRI to allow generic code to
983// constrain operands of simple instructions given a TargetRegisterClass
984// and LLT
985static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
986 const RegisterBankInfo &RBI) {
987 for (MachineOperand &MO : I.operands()) {
988 if (!MO.isReg())
989 continue;
990 Register Reg = MO.getReg();
991 if (!Reg)
992 continue;
993 if (Reg.isPhysical())
994 continue;
995 LLT Ty = MRI.getType(Reg);
    const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
    const TargetRegisterClass *RC =
        dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
    if (!RC) {
      const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
      RC = getRegClassForTypeOnBank(Ty, RB);
1002 if (!RC) {
1003 LLVM_DEBUG(
1004 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
1005 break;
1006 }
1007 }
    RBI.constrainGenericRegister(Reg, *RC, MRI);
1009 }
1010
1011 return true;
1012}
1013
1014static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1015 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1016 const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination
  // registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1026
1027 if (!DstRC) {
1028 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1029 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1030 return false;
1031 }
1032
1033 // Is this a copy? If so, then we may need to insert a subregister copy.
1034 if (I.isCopy()) {
1035 // Yes. Check if there's anything to fix up.
1036 if (!SrcRC) {
1037 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1038 return false;
1039 }
1040
    const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
    const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
    unsigned SubReg;

    // If the source bank doesn't support a subregister copy small enough,
    // then we first need to copy to the destination bank.
    if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
      getSubRegForClass(DstRC, TRI, SubReg);

      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
      copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // If the source register is bigger than the destination we need to
      // perform a subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SubRegRC, TRI, SubReg);
      copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // If the destination register is bigger than the source we need to do
      // a promotion using SUBREG_TO_REG.
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SrcRC, TRI, SubReg);

      Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(SubReg);
      MachineOperand &RegOp = I.getOperand(1);
      RegOp.setReg(PromoteReg);
    }
1078
1079 // If the destination is a physical register, then there's nothing to
1080 // change, so we're done.
1081 if (DstReg.isPhysical())
1082 return true;
1083 }
1084
1085 // No need to constrain SrcReg. It will get constrained when we hit another
1086 // of its use or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1088 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1089 << " operand\n");
1090 return false;
1091 }
1092
  // If this is a GPR ZEXT, we just reduce it down into a copy.
  // The sizes will be mismatched with the source < 32b but that's ok.
  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
    return selectCopy(I, TII, MRI, TRI, RBI);
  }

  I.setDesc(TII.get(AArch64::COPY));
1102 return true;
1103}
1104
1105static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1106 if (!DstTy.isScalar() || !SrcTy.isScalar())
1107 return GenericOpc;
1108
1109 const unsigned DstSize = DstTy.getSizeInBits();
1110 const unsigned SrcSize = SrcTy.getSizeInBits();
1111
1112 switch (DstSize) {
1113 case 32:
1114 switch (SrcSize) {
1115 case 32:
1116 switch (GenericOpc) {
1117 case TargetOpcode::G_SITOFP:
1118 return AArch64::SCVTFUWSri;
1119 case TargetOpcode::G_UITOFP:
1120 return AArch64::UCVTFUWSri;
1121 case TargetOpcode::G_FPTOSI:
1122 return AArch64::FCVTZSUWSr;
1123 case TargetOpcode::G_FPTOUI:
1124 return AArch64::FCVTZUUWSr;
1125 default:
1126 return GenericOpc;
1127 }
1128 case 64:
1129 switch (GenericOpc) {
1130 case TargetOpcode::G_SITOFP:
1131 return AArch64::SCVTFUXSri;
1132 case TargetOpcode::G_UITOFP:
1133 return AArch64::UCVTFUXSri;
1134 case TargetOpcode::G_FPTOSI:
1135 return AArch64::FCVTZSUWDr;
1136 case TargetOpcode::G_FPTOUI:
1137 return AArch64::FCVTZUUWDr;
1138 default:
1139 return GenericOpc;
1140 }
1141 default:
1142 return GenericOpc;
1143 }
1144 case 64:
1145 switch (SrcSize) {
1146 case 32:
1147 switch (GenericOpc) {
1148 case TargetOpcode::G_SITOFP:
1149 return AArch64::SCVTFUWDri;
1150 case TargetOpcode::G_UITOFP:
1151 return AArch64::UCVTFUWDri;
1152 case TargetOpcode::G_FPTOSI:
1153 return AArch64::FCVTZSUXSr;
1154 case TargetOpcode::G_FPTOUI:
1155 return AArch64::FCVTZUUXSr;
1156 default:
1157 return GenericOpc;
1158 }
1159 case 64:
1160 switch (GenericOpc) {
1161 case TargetOpcode::G_SITOFP:
1162 return AArch64::SCVTFUXDri;
1163 case TargetOpcode::G_UITOFP:
1164 return AArch64::UCVTFUXDri;
1165 case TargetOpcode::G_FPTOSI:
1166 return AArch64::FCVTZSUXDr;
1167 case TargetOpcode::G_FPTOUI:
1168 return AArch64::FCVTZUUXDr;
1169 default:
1170 return GenericOpc;
1171 }
1172 default:
1173 return GenericOpc;
1174 }
1175 default:
1176 return GenericOpc;
1177 };
1178 return GenericOpc;
1179}
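
// For instance, selectFPConvOpc(TargetOpcode::G_SITOFP, LLT::scalar(64),
// LLT::scalar(32)) returns AArch64::SCVTFUWDri (i32 -> f64), and
// selectFPConvOpc(TargetOpcode::G_FPTOUI, LLT::scalar(32), LLT::scalar(64))
// returns AArch64::FCVTZUUWDr (f64 -> i32); vector or unlisted scalar types
// fall back to the generic opcode.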
1180
1181MachineInstr *
1182AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1183 Register False, AArch64CC::CondCode CC,
1184 MachineIRBuilder &MIB) const {
1185 MachineRegisterInfo &MRI = *MIB.getMRI();
1186 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1187 RBI.getRegBank(True, MRI, TRI)->getID() &&
1188 "Expected both select operands to have the same regbank?");
  LLT Ty = MRI.getType(True);
1190 if (Ty.isVector())
1191 return nullptr;
1192 const unsigned Size = Ty.getSizeInBits();
1193 assert((Size == 32 || Size == 64) &&
1194 "Expected 32 bit or 64 bit select only?");
1195 const bool Is32Bit = Size == 32;
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
    constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1200 return &*FCSel;
1201 }
1202
1203 // By default, we'll try and emit a CSEL.
1204 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1205 bool Optimized = false;
1206 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1207 &Optimized](Register &Reg, Register &OtherReg,
1208 bool Invert) {
1209 if (Optimized)
1210 return false;
1211
1212 // Attempt to fold:
1213 //
1214 // %sub = G_SUB 0, %x
1215 // %select = G_SELECT cc, %reg, %sub
1216 //
1217 // Into:
1218 // %select = CSNEG %reg, %x, cc
1219 Register MatchReg;
    if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
1227 return true;
1228 }
1229
1230 // Attempt to fold:
1231 //
1232 // %xor = G_XOR %x, -1
1233 // %select = G_SELECT cc, %reg, %xor
1234 //
1235 // Into:
1236 // %select = CSINV %reg, %x, cc
    if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
1244 return true;
1245 }
1246
1247 // Attempt to fold:
1248 //
1249 // %add = G_ADD %x, 1
1250 // %select = G_SELECT cc, %reg, %add
1251 //
1252 // Into:
1253 // %select = CSINC %reg, %x, cc
    if (mi_match(Reg, MRI,
                 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
                          m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
1263 return true;
1264 }
1265
1266 return false;
1267 };
1268
1269 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1270 // true/false values are constants.
1271 // FIXME: All of these patterns already exist in tablegen. We should be
1272 // able to import these.
1273 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1274 &Optimized]() {
1275 if (Optimized)
1276 return false;
    auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
    auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1279 if (!TrueCst && !FalseCst)
1280 return false;
1281
1282 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1283 if (TrueCst && FalseCst) {
1284 int64_t T = TrueCst->Value.getSExtValue();
1285 int64_t F = FalseCst->Value.getSExtValue();
1286
1287 if (T == 0 && F == 1) {
1288 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1289 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1290 True = ZReg;
1291 False = ZReg;
1292 return true;
1293 }
1294
1295 if (T == 0 && F == -1) {
1296 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1297 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1298 True = ZReg;
1299 False = ZReg;
1300 return true;
1301 }
1302 }
1303
1304 if (TrueCst) {
1305 int64_t T = TrueCst->Value.getSExtValue();
1306 if (T == 1) {
1307 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1308 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1309 True = False;
1310 False = ZReg;
        CC = AArch64CC::getInvertedCondCode(CC);
1312 return true;
1313 }
1314
1315 if (T == -1) {
1316 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1317 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1318 True = False;
1319 False = ZReg;
        CC = AArch64CC::getInvertedCondCode(CC);
1321 return true;
1322 }
1323 }
1324
1325 if (FalseCst) {
1326 int64_t F = FalseCst->Value.getSExtValue();
1327 if (F == 1) {
1328 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1329 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1330 False = ZReg;
1331 return true;
1332 }
1333
      if (F == -1) {
        // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1337 False = ZReg;
1338 return true;
1339 }
1340 }
1341 return false;
1342 };
1343
1344 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1345 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1346 Optimized |= TryOptSelectCst();
  auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1349 return &*SelectInst;
1350}
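
// A before/after sketch of one of the folds above, with hypothetical vregs
// and an already-computed condition code CC:
//   %neg = G_SUB 0, %x
//   %sel = G_SELECT %cond, %t, %neg
// can be emitted (on the 32-bit GPR path) as
//   %sel = CSNEGWr %t, %x, CC
// with the CSINV/CSINC folds following the same shape for G_XOR x, -1 and
// G_ADD x, 1 respectively.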
1351
1352static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1353 switch (P) {
1354 default:
1355 llvm_unreachable("Unknown condition code!");
1356 case CmpInst::ICMP_NE:
1357 return AArch64CC::NE;
1358 case CmpInst::ICMP_EQ:
1359 return AArch64CC::EQ;
1360 case CmpInst::ICMP_SGT:
1361 return AArch64CC::GT;
1362 case CmpInst::ICMP_SGE:
1363 return AArch64CC::GE;
1364 case CmpInst::ICMP_SLT:
1365 return AArch64CC::LT;
1366 case CmpInst::ICMP_SLE:
1367 return AArch64CC::LE;
1368 case CmpInst::ICMP_UGT:
1369 return AArch64CC::HI;
1370 case CmpInst::ICMP_UGE:
1371 return AArch64CC::HS;
1372 case CmpInst::ICMP_ULT:
1373 return AArch64CC::LO;
1374 case CmpInst::ICMP_ULE:
1375 return AArch64CC::LS;
1376 }
1377}
1378
1379/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1380static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1381 AArch64CC::CondCode &CondCode,
1382 AArch64CC::CondCode &CondCode2) {
1383 CondCode2 = AArch64CC::AL;
1384 switch (CC) {
1385 default:
1386 llvm_unreachable("Unknown FP condition!");
1387 case CmpInst::FCMP_OEQ:
1388 CondCode = AArch64CC::EQ;
1389 break;
1390 case CmpInst::FCMP_OGT:
1391 CondCode = AArch64CC::GT;
1392 break;
1393 case CmpInst::FCMP_OGE:
1394 CondCode = AArch64CC::GE;
1395 break;
1396 case CmpInst::FCMP_OLT:
1397 CondCode = AArch64CC::MI;
1398 break;
1399 case CmpInst::FCMP_OLE:
1400 CondCode = AArch64CC::LS;
1401 break;
1402 case CmpInst::FCMP_ONE:
1403 CondCode = AArch64CC::MI;
1404 CondCode2 = AArch64CC::GT;
1405 break;
1406 case CmpInst::FCMP_ORD:
1407 CondCode = AArch64CC::VC;
1408 break;
1409 case CmpInst::FCMP_UNO:
1410 CondCode = AArch64CC::VS;
1411 break;
1412 case CmpInst::FCMP_UEQ:
1413 CondCode = AArch64CC::EQ;
1414 CondCode2 = AArch64CC::VS;
1415 break;
1416 case CmpInst::FCMP_UGT:
1417 CondCode = AArch64CC::HI;
1418 break;
1419 case CmpInst::FCMP_UGE:
1420 CondCode = AArch64CC::PL;
1421 break;
1422 case CmpInst::FCMP_ULT:
1423 CondCode = AArch64CC::LT;
1424 break;
1425 case CmpInst::FCMP_ULE:
1426 CondCode = AArch64CC::LE;
1427 break;
1428 case CmpInst::FCMP_UNE:
1429 CondCode = AArch64CC::NE;
1430 break;
1431 }
1432}
1433
1434/// Convert an IR fp condition code to an AArch64 CC.
1435/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1436/// should be AND'ed instead of OR'ed.
1437static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1438 AArch64CC::CondCode &CondCode,
1439 AArch64CC::CondCode &CondCode2) {
1440 CondCode2 = AArch64CC::AL;
1441 switch (CC) {
1442 default:
1443 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1444 assert(CondCode2 == AArch64CC::AL);
1445 break;
1446 case CmpInst::FCMP_ONE:
1447 // (a one b)
1448 // == ((a olt b) || (a ogt b))
1449 // == ((a ord b) && (a une b))
1450 CondCode = AArch64CC::VC;
1451 CondCode2 = AArch64CC::NE;
1452 break;
1453 case CmpInst::FCMP_UEQ:
1454 // (a ueq b)
1455 // == ((a uno b) || (a oeq b))
1456 // == ((a ule b) && (a uge b))
1457 CondCode = AArch64CC::PL;
1458 CondCode2 = AArch64CC::LE;
1459 break;
1460 }
1461}
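
// As a rough illustration of the OR'ed form above (assembly is a sketch):
// an (a one b) compare-and-branch can be lowered with two conditional
// branches, one per condition code,
//   fcmp s0, s1
//   b.mi .LBB0_target
//   b.gt .LBB0_target
// whereas the AND'ed codes from changeFPCCToANDAArch64CC are the ones geared
// towards the CCMP-based conjunction emission.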
1462
1463/// Return a register which can be used as a bit to test in a TB(N)Z.
1464static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1465 MachineRegisterInfo &MRI) {
1466 assert(Reg.isValid() && "Expected valid register!");
1467 bool HasZext = false;
1468 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1469 unsigned Opc = MI->getOpcode();
1470
    if (!MI->getOperand(0).isReg() ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1473 break;
1474
1475 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1476 //
1477 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1478 // on the truncated x is the same as the bit number on x.
1479 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1480 Opc == TargetOpcode::G_TRUNC) {
1481 if (Opc == TargetOpcode::G_ZEXT)
1482 HasZext = true;
1483
      Register NextReg = MI->getOperand(1).getReg();
      // Did we find something worth folding?
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1487 break;
1488
1489 // NextReg is worth folding. Keep looking.
1490 Reg = NextReg;
1491 continue;
1492 }
1493
1494 // Attempt to find a suitable operation with a constant on one side.
1495 std::optional<uint64_t> C;
1496 Register TestReg;
1497 switch (Opc) {
1498 default:
1499 break;
1500 case TargetOpcode::G_AND:
1501 case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(1).getReg();
      Register ConstantReg = MI->getOperand(2).getReg();
      auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
      if (!VRegAndVal) {
        // AND commutes, check the other side for a constant.
        // FIXME: Can we canonicalize the constant so that it's always on the
        // same side at some point earlier?
        std::swap(ConstantReg, TestReg);
        VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1511 }
1512 if (VRegAndVal) {
1513 if (HasZext)
1514 C = VRegAndVal->Value.getZExtValue();
1515 else
1516 C = VRegAndVal->Value.getSExtValue();
1517 }
1518 break;
1519 }
1520 case TargetOpcode::G_ASHR:
1521 case TargetOpcode::G_LSHR:
1522 case TargetOpcode::G_SHL: {
      TestReg = MI->getOperand(1).getReg();
      auto VRegAndVal =
          getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1526 if (VRegAndVal)
1527 C = VRegAndVal->Value.getSExtValue();
1528 break;
1529 }
1530 }
1531
1532 // Didn't find a constant or viable register. Bail out of the loop.
1533 if (!C || !TestReg.isValid())
1534 break;
1535
1536 // We found a suitable instruction with a constant. Check to see if we can
1537 // walk through the instruction.
1538 Register NextReg;
1539 unsigned TestRegSize = MRI.getType(Reg: TestReg).getSizeInBits();
1540 switch (Opc) {
1541 default:
1542 break;
1543 case TargetOpcode::G_AND:
1544 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1545 if ((*C >> Bit) & 1)
1546 NextReg = TestReg;
1547 break;
1548 case TargetOpcode::G_SHL:
1549 // (tbz (shl x, c), b) -> (tbz x, b-c) when b >= c and b-c fits in the
1550 // width of the register.
1551 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1552 NextReg = TestReg;
1553 Bit = Bit - *C;
1554 }
1555 break;
1556 case TargetOpcode::G_ASHR:
1557 // (tbz (ashr x, c), b) -> (tbz x, b+c) or, if b+c is >= # bits in x,
1558 // (tbz x, msb).
1559 NextReg = TestReg;
1560 Bit = Bit + *C;
1561 if (Bit >= TestRegSize)
1562 Bit = TestRegSize - 1;
1563 break;
1564 case TargetOpcode::G_LSHR:
1565 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1566 if ((Bit + *C) < TestRegSize) {
1567 NextReg = TestReg;
1568 Bit = Bit + *C;
1569 }
1570 break;
1571 case TargetOpcode::G_XOR:
1572 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1573 // appropriate.
1574 //
1575 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1576 //
1577 // tbz x', b -> tbnz x, b
1578 //
1579 // Because x' only has the b-th bit set if x does not.
1580 if ((*C >> Bit) & 1)
1581 Invert = !Invert;
1582 NextReg = TestReg;
1583 break;
1584 }
1585
1586 // Check if we found anything worth folding.
1587 if (!NextReg.isValid())
1588 return Reg;
1589 Reg = NextReg;
1590 }
1591
1592 return Reg;
1593}
1594
1595MachineInstr *AArch64InstructionSelector::emitTestBit(
1596 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1597 MachineIRBuilder &MIB) const {
1598 assert(TestReg.isValid());
1599 assert(ProduceNonFlagSettingCondBr &&
1600 "Cannot emit TB(N)Z with speculation tracking!");
1601 MachineRegisterInfo &MRI = *MIB.getMRI();
1602
1603 // Attempt to optimize the test bit by walking over instructions.
1604 TestReg = getTestBitReg(Reg: TestReg, Bit, Invert&: IsNegative, MRI);
1605 LLT Ty = MRI.getType(Reg: TestReg);
1606 unsigned Size = Ty.getSizeInBits();
1607 assert(!Ty.isVector() && "Expected a scalar!");
1608 assert(Bit < 64 && "Bit is too large!");
1609
1610 // Bits below 32 are tested with the W form (TB(N)ZW) and higher bits with
1611 // the X form, so move the test register to the matching size if needed.
1612 bool UseWReg = Bit < 32;
1613 unsigned NecessarySize = UseWReg ? 32 : 64;
1614 if (Size != NecessarySize)
1615 TestReg = moveScalarRegClass(
1616 Reg: TestReg, RC: UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1617 MIB);
1618
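// OpcTable[UseWReg][IsNegative]: the first index picks the X vs. W form, the
// second picks TBZ vs. TBNZ.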
1619 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1620 {AArch64::TBZW, AArch64::TBNZW}};
1621 unsigned Opc = OpcTable[UseWReg][IsNegative];
1622 auto TestBitMI =
1623 MIB.buildInstr(Opcode: Opc).addReg(RegNo: TestReg).addImm(Val: Bit).addMBB(MBB: DstMBB);
1624 constrainSelectedInstRegOperands(I&: *TestBitMI, TII, TRI, RBI);
1625 return &*TestBitMI;
1626}
1627
1628bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1629 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1630 MachineIRBuilder &MIB) const {
1631 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1632 // Given something like this:
1633 //
1634 // %x = ...Something...
1635 // %one = G_CONSTANT i64 1
1636 // %zero = G_CONSTANT i64 0
1637 // %and = G_AND %x, %one
1638 // %cmp = G_ICMP intpred(ne), %and, %zero
1639 // %cmp_trunc = G_TRUNC %cmp
1640 // G_BRCOND %cmp_trunc, %bb.3
1641 //
1642 // We want to try and fold the AND into the G_BRCOND and produce either a
1643 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1644 //
1645 // In this case, we'd get
1646 //
1647 // TBNZ %x %bb.3
1648 //
1649
1650 // Check if the AND has a constant on its RHS which we can use as a mask.
1651 // If it's a power of 2, then it's the same as checking a specific bit.
1652 // (e.g., ANDing with 8 == ANDing with 0b1000 == testing if bit 3 is set)
1653 auto MaybeBit = getIConstantVRegValWithLookThrough(
1654 VReg: AndInst.getOperand(i: 2).getReg(), MRI: *MIB.getMRI());
1655 if (!MaybeBit)
1656 return false;
1657
1658 int32_t Bit = MaybeBit->Value.exactLogBase2();
1659 if (Bit < 0)
1660 return false;
1661
1662 Register TestReg = AndInst.getOperand(i: 1).getReg();
1663
1664 // Emit a TB(N)Z.
1665 emitTestBit(TestReg, Bit, IsNegative: Invert, DstMBB, MIB);
1666 return true;
1667}
1668
1669MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1670 bool IsNegative,
1671 MachineBasicBlock *DestMBB,
1672 MachineIRBuilder &MIB) const {
1673 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1674 MachineRegisterInfo &MRI = *MIB.getMRI();
1675 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1676 AArch64::GPRRegBankID &&
1677 "Expected GPRs only?");
1678 auto Ty = MRI.getType(Reg: CompareReg);
1679 unsigned Width = Ty.getSizeInBits();
1680 assert(!Ty.isVector() && "Expected scalar only?");
1681 assert(Width <= 64 && "Expected width to be at most 64?");
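// OpcTable[IsNegative][Width == 64]: CBZ vs. CBNZ, then the W vs. X form.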
1682 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1683 {AArch64::CBNZW, AArch64::CBNZX}};
1684 unsigned Opc = OpcTable[IsNegative][Width == 64];
1685 auto BranchMI = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {CompareReg}).addMBB(MBB: DestMBB);
1686 constrainSelectedInstRegOperands(I&: *BranchMI, TII, TRI, RBI);
1687 return &*BranchMI;
1688}
1689
1690bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1691 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1692 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1693 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1694 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1695 // totally clean. Some of them require two branches to implement.
1696 auto Pred = (CmpInst::Predicate)FCmp.getOperand(i: 1).getPredicate();
1697 emitFPCompare(LHS: FCmp.getOperand(i: 2).getReg(), RHS: FCmp.getOperand(i: 3).getReg(), MIRBuilder&: MIB,
1698 Pred);
1699 AArch64CC::CondCode CC1, CC2;
1700 changeFCMPPredToAArch64CC(P: static_cast<CmpInst::Predicate>(Pred), CondCode&: CC1, CondCode2&: CC2);
1701 MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
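// The returned condition codes are OR'ed: branch to DestMBB if either holds,
// so emit a second Bcc to the same block when CC2 is needed.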
1702 MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC1).addMBB(MBB: DestMBB);
1703 if (CC2 != AArch64CC::AL)
1704 MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC2).addMBB(MBB: DestMBB);
1705 I.eraseFromParent();
1706 return true;
1707}
1708
1709bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1710 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1711 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1712 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1713 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1714 //
1715 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1716 // instructions will not be produced, as they are conditional branch
1717 // instructions that do not set flags.
1718 if (!ProduceNonFlagSettingCondBr)
1719 return false;
1720
1721 MachineRegisterInfo &MRI = *MIB.getMRI();
1722 MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
1723 auto Pred =
1724 static_cast<CmpInst::Predicate>(ICmp.getOperand(i: 1).getPredicate());
1725 Register LHS = ICmp.getOperand(i: 2).getReg();
1726 Register RHS = ICmp.getOperand(i: 3).getReg();
1727
1728 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1729 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
1730 MachineInstr *AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI);
1731
1732 // When we can emit a TB(N)Z, prefer that.
1733 //
1734 // Handle non-commutative condition codes first.
1735 // Note that we don't want to do this when we have a G_AND because it can
1736 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1737 if (VRegAndVal && !AndInst) {
1738 int64_t C = VRegAndVal->Value.getSExtValue();
1739
1740 // When we have a signed greater-than comparison against -1, we can just
1741 // test if the msb is zero.
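// e.g. G_BRCOND (G_ICMP sgt %x:s64, -1) becomes TBZ %x, #63.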
1742 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1743 uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
1744 emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB);
1745 I.eraseFromParent();
1746 return true;
1747 }
1748
1749 // When we have a signed less-than comparison against zero, we can just
1750 // test if the msb is not zero.
1751 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1752 uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
1753 emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ true, DstMBB: DestMBB, MIB);
1754 I.eraseFromParent();
1755 return true;
1756 }
1757
1758 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1759 // we can test if the msb is zero.
1760 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1761 uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
1762 emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB);
1763 I.eraseFromParent();
1764 return true;
1765 }
1766 }
1767
1768 // Attempt to handle commutative condition codes. Right now, that's only
1769 // eq/ne.
1770 if (ICmpInst::isEquality(P: Pred)) {
1771 if (!VRegAndVal) {
1772 std::swap(a&: RHS, b&: LHS);
1773 VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
1774 AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI);
1775 }
1776
1777 if (VRegAndVal && VRegAndVal->Value == 0) {
1778 // If there's a G_AND feeding into this branch, try to fold it away by
1779 // emitting a TB(N)Z instead.
1780 //
1781 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1782 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1783 // would be redundant.
1784 if (AndInst &&
1785 tryOptAndIntoCompareBranch(
1786 AndInst&: *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DstMBB: DestMBB, MIB)) {
1787 I.eraseFromParent();
1788 return true;
1789 }
1790
1791 // Otherwise, try to emit a CB(N)Z instead.
1792 auto LHSTy = MRI.getType(Reg: LHS);
1793 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1794 emitCBZ(CompareReg: LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1795 I.eraseFromParent();
1796 return true;
1797 }
1798 }
1799 }
1800
1801 return false;
1802}
1803
1804bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1805 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1806 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1807 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1808 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1809 return true;
1810
1811 // Couldn't optimize. Emit a compare + a Bcc.
1812 MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
1813 auto PredOp = ICmp.getOperand(i: 1);
1814 emitIntegerCompare(LHS&: ICmp.getOperand(i: 2), RHS&: ICmp.getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB);
1815 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1816 P: static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1817 MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC).addMBB(MBB: DestMBB);
1818 I.eraseFromParent();
1819 return true;
1820}
1821
1822bool AArch64InstructionSelector::selectCompareBranch(
1823 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1824 Register CondReg = I.getOperand(i: 0).getReg();
1825 MachineInstr *CCMI = MRI.getVRegDef(Reg: CondReg);
1826 // Try to select the G_BRCOND using whatever is feeding the condition if
1827 // possible.
1828 unsigned CCMIOpc = CCMI->getOpcode();
1829 if (CCMIOpc == TargetOpcode::G_FCMP)
1830 return selectCompareBranchFedByFCmp(I, FCmp&: *CCMI, MIB);
1831 if (CCMIOpc == TargetOpcode::G_ICMP)
1832 return selectCompareBranchFedByICmp(I, ICmp&: *CCMI, MIB);
1833
1834 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1835 // instructions will not be produced, as they are conditional branch
1836 // instructions that do not set flags.
1837 if (ProduceNonFlagSettingCondBr) {
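// Test bit 0 of the condition register; the branch is taken when the low
// bit is set, so emit a TBNZ.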
1838 emitTestBit(TestReg: CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1839 DstMBB: I.getOperand(i: 1).getMBB(), MIB);
1840 I.eraseFromParent();
1841 return true;
1842 }
1843
1844 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1845 auto TstMI =
1846 MIB.buildInstr(Opc: AArch64::ANDSWri, DstOps: {LLT::scalar(SizeInBits: 32)}, SrcOps: {CondReg}).addImm(Val: 1);
1847 constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI);
1848 auto Bcc = MIB.buildInstr(Opcode: AArch64::Bcc)
1849 .addImm(Val: AArch64CC::NE)
1850 .addMBB(MBB: I.getOperand(i: 1).getMBB());
1851 I.eraseFromParent();
1852 return constrainSelectedInstRegOperands(I&: *Bcc, TII, TRI, RBI);
1853}
1854
1855/// Returns the element immediate value of a vector shift operand if found.
1856/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1857static std::optional<int64_t> getVectorShiftImm(Register Reg,
1858 MachineRegisterInfo &MRI) {
1859 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1860 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1861 return getAArch64VectorSplatScalar(MI: *OpMI, MRI);
1862}
1863
1864/// Matches and returns the shift immediate value for a SHL instruction given
1865/// a shift operand.
1866static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1867 MachineRegisterInfo &MRI) {
1868 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1869 if (!ShiftImm)
1870 return std::nullopt;
1871 // Check the immediate is in range for a SHL.
1872 int64_t Imm = *ShiftImm;
1873 if (Imm < 0)
1874 return std::nullopt;
1875 switch (SrcTy.getElementType().getSizeInBits()) {
1876 default:
1877 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1878 return std::nullopt;
1879 case 8:
1880 if (Imm > 7)
1881 return std::nullopt;
1882 break;
1883 case 16:
1884 if (Imm > 15)
1885 return std::nullopt;
1886 break;
1887 case 32:
1888 if (Imm > 31)
1889 return std::nullopt;
1890 break;
1891 case 64:
1892 if (Imm > 63)
1893 return std::nullopt;
1894 break;
1895 }
1896 return Imm;
1897}
1898
1899bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1900 MachineRegisterInfo &MRI) {
1901 assert(I.getOpcode() == TargetOpcode::G_SHL);
1902 Register DstReg = I.getOperand(i: 0).getReg();
1903 const LLT Ty = MRI.getType(Reg: DstReg);
1904 Register Src1Reg = I.getOperand(i: 1).getReg();
1905 Register Src2Reg = I.getOperand(i: 2).getReg();
1906
1907 if (!Ty.isVector())
1908 return false;
1909
1910 // Check if we have a vector of constants on RHS that we can select as the
1911 // immediate form.
1912 std::optional<int64_t> ImmVal = getVectorSHLImm(SrcTy: Ty, Reg: Src2Reg, MRI);
1913
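// With a constant splat shift amount we can use the immediate form
// (SHLv*_shift); otherwise fall back to the register form (USHLv*).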
1914 unsigned Opc = 0;
1915 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
1916 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1917 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
1918 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1919 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
1920 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1921 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) {
1922 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1923 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) {
1924 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1925 } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) {
1926 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1927 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) {
1928 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1929 } else {
1930 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1931 return false;
1932 }
1933
1934 auto Shl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg});
1935 if (ImmVal)
1936 Shl.addImm(Val: *ImmVal);
1937 else
1938 Shl.addUse(RegNo: Src2Reg);
1939 constrainSelectedInstRegOperands(I&: *Shl, TII, TRI, RBI);
1940 I.eraseFromParent();
1941 return true;
1942}
1943
1944bool AArch64InstructionSelector::selectVectorAshrLshr(
1945 MachineInstr &I, MachineRegisterInfo &MRI) {
1946 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1947 I.getOpcode() == TargetOpcode::G_LSHR);
1948 Register DstReg = I.getOperand(i: 0).getReg();
1949 const LLT Ty = MRI.getType(Reg: DstReg);
1950 Register Src1Reg = I.getOperand(i: 1).getReg();
1951 Register Src2Reg = I.getOperand(i: 2).getReg();
1952
1953 if (!Ty.isVector())
1954 return false;
1955
1956 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1957
1958 // We expect the immediate case to be lowered in the PostLegalCombiner to
1959 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1960
1961 // There is no shift-right-by-register instruction, but the
1962 // shift-left-by-register instruction takes a signed shift amount, where
1963 // negative values specify a right shift.
1964
1965 unsigned Opc = 0;
1966 unsigned NegOpc = 0;
1967 const TargetRegisterClass *RC =
1968 getRegClassForTypeOnBank(Ty, RB: RBI.getRegBank(ID: AArch64::FPRRegBankID));
1969 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
1970 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1971 NegOpc = AArch64::NEGv2i64;
1972 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
1973 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1974 NegOpc = AArch64::NEGv4i32;
1975 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
1976 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1977 NegOpc = AArch64::NEGv2i32;
1978 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) {
1979 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1980 NegOpc = AArch64::NEGv4i16;
1981 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) {
1982 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1983 NegOpc = AArch64::NEGv8i16;
1984 } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) {
1985 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1986 NegOpc = AArch64::NEGv16i8;
1987 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) {
1988 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1989 NegOpc = AArch64::NEGv8i8;
1990 } else {
1991 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1992 return false;
1993 }
1994
1995 auto Neg = MIB.buildInstr(Opc: NegOpc, DstOps: {RC}, SrcOps: {Src2Reg});
1996 constrainSelectedInstRegOperands(I&: *Neg, TII, TRI, RBI);
1997 auto SShl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg, Neg});
1998 constrainSelectedInstRegOperands(I&: *SShl, TII, TRI, RBI);
1999 I.eraseFromParent();
2000 return true;
2001}
2002
2003bool AArch64InstructionSelector::selectVaStartAAPCS(
2004 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2005
2006 if (STI.isCallingConvWin64(CC: MF.getFunction().getCallingConv(),
2007 IsVarArg: MF.getFunction().isVarArg()))
2008 return false;
2009
2010 // The layout of the va_list struct is specified in the AArch64 Procedure Call
2011 // Standard, section 10.1.5.
2012
2013 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2014 const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
2015 const auto *PtrRegClass =
2016 STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
2017
2018 const MCInstrDesc &MCIDAddAddr =
2019 TII.get(Opcode: STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
2020 const MCInstrDesc &MCIDStoreAddr =
2021 TII.get(Opcode: STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
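// On ILP32, pointers are 32 bits, so the address adds and stores use the
// W-register forms.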
2022
2023 /*
2024 * typedef struct va_list {
2025 * void * stack; // next stack param
2026 * void * gr_top; // end of GP arg reg save area
2027 * void * vr_top; // end of FP/SIMD arg reg save area
2028 * int gr_offs; // offset from gr_top to next GP register arg
2029 * int vr_offs; // offset from vr_top to next FP/SIMD register arg
2030 * } va_list;
2031 */
2032 const auto VAList = I.getOperand(i: 0).getReg();
2033
2034 // Our current offset in bytes from the va_list struct (VAList).
2035 unsigned OffsetBytes = 0;
2036
2037 // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
2038 // and increment OffsetBytes by PtrSize.
2039 const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
2040 const Register Top = MRI.createVirtualRegister(RegClass: PtrRegClass);
2041 auto MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: MCIDAddAddr)
2042 .addDef(RegNo: Top)
2043 .addFrameIndex(Idx: FrameIndex)
2044 .addImm(Val: Imm)
2045 .addImm(Val: 0);
2046 constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2047
2048 const auto *MMO = *I.memoperands_begin();
2049 MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: MCIDStoreAddr)
2050 .addUse(RegNo: Top)
2051 .addUse(RegNo: VAList)
2052 .addImm(Val: OffsetBytes / PtrSize)
2053 .addMemOperand(MMO: MF.getMachineMemOperand(
2054 PtrInfo: MMO->getPointerInfo().getWithOffset(O: OffsetBytes),
2055 F: MachineMemOperand::MOStore, Size: PtrSize, BaseAlignment: MMO->getBaseAlign()));
2056 constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2057
2058 OffsetBytes += PtrSize;
2059 };
2060
2061 // void* stack at offset 0
2062 PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
2063
2064 // void* gr_top at offset 8 (4 on ILP32)
2065 const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
2066 PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
2067
2068 // void* vr_top at offset 16 (8 on ILP32)
2069 const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
2070 PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
2071
2072 // Helper function to store a 4-byte integer constant to VAList at offset
2073 // OffsetBytes, and increment OffsetBytes by 4.
2074 const auto PushIntConstant = [&](const int32_t Value) {
2075 constexpr int IntSize = 4;
2076 const Register Temp = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
2077 auto MIB =
2078 BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::MOVi32imm))
2079 .addDef(RegNo: Temp)
2080 .addImm(Val: Value);
2081 constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2082
2083 const auto *MMO = *I.memoperands_begin();
2084 MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::STRWui))
2085 .addUse(RegNo: Temp)
2086 .addUse(RegNo: VAList)
2087 .addImm(Val: OffsetBytes / IntSize)
2088 .addMemOperand(MMO: MF.getMachineMemOperand(
2089 PtrInfo: MMO->getPointerInfo().getWithOffset(O: OffsetBytes),
2090 F: MachineMemOperand::MOStore, Size: IntSize, BaseAlignment: MMO->getBaseAlign()));
2091 constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2092 OffsetBytes += IntSize;
2093 };
2094
2095 // int gr_offs at offset 24 (12 on ILP32)
2096 PushIntConstant(-static_cast<int32_t>(GPRSize));
2097
2098 // int vr_offs at offset 28 (16 on ILP32)
2099 PushIntConstant(-static_cast<int32_t>(FPRSize));
2100
2101 assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
2102
2103 I.eraseFromParent();
2104 return true;
2105}
2106
2107bool AArch64InstructionSelector::selectVaStartDarwin(
2108 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2109 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2110 Register ListReg = I.getOperand(i: 0).getReg();
2111
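// va_list is a single pointer here: store the address of the first variadic
// argument slot into it (for Win64 varargs, the GPR save area if one exists).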
2112 Register ArgsAddrReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
2113
2114 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2115 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2116 CC: MF.getFunction().getCallingConv(), IsVarArg: MF.getFunction().isVarArg())) {
2117 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2118 ? FuncInfo->getVarArgsGPRIndex()
2119 : FuncInfo->getVarArgsStackIndex();
2120 }
2121
2122 auto MIB =
2123 BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::ADDXri))
2124 .addDef(RegNo: ArgsAddrReg)
2125 .addFrameIndex(Idx: FrameIdx)
2126 .addImm(Val: 0)
2127 .addImm(Val: 0);
2128
2129 constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2130
2131 MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::STRXui))
2132 .addUse(RegNo: ArgsAddrReg)
2133 .addUse(RegNo: ListReg)
2134 .addImm(Val: 0)
2135 .addMemOperand(MMO: *I.memoperands_begin());
2136
2137 constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2138 I.eraseFromParent();
2139 return true;
2140}
2141
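/// Materialize a 64-bit constant address (a global or block address) with a
/// MOVZ plus three MOVKs, 16 bits at a time. This is used for the large code
/// model when not position independent.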
2142void AArch64InstructionSelector::materializeLargeCMVal(
2143 MachineInstr &I, const Value *V, unsigned OpFlags) {
2144 MachineBasicBlock &MBB = *I.getParent();
2145 MachineFunction &MF = *MBB.getParent();
2146 MachineRegisterInfo &MRI = MF.getRegInfo();
2147
2148 auto MovZ = MIB.buildInstr(Opc: AArch64::MOVZXi, DstOps: {&AArch64::GPR64RegClass}, SrcOps: {});
2149 MovZ->addOperand(MF, Op: I.getOperand(i: 1));
2150 MovZ->getOperand(i: 1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2151 AArch64II::MO_NC);
2152 MovZ->addOperand(MF, Op: MachineOperand::CreateImm(Val: 0));
2153 constrainSelectedInstRegOperands(I&: *MovZ, TII, TRI, RBI);
2154
2155 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2156 Register ForceDstReg) {
2157 Register DstReg = ForceDstReg
2158 ? ForceDstReg
2159 : MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
2160 auto MovI = MIB.buildInstr(Opcode: AArch64::MOVKXi).addDef(RegNo: DstReg).addUse(RegNo: SrcReg);
2161 if (auto *GV = dyn_cast<GlobalValue>(Val: V)) {
2162 MovI->addOperand(MF, Op: MachineOperand::CreateGA(
2163 GV, Offset: MovZ->getOperand(i: 1).getOffset(), TargetFlags: Flags));
2164 } else {
2165 MovI->addOperand(
2166 MF, Op: MachineOperand::CreateBA(BA: cast<BlockAddress>(Val: V),
2167 Offset: MovZ->getOperand(i: 1).getOffset(), TargetFlags: Flags));
2168 }
2169 MovI->addOperand(MF, Op: MachineOperand::CreateImm(Val: Offset));
2170 constrainSelectedInstRegOperands(I&: *MovI, TII, TRI, RBI);
2171 return DstReg;
2172 };
2173 Register DstReg = BuildMovK(MovZ.getReg(Idx: 0),
2174 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2175 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2176 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(i: 0).getReg());
2177}
2178
2179bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2180 MachineBasicBlock &MBB = *I.getParent();
2181 MachineFunction &MF = *MBB.getParent();
2182 MachineRegisterInfo &MRI = MF.getRegInfo();
2183
2184 switch (I.getOpcode()) {
2185 case TargetOpcode::G_STORE: {
2186 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2187 MachineOperand &SrcOp = I.getOperand(i: 0);
2188 if (MRI.getType(Reg: SrcOp.getReg()).isPointer()) {
2189 // Allow matching with imported patterns for stores of pointers. Unlike
2190 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2191 // and constrain.
2192 auto Copy = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: SrcOp);
2193 Register NewSrc = Copy.getReg(Idx: 0);
2194 SrcOp.setReg(NewSrc);
2195 RBI.constrainGenericRegister(Reg: NewSrc, RC: AArch64::GPR64RegClass, MRI);
2196 Changed = true;
2197 }
2198 return Changed;
2199 }
2200 case TargetOpcode::G_PTR_ADD: {
2201 // If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer
2202 // arithmetic semantics instead of falling back to regular arithmetic.
2203 const auto &TL = STI.getTargetLowering();
2204 if (TL->shouldPreservePtrArith(F: MF.getFunction(), PtrVT: EVT()))
2205 return false;
2206 return convertPtrAddToAdd(I, MRI);
2207 }
2208 case TargetOpcode::G_LOAD: {
2209 // For scalar loads of pointers, we try to convert the dest type from p0
2210 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2211 // conversion, this should be ok because all users should have been
2212 // selected already, so the type doesn't matter for them.
2213 Register DstReg = I.getOperand(i: 0).getReg();
2214 const LLT DstTy = MRI.getType(Reg: DstReg);
2215 if (!DstTy.isPointer())
2216 return false;
2217 MRI.setType(VReg: DstReg, Ty: LLT::scalar(SizeInBits: 64));
2218 return true;
2219 }
2220 case AArch64::G_DUP: {
2221 // Convert the type from p0 to s64 to help selection.
2222 LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2223 if (!DstTy.isPointerVector())
2224 return false;
2225 auto NewSrc = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: I.getOperand(i: 1).getReg());
2226 MRI.setType(VReg: I.getOperand(i: 0).getReg(),
2227 Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64)));
2228 MRI.setRegClass(Reg: NewSrc.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
2229 I.getOperand(i: 1).setReg(NewSrc.getReg(Idx: 0));
2230 return true;
2231 }
2232 case AArch64::G_INSERT_VECTOR_ELT: {
2233 // Convert the type from p0 to s64 to help selection.
2234 LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2235 LLT SrcVecTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
2236 if (!SrcVecTy.isPointerVector())
2237 return false;
2238 auto NewSrc = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: I.getOperand(i: 2).getReg());
2239 MRI.setType(VReg: I.getOperand(i: 1).getReg(),
2240 Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64)));
2241 MRI.setType(VReg: I.getOperand(i: 0).getReg(),
2242 Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64)));
2243 MRI.setRegClass(Reg: NewSrc.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
2244 I.getOperand(i: 2).setReg(NewSrc.getReg(Idx: 0));
2245 return true;
2246 }
2247 case TargetOpcode::G_UITOFP:
2248 case TargetOpcode::G_SITOFP: {
2249 // If both source and destination regbanks are FPR, then convert the opcode
2250 // to G_SITOF so that the importer can select it to an fpr variant.
2251 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2252 // copy.
2253 Register SrcReg = I.getOperand(i: 1).getReg();
2254 LLT SrcTy = MRI.getType(Reg: SrcReg);
2255 LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2256 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2257 return false;
2258
2259 if (RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2260 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2261 I.setDesc(TII.get(Opcode: AArch64::G_SITOF));
2262 else
2263 I.setDesc(TII.get(Opcode: AArch64::G_UITOF));
2264 return true;
2265 }
2266 return false;
2267 }
2268 default:
2269 return false;
2270 }
2271}
2272
2273/// This lowering tries to look for G_PTR_ADD instructions and then converts
2274/// them to a standard G_ADD with a COPY on the source.
2275///
2276/// The motivation behind this is to expose the add semantics to the imported
2277/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2278/// because the selector works bottom up, uses before defs. By the time we
2279/// end up trying to select a G_PTR_ADD, we should have already attempted to
2280/// fold this into addressing modes and were therefore unsuccessful.
2281bool AArch64InstructionSelector::convertPtrAddToAdd(
2282 MachineInstr &I, MachineRegisterInfo &MRI) {
2283 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2284 Register DstReg = I.getOperand(i: 0).getReg();
2285 Register AddOp1Reg = I.getOperand(i: 1).getReg();
2286 const LLT PtrTy = MRI.getType(Reg: DstReg);
2287 if (PtrTy.getAddressSpace() != 0)
2288 return false;
2289
2290 const LLT CastPtrTy =
2291 PtrTy.isVector() ? LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) : LLT::scalar(SizeInBits: 64);
2292 auto PtrToInt = MIB.buildPtrToInt(Dst: CastPtrTy, Src: AddOp1Reg);
2293 // Set regbanks on the registers.
2294 if (PtrTy.isVector())
2295 MRI.setRegBank(Reg: PtrToInt.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::FPRRegBankID));
2296 else
2297 MRI.setRegBank(Reg: PtrToInt.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::GPRRegBankID));
2298
2299 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2300 // %dst(intty) = G_ADD %intbase, off
2301 I.setDesc(TII.get(Opcode: TargetOpcode::G_ADD));
2302 MRI.setType(VReg: DstReg, Ty: CastPtrTy);
2303 I.getOperand(i: 1).setReg(PtrToInt.getReg(Idx: 0));
2304 if (!select(I&: *PtrToInt)) {
2305 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2306 return false;
2307 }
2308
2309 // Also take the opportunity here to try to do some optimization.
2310 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
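// e.g. %neg = G_SUB 0, %x; %dst = G_PTR_ADD %base, %neg
//   --> %dst(s64) = G_SUB %intbase, %x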
2311 Register NegatedReg;
2312 if (!mi_match(R: I.getOperand(i: 2).getReg(), MRI, P: m_Neg(Src: m_Reg(R&: NegatedReg))))
2313 return true;
2314 I.getOperand(i: 2).setReg(NegatedReg);
2315 I.setDesc(TII.get(Opcode: TargetOpcode::G_SUB));
2316 return true;
2317}
2318
2319bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2320 MachineRegisterInfo &MRI) {
2321 // We try to match the immediate variant of LSL, which is actually an alias
2322 // for a special case of UBFM. Otherwise, we fall back to the imported
2323 // selector which will match the register variant.
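// e.g. for 64 bits, lsl x0, x1, #3 is ubfm x0, x1, #61, #60.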
2324 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2325 const auto &MO = I.getOperand(i: 2);
2326 auto VRegAndVal = getIConstantVRegVal(VReg: MO.getReg(), MRI);
2327 if (!VRegAndVal)
2328 return false;
2329
2330 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2331 if (DstTy.isVector())
2332 return false;
2333 bool Is64Bit = DstTy.getSizeInBits() == 64;
2334 auto Imm1Fn = Is64Bit ? selectShiftA_64(Root: MO) : selectShiftA_32(Root: MO);
2335 auto Imm2Fn = Is64Bit ? selectShiftB_64(Root: MO) : selectShiftB_32(Root: MO);
2336
2337 if (!Imm1Fn || !Imm2Fn)
2338 return false;
2339
2340 auto NewI =
2341 MIB.buildInstr(Opc: Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2342 DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {I.getOperand(i: 1).getReg()});
2343
2344 for (auto &RenderFn : *Imm1Fn)
2345 RenderFn(NewI);
2346 for (auto &RenderFn : *Imm2Fn)
2347 RenderFn(NewI);
2348
2349 I.eraseFromParent();
2350 return constrainSelectedInstRegOperands(I&: *NewI, TII, TRI, RBI);
2351}
2352
2353bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2354 MachineInstr &I, MachineRegisterInfo &MRI) {
2355 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2356 // If we're storing a scalar, it doesn't matter what register bank that
2357 // scalar is on. All that matters is the size.
2358 //
2359 // So, if we see something like this (with a 32-bit scalar as an example):
2360 //
2361 // %x:gpr(s32) = ... something ...
2362 // %y:fpr(s32) = COPY %x:gpr(s32)
2363 // G_STORE %y:fpr(s32)
2364 //
2365 // We can fix this up into something like this:
2366 //
2367 // G_STORE %x:gpr(s32)
2368 //
2369 // And then continue the selection process normally.
2370 Register DefDstReg = getSrcRegIgnoringCopies(Reg: I.getOperand(i: 0).getReg(), MRI);
2371 if (!DefDstReg.isValid())
2372 return false;
2373 LLT DefDstTy = MRI.getType(Reg: DefDstReg);
2374 Register StoreSrcReg = I.getOperand(i: 0).getReg();
2375 LLT StoreSrcTy = MRI.getType(Reg: StoreSrcReg);
2376
2377 // If we get something strange like a physical register, then we shouldn't
2378 // go any further.
2379 if (!DefDstTy.isValid())
2380 return false;
2381
2382 // Are the source and dst types the same size?
2383 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2384 return false;
2385
2386 if (RBI.getRegBank(Reg: StoreSrcReg, MRI, TRI) ==
2387 RBI.getRegBank(Reg: DefDstReg, MRI, TRI))
2388 return false;
2389
2390 // We have a cross-bank copy, which is entering a store. Let's fold it.
2391 I.getOperand(i: 0).setReg(DefDstReg);
2392 return true;
2393}
2394
2395bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2396 assert(I.getParent() && "Instruction should be in a basic block!");
2397 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2398
2399 MachineBasicBlock &MBB = *I.getParent();
2400 MachineFunction &MF = *MBB.getParent();
2401 MachineRegisterInfo &MRI = MF.getRegInfo();
2402
2403 switch (I.getOpcode()) {
2404 case AArch64::G_DUP: {
2405 // Before selecting a DUP instruction, check if it is better selected as a
2406 // MOV or load from a constant pool.
2407 Register Src = I.getOperand(i: 1).getReg();
2408 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(VReg: Src, MRI);
2409 if (!ValAndVReg)
2410 return false;
2411 LLVMContext &Ctx = MF.getFunction().getContext();
2412 Register Dst = I.getOperand(i: 0).getReg();
2413 auto *CV = ConstantDataVector::getSplat(
2414 NumElts: MRI.getType(Reg: Dst).getNumElements(),
2415 Elt: ConstantInt::get(
2416 Ty: Type::getIntNTy(C&: Ctx, N: MRI.getType(Reg: Dst).getScalarSizeInBits()),
2417 V: ValAndVReg->Value.trunc(width: MRI.getType(Reg: Dst).getScalarSizeInBits())));
2418 if (!emitConstantVector(Dst, CV, MIRBuilder&: MIB, MRI))
2419 return false;
2420 I.eraseFromParent();
2421 return true;
2422 }
2423 case TargetOpcode::G_SEXT:
2424 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2425 // over a normal extend.
2426 if (selectUSMovFromExtend(I, MRI))
2427 return true;
2428 return false;
2429 case TargetOpcode::G_BR:
2430 return false;
2431 case TargetOpcode::G_SHL:
2432 return earlySelectSHL(I, MRI);
2433 case TargetOpcode::G_CONSTANT: {
2434 bool IsZero = false;
2435 if (I.getOperand(i: 1).isCImm())
2436 IsZero = I.getOperand(i: 1).getCImm()->isZero();
2437 else if (I.getOperand(i: 1).isImm())
2438 IsZero = I.getOperand(i: 1).getImm() == 0;
2439
2440 if (!IsZero)
2441 return false;
2442
2443 Register DefReg = I.getOperand(i: 0).getReg();
2444 LLT Ty = MRI.getType(Reg: DefReg);
2445 if (Ty.getSizeInBits() == 64) {
2446 I.getOperand(i: 1).ChangeToRegister(Reg: AArch64::XZR, isDef: false);
2447 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
2448 } else if (Ty.getSizeInBits() == 32) {
2449 I.getOperand(i: 1).ChangeToRegister(Reg: AArch64::WZR, isDef: false);
2450 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR32RegClass, MRI);
2451 } else
2452 return false;
2453
2454 I.setDesc(TII.get(Opcode: TargetOpcode::COPY));
2455 return true;
2456 }
2457
2458 case TargetOpcode::G_ADD: {
2459 // Check if this is being fed by a G_ICMP on either side.
2460 //
2461 // (cmp pred, x, y) + z
2462 //
2463 // In the above case, when the cmp is true, we increment z by 1. So, we can
2464 // fold the add into the cset for the cmp by using cinc.
2465 //
2466 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2467 Register AddDst = I.getOperand(i: 0).getReg();
2468 Register AddLHS = I.getOperand(i: 1).getReg();
2469 Register AddRHS = I.getOperand(i: 2).getReg();
2470 // Only handle scalars.
2471 LLT Ty = MRI.getType(Reg: AddLHS);
2472 if (Ty.isVector())
2473 return false;
2474 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2475 // bits.
2476 unsigned Size = Ty.getSizeInBits();
2477 if (Size != 32 && Size != 64)
2478 return false;
2479 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2480 if (!MRI.hasOneNonDBGUse(RegNo: Reg))
2481 return nullptr;
2482 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2483 // compare.
2484 if (Size == 32)
2485 return getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg, MRI);
2486 // We model scalar compares using 32-bit destinations right now.
2487 // If it's a 64-bit compare, it'll have 64-bit sources.
2488 Register ZExt;
2489 if (!mi_match(R: Reg, MRI,
2490 P: m_OneNonDBGUse(SP: m_GZExt(Src: m_OneNonDBGUse(SP: m_Reg(R&: ZExt))))))
2491 return nullptr;
2492 auto *Cmp = getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg: ZExt, MRI);
2493 if (!Cmp ||
2494 MRI.getType(Reg: Cmp->getOperand(i: 2).getReg()).getSizeInBits() != 64)
2495 return nullptr;
2496 return Cmp;
2497 };
2498 // Try to match
2499 // z + (cmp pred, x, y)
2500 MachineInstr *Cmp = MatchCmp(AddRHS);
2501 if (!Cmp) {
2502 // (cmp pred, x, y) + z
2503 std::swap(a&: AddLHS, b&: AddRHS);
2504 Cmp = MatchCmp(AddRHS);
2505 if (!Cmp)
2506 return false;
2507 }
2508 auto &PredOp = Cmp->getOperand(i: 1);
2509 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2510 const AArch64CC::CondCode InvCC =
2511 changeICMPPredToAArch64CC(P: CmpInst::getInversePredicate(pred: Pred));
2512 MIB.setInstrAndDebugLoc(I);
2513 emitIntegerCompare(/*LHS=*/Cmp->getOperand(i: 2),
2514 /*RHS=*/Cmp->getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB);
2515 emitCSINC(/*Dst=*/AddDst, /*Src =*/Src1: AddLHS, /*Src2=*/AddLHS, Pred: InvCC, MIRBuilder&: MIB);
2516 I.eraseFromParent();
2517 return true;
2518 }
2519 case TargetOpcode::G_OR: {
2520 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2521 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2522 // shifting and masking that we can replace with a BFI (encoded as a BFM).
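// e.g. for s32, (%y << 16) | (%x & 0xffff) becomes BFMWri %x, %y, 16, 15,
// i.e. BFI %x, %y, #16, #16.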
2523 Register Dst = I.getOperand(i: 0).getReg();
2524 LLT Ty = MRI.getType(Reg: Dst);
2525
2526 if (!Ty.isScalar())
2527 return false;
2528
2529 unsigned Size = Ty.getSizeInBits();
2530 if (Size != 32 && Size != 64)
2531 return false;
2532
2533 Register ShiftSrc;
2534 int64_t ShiftImm;
2535 Register MaskSrc;
2536 int64_t MaskImm;
2537 if (!mi_match(
2538 R: Dst, MRI,
2539 P: m_GOr(L: m_OneNonDBGUse(SP: m_GShl(L: m_Reg(R&: ShiftSrc), R: m_ICst(Cst&: ShiftImm))),
2540 R: m_OneNonDBGUse(SP: m_GAnd(L: m_Reg(R&: MaskSrc), R: m_ICst(Cst&: MaskImm))))))
2541 return false;
2542
2543 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2544 return false;
2545
2546 int64_t Immr = Size - ShiftImm;
2547 int64_t Imms = Size - ShiftImm - 1;
2548 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2549 emitInstr(Opcode: Opc, DstOps: {Dst}, SrcOps: {MaskSrc, ShiftSrc, Immr, Imms}, MIRBuilder&: MIB);
2550 I.eraseFromParent();
2551 return true;
2552 }
2553 case TargetOpcode::G_FENCE: {
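// Operand 1 is the sync scope: a single-thread fence (scope 0) only needs a
// compiler barrier. Otherwise operand 0 is the ordering: acquire (4) can use
// DMB ISHLD (0x9); anything stronger uses DMB ISH (0xb).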
2554 if (I.getOperand(i: 1).getImm() == 0)
2555 BuildMI(BB&: MBB, I, MIMD: MIMetadata(I), MCID: TII.get(Opcode: TargetOpcode::MEMBARRIER));
2556 else
2557 BuildMI(BB&: MBB, I, MIMD: MIMetadata(I), MCID: TII.get(Opcode: AArch64::DMB))
2558 .addImm(Val: I.getOperand(i: 0).getImm() == 4 ? 0x9 : 0xb);
2559 I.eraseFromParent();
2560 return true;
2561 }
2562 default:
2563 return false;
2564 }
2565}
2566
2567bool AArch64InstructionSelector::select(MachineInstr &I) {
2568 assert(I.getParent() && "Instruction should be in a basic block!");
2569 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2570
2571 MachineBasicBlock &MBB = *I.getParent();
2572 MachineFunction &MF = *MBB.getParent();
2573 MachineRegisterInfo &MRI = MF.getRegInfo();
2574
2575 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2576 if (Subtarget->requiresStrictAlign()) {
2577 // We don't support this feature yet.
2578 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2579 return false;
2580 }
2581
2582 MIB.setInstrAndDebugLoc(I);
2583
2584 unsigned Opcode = I.getOpcode();
2585 // G_PHI requires same handling as PHI
2586 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2587 // Certain non-generic instructions also need some special handling.
2588
2589 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2590 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2591
2592 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2593 const Register DefReg = I.getOperand(i: 0).getReg();
2594 const LLT DefTy = MRI.getType(Reg: DefReg);
2595
2596 const RegClassOrRegBank &RegClassOrBank =
2597 MRI.getRegClassOrRegBank(Reg: DefReg);
2598
2599 const TargetRegisterClass *DefRC =
2600 dyn_cast<const TargetRegisterClass *>(Val: RegClassOrBank);
2601 if (!DefRC) {
2602 if (!DefTy.isValid()) {
2603 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2604 return false;
2605 }
2606 const RegisterBank &RB = *cast<const RegisterBank *>(Val: RegClassOrBank);
2607 DefRC = getRegClassForTypeOnBank(Ty: DefTy, RB);
2608 if (!DefRC) {
2609 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2610 return false;
2611 }
2612 }
2613
2614 I.setDesc(TII.get(Opcode: TargetOpcode::PHI));
2615
2616 return RBI.constrainGenericRegister(Reg: DefReg, RC: *DefRC, MRI);
2617 }
2618
2619 if (I.isCopy())
2620 return selectCopy(I, TII, MRI, TRI, RBI);
2621
2622 if (I.isDebugInstr())
2623 return selectDebugInstr(I, MRI, RBI);
2624
2625 return true;
2626 }
2627
2628
2629 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2630 LLVM_DEBUG(
2631 dbgs() << "Generic instruction has unexpected implicit operands\n");
2632 return false;
2633 }
2634
2635 // Try to do some lowering before we start instruction selecting. These
2636 // lowerings are purely transformations on the input G_MIR and so selection
2637 // must continue after any modification of the instruction.
2638 if (preISelLower(I)) {
2639 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2640 }
2641
2642 // There may be patterns where the importer can't deal with them optimally,
2643 // but does select it to a suboptimal sequence so our custom C++ selection
2644 // code later never has a chance to work on it. Therefore, we have an early
2645 // selection attempt here to give priority to certain selection routines
2646 // over the imported ones.
2647 if (earlySelect(I))
2648 return true;
2649
2650 if (selectImpl(I, CoverageInfo&: *CoverageInfo))
2651 return true;
2652
2653 LLT Ty =
2654 I.getOperand(i: 0).isReg() ? MRI.getType(Reg: I.getOperand(i: 0).getReg()) : LLT{};
2655
2656 switch (Opcode) {
2657 case TargetOpcode::G_SBFX:
2658 case TargetOpcode::G_UBFX: {
2659 static const unsigned OpcTable[2][2] = {
2660 {AArch64::UBFMWri, AArch64::UBFMXri},
2661 {AArch64::SBFMWri, AArch64::SBFMXri}};
2662 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2663 unsigned Size = Ty.getSizeInBits();
2664 unsigned Opc = OpcTable[IsSigned][Size == 64];
2665 auto Cst1 =
2666 getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 2).getReg(), MRI);
2667 assert(Cst1 && "Should have gotten a constant for src 1?");
2668 auto Cst2 =
2669 getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 3).getReg(), MRI);
2670 assert(Cst2 && "Should have gotten a constant for src 2?");
2671 auto LSB = Cst1->Value.getZExtValue();
2672 auto Width = Cst2->Value.getZExtValue();
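// G_SBFX/G_UBFX take (lsb, width); SBFM/UBFM take (immr, imms) =
// (lsb, lsb + width - 1).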
2673 auto BitfieldInst =
2674 MIB.buildInstr(Opc, DstOps: {I.getOperand(i: 0)}, SrcOps: {I.getOperand(i: 1)})
2675 .addImm(Val: LSB)
2676 .addImm(Val: LSB + Width - 1);
2677 I.eraseFromParent();
2678 return constrainSelectedInstRegOperands(I&: *BitfieldInst, TII, TRI, RBI);
2679 }
2680 case TargetOpcode::G_BRCOND:
2681 return selectCompareBranch(I, MF, MRI);
2682
2683 case TargetOpcode::G_BRINDIRECT: {
2684 const Function &Fn = MF.getFunction();
2685 if (std::optional<uint16_t> BADisc =
2686 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(ParentFn: Fn)) {
2687 auto MI = MIB.buildInstr(Opc: AArch64::BRA, DstOps: {}, SrcOps: {I.getOperand(i: 0).getReg()});
2688 MI.addImm(Val: AArch64PACKey::IA);
2689 MI.addImm(Val: *BADisc);
2690 MI.addReg(/*AddrDisc=*/RegNo: AArch64::XZR);
2691 I.eraseFromParent();
2692 return constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI);
2693 }
2694 I.setDesc(TII.get(Opcode: AArch64::BR));
2695 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2696 }
2697
2698 case TargetOpcode::G_BRJT:
2699 return selectBrJT(I, MRI);
2700
2701 case AArch64::G_ADD_LOW: {
2702 // This op may have been separated from its ADRP companion by the localizer
2703 // or some other code motion pass. Given that many CPUs will try to
2704 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2705 // which will later be expanded into an ADRP+ADD pair after scheduling.
2706 MachineInstr *BaseMI = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg());
2707 if (BaseMI->getOpcode() != AArch64::ADRP) {
2708 I.setDesc(TII.get(Opcode: AArch64::ADDXri));
2709 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2710 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2711 }
2712 assert(TM.getCodeModel() == CodeModel::Small &&
2713 "Expected small code model");
2714 auto Op1 = BaseMI->getOperand(i: 1);
2715 auto Op2 = I.getOperand(i: 2);
2716 auto MovAddr = MIB.buildInstr(Opc: AArch64::MOVaddr, DstOps: {I.getOperand(i: 0)}, SrcOps: {})
2717 .addGlobalAddress(GV: Op1.getGlobal(), Offset: Op1.getOffset(),
2718 TargetFlags: Op1.getTargetFlags())
2719 .addGlobalAddress(GV: Op2.getGlobal(), Offset: Op2.getOffset(),
2720 TargetFlags: Op2.getTargetFlags());
2721 I.eraseFromParent();
2722 return constrainSelectedInstRegOperands(I&: *MovAddr, TII, TRI, RBI);
2723 }
2724
2725 case TargetOpcode::G_FCONSTANT:
2726 case TargetOpcode::G_CONSTANT: {
2727 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2728
2729 const LLT s8 = LLT::scalar(SizeInBits: 8);
2730 const LLT s16 = LLT::scalar(SizeInBits: 16);
2731 const LLT s32 = LLT::scalar(SizeInBits: 32);
2732 const LLT s64 = LLT::scalar(SizeInBits: 64);
2733 const LLT s128 = LLT::scalar(SizeInBits: 128);
2734 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
2735
2736 const Register DefReg = I.getOperand(i: 0).getReg();
2737 const LLT DefTy = MRI.getType(Reg: DefReg);
2738 const unsigned DefSize = DefTy.getSizeInBits();
2739 const RegisterBank &RB = *RBI.getRegBank(Reg: DefReg, MRI, TRI);
2740
2741 // FIXME: Redundant check, but even less readable when factored out.
2742 if (isFP) {
2743 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2744 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2745 << " constant, expected: " << s16 << " or " << s32
2746 << " or " << s64 << " or " << s128 << '\n');
2747 return false;
2748 }
2749
2750 if (RB.getID() != AArch64::FPRRegBankID) {
2751 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2752 << " constant on bank: " << RB
2753 << ", expected: FPR\n");
2754 return false;
2755 }
2756
2757 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2758 // can be sure tablegen works correctly and isn't rescued by this code.
2759 // 0.0 is not covered by tablegen for FP128, however, so that case is
2760 // handled here.
2761 if (DefSize != 128 && I.getOperand(i: 1).getFPImm()->isExactlyValue(V: 0.0))
2762 return false;
2763 } else {
2764 // s32 and s64 are covered by tablegen.
2765 if (Ty != p0 && Ty != s8 && Ty != s16) {
2766 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2767 << " constant, expected: " << s32 << ", " << s64
2768 << ", or " << p0 << '\n');
2769 return false;
2770 }
2771
2772 if (RB.getID() != AArch64::GPRRegBankID) {
2773 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2774 << " constant on bank: " << RB
2775 << ", expected: GPR\n");
2776 return false;
2777 }
2778 }
2779
2780 if (isFP) {
2781 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(Ty: DefTy, RB);
2782 // For 16 and 128-bit values, or when FMOV cannot encode the immediate, emit a constant pool load.
2783 switch (DefSize) {
2784 default:
2785 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2786 case 32:
2787 case 64: {
2788 bool OptForSize = shouldOptForSize(MF: &MF);
2789 const auto &TLI = MF.getSubtarget().getTargetLowering();
2790 // If TLI says that this fpimm is illegal, then we'll expand to a
2791 // constant pool load.
2792 if (TLI->isFPImmLegal(I.getOperand(i: 1).getFPImm()->getValueAPF(),
2793 EVT::getFloatingPointVT(BitWidth: DefSize), ForCodeSize: OptForSize))
2794 break;
2795 [[fallthrough]];
2796 }
2797 case 16:
2798 case 128: {
2799 auto *FPImm = I.getOperand(i: 1).getFPImm();
2800 auto *LoadMI = emitLoadFromConstantPool(CPVal: FPImm, MIRBuilder&: MIB);
2801 if (!LoadMI) {
2802 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2803 return false;
2804 }
2805 MIB.buildCopy(Res: {DefReg}, Op: {LoadMI->getOperand(i: 0).getReg()});
2806 I.eraseFromParent();
2807 return RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI);
2808 }
2809 }
2810
2811 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2812 // Materialize the constant with a normal mov into a GPR, then copy it to the FPR (the copy becomes an FMOV).
2813 const Register DefGPRReg = MRI.createVirtualRegister(
2814 RegClass: DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2815 MachineOperand &RegOp = I.getOperand(i: 0);
2816 RegOp.setReg(DefGPRReg);
2817 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator()));
2818 MIB.buildCopy(Res: {DefReg}, Op: {DefGPRReg});
2819
2820 if (!RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI)) {
2821 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2822 return false;
2823 }
2824
2825 MachineOperand &ImmOp = I.getOperand(i: 1);
2826 // FIXME: Is going through int64_t always correct?
2827 ImmOp.ChangeToImmediate(
2828 ImmVal: ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2829 } else if (I.getOperand(i: 1).isCImm()) {
2830 uint64_t Val = I.getOperand(i: 1).getCImm()->getZExtValue();
2831 I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val);
2832 } else if (I.getOperand(i: 1).isImm()) {
2833 uint64_t Val = I.getOperand(i: 1).getImm();
2834 I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val);
2835 }
2836
2837 const unsigned MovOpc =
2838 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2839 I.setDesc(TII.get(Opcode: MovOpc));
2840 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2841 return true;
2842 }
2843 case TargetOpcode::G_EXTRACT: {
2844 Register DstReg = I.getOperand(i: 0).getReg();
2845 Register SrcReg = I.getOperand(i: 1).getReg();
2846 LLT SrcTy = MRI.getType(Reg: SrcReg);
2847 LLT DstTy = MRI.getType(Reg: DstReg);
2848 (void)DstTy;
2849 unsigned SrcSize = SrcTy.getSizeInBits();
2850
2851 if (SrcTy.getSizeInBits() > 64) {
2852 // This should be an extract of an s128, which is like a vector extract.
2853 if (SrcTy.getSizeInBits() != 128)
2854 return false;
2855 // Only support extracting 64 bits from an s128 at the moment.
2856 if (DstTy.getSizeInBits() != 64)
2857 return false;
2858
2859 unsigned Offset = I.getOperand(i: 2).getImm();
2860 if (Offset % 64 != 0)
2861 return false;
2862
2863 // Check we have the right regbank always.
2864 const RegisterBank &SrcRB = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
2865 const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
2866 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2867
2868 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2869 auto NewI =
2870 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {})
2871 .addUse(RegNo: SrcReg, Flags: 0,
2872 SubReg: Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2873 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt&: *NewI,
2874 RegClass: AArch64::GPR64RegClass, RegMO&: NewI->getOperand(i: 0));
2875 I.eraseFromParent();
2876 return true;
2877 }
2878
2879 // Emit the same code as a vector extract.
2880 // Offset must be a multiple of 64.
2881 unsigned LaneIdx = Offset / 64;
2882 MachineInstr *Extract = emitExtractVectorElt(
2883 DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: 64), VecReg: SrcReg, LaneIdx, MIRBuilder&: MIB);
2884 if (!Extract)
2885 return false;
2886 I.eraseFromParent();
2887 return true;
2888 }
2889
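// Smaller extracts become a UBFX, encoded as UBFM with immr = offset and
// imms = offset + width - 1.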
2890 I.setDesc(TII.get(Opcode: SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2891 MachineInstrBuilder(MF, I).addImm(Val: I.getOperand(i: 2).getImm() +
2892 Ty.getSizeInBits() - 1);
2893
2894 if (SrcSize < 64) {
2895 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2896 "unexpected G_EXTRACT types");
2897 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2898 }
2899
2900 DstReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64));
2901 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator()));
2902 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {})
2903 .addReg(RegNo: DstReg, flags: 0, SubReg: AArch64::sub_32);
2904 RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(),
2905 RC: AArch64::GPR32RegClass, MRI);
2906 I.getOperand(i: 0).setReg(DstReg);
2907
2908 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2909 }
2910
2911 case TargetOpcode::G_INSERT: {
2912 LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 2).getReg());
2913 LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2914 unsigned DstSize = DstTy.getSizeInBits();
2915 // Larger inserts are vectors, same-size ones should be something else by
2916 // now (split up or turned into COPYs).
2917 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2918 return false;
2919
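// A scalar G_INSERT at bit lsb becomes a BFM (i.e. BFI) with
// immr = (size - lsb) % size and imms = width - 1.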
2920 I.setDesc(TII.get(Opcode: DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2921 unsigned LSB = I.getOperand(i: 3).getImm();
2922 unsigned Width = MRI.getType(Reg: I.getOperand(i: 2).getReg()).getSizeInBits();
2923 I.getOperand(i: 3).setImm((DstSize - LSB) % DstSize);
2924 MachineInstrBuilder(MF, I).addImm(Val: Width - 1);
2925
2926 if (DstSize < 64) {
2927 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2928 "unexpected G_INSERT types");
2929 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2930 }
2931
2932 Register SrcReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64));
2933 BuildMI(BB&: MBB, I: I.getIterator(), MIMD: I.getDebugLoc(),
2934 MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG))
2935 .addDef(RegNo: SrcReg)
2936 .addImm(Val: 0)
2937 .addUse(RegNo: I.getOperand(i: 2).getReg())
2938 .addImm(Val: AArch64::sub_32);
2939 RBI.constrainGenericRegister(Reg: I.getOperand(i: 2).getReg(),
2940 RC: AArch64::GPR32RegClass, MRI);
2941 I.getOperand(i: 2).setReg(SrcReg);
2942
2943 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2944 }
2945 case TargetOpcode::G_FRAME_INDEX: {
2946 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2947 if (Ty != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) {
2948 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2949 << ", expected: " << LLT::pointer(0, 64) << '\n');
2950 return false;
2951 }
2952 I.setDesc(TII.get(Opcode: AArch64::ADDXri));
2953
2954 // MOs for a #0 shifted immediate.
2955 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2956 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2957
2958 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2959 }
2960
2961 case TargetOpcode::G_GLOBAL_VALUE: {
2962 const GlobalValue *GV = nullptr;
2963 unsigned OpFlags;
2964 if (I.getOperand(i: 1).isSymbol()) {
2965 OpFlags = I.getOperand(i: 1).getTargetFlags();
2966 // Currently only used by "RtLibUseGOT".
2967 assert(OpFlags == AArch64II::MO_GOT);
2968 } else {
2969 GV = I.getOperand(i: 1).getGlobal();
2970 if (GV->isThreadLocal()) {
2971 // We don't support instructions with emulated TLS variables yet
2972 if (TM.useEmulatedTLS())
2973 return false;
2974 return selectTLSGlobalValue(I, MRI);
2975 }
2976 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2977 }
2978
2979 if (OpFlags & AArch64II::MO_GOT) {
2980 I.setDesc(TII.get(Opcode: MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT()
2981 ? AArch64::LOADgotAUTH
2982 : AArch64::LOADgot));
2983 I.getOperand(i: 1).setTargetFlags(OpFlags);
2984 } else if (TM.getCodeModel() == CodeModel::Large &&
2985 !TM.isPositionIndependent()) {
2986 // Materialize the global using movz/movk instructions.
2987 materializeLargeCMVal(I, V: GV, OpFlags);
2988 I.eraseFromParent();
2989 return true;
2990 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2991 I.setDesc(TII.get(Opcode: AArch64::ADR));
2992 I.getOperand(i: 1).setTargetFlags(OpFlags);
2993 } else {
2994 I.setDesc(TII.get(Opcode: AArch64::MOVaddr));
2995 I.getOperand(i: 1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2996 MachineInstrBuilder MIB(MF, I);
2997 MIB.addGlobalAddress(GV, Offset: I.getOperand(i: 1).getOffset(),
2998 TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2999 }
3000 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3001 }
3002
3003 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
3004 return selectPtrAuthGlobalValue(I, MRI);
3005
3006 case TargetOpcode::G_ZEXTLOAD:
3007 case TargetOpcode::G_LOAD:
3008 case TargetOpcode::G_STORE: {
3009 GLoadStore &LdSt = cast<GLoadStore>(Val&: I);
3010 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
3011 LLT PtrTy = MRI.getType(Reg: LdSt.getPointerReg());
3012
3013 // Can only handle AddressSpace 0, 64-bit pointers.
3014 if (PtrTy != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) {
3015 return false;
3016 }
3017
3018 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
3019 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
3020 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
3021
3022 // Need special instructions for atomics that affect ordering.
3023 if (Order != AtomicOrdering::NotAtomic &&
3024 Order != AtomicOrdering::Unordered &&
3025 Order != AtomicOrdering::Monotonic) {
3026 assert(!isa<GZExtLoad>(LdSt));
3027 assert(MemSizeInBytes <= 8 &&
3028 "128-bit atomics should already be custom-legalized");
3029
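// The opcode tables below are indexed by log2 of the access size, e.g. a
// 4-byte acquire load picks index 2: LDAPRW when +rcpc is available and the
// ordering is weaker than seq_cst, otherwise LDARW; a 4-byte release store
// picks STLRW.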
3030 if (isa<GLoad>(Val: LdSt)) {
3031 static constexpr unsigned LDAPROpcodes[] = {
3032 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
3033 static constexpr unsigned LDAROpcodes[] = {
3034 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
3035 ArrayRef<unsigned> Opcodes =
3036 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
3037 ? LDAPROpcodes
3038 : LDAROpcodes;
3039 I.setDesc(TII.get(Opcode: Opcodes[Log2_32(Value: MemSizeInBytes)]));
3040 } else {
3041 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
3042 AArch64::STLRW, AArch64::STLRX};
3043 Register ValReg = LdSt.getReg(Idx: 0);
3044 if (MRI.getType(Reg: ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
3045 // Emit a subreg copy of 32 bits.
3046 Register NewVal = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3047 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {NewVal}, SrcOps: {})
3048 .addReg(RegNo: I.getOperand(i: 0).getReg(), flags: 0, SubReg: AArch64::sub_32);
3049 I.getOperand(i: 0).setReg(NewVal);
3050 }
3051 I.setDesc(TII.get(Opcode: Opcodes[Log2_32(Value: MemSizeInBytes)]));
3052 }
3053 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3054 return true;
3055 }
3056
3057#ifndef NDEBUG
3058 const Register PtrReg = LdSt.getPointerReg();
3059 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
3060 // Check that the pointer register is valid.
3061 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
3062 "Load/Store pointer operand isn't a GPR");
3063 assert(MRI.getType(PtrReg).isPointer() &&
3064 "Load/Store pointer operand isn't a pointer");
3065#endif
3066
3067 const Register ValReg = LdSt.getReg(Idx: 0);
3068 const RegisterBank &RB = *RBI.getRegBank(Reg: ValReg, MRI, TRI);
3069 LLT ValTy = MRI.getType(Reg: ValReg);
3070
3071 // The code below doesn't support truncating stores, so we need to split it
3072 // again.
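// E.g., roughly: storing the low 32 bits of a 64-bit value becomes a
// subregister COPY (sub_32 on GPR, ssub on FPR) followed by a plain 32-bit
// store of the copy.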
3073 if (isa<GStore>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3074 unsigned SubReg;
3075 LLT MemTy = LdSt.getMMO().getMemoryType();
3076 auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB);
3077 if (!getSubRegForClass(RC, TRI, SubReg))
3078 return false;
3079
3080 // Generate a subreg copy.
3081 auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {MemTy}, SrcOps: {})
3082 .addReg(RegNo: ValReg, flags: 0, SubReg)
3083 .getReg(Idx: 0);
3084 RBI.constrainGenericRegister(Reg: Copy, RC: *RC, MRI);
3085 LdSt.getOperand(i: 0).setReg(Copy);
3086 } else if (isa<GLoad>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3087 // If this is an any-extending load from the FPR bank, split it into a regular
3088 // load + extend.
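// E.g., roughly (placeholder vregs):
//   %v:fpr(s32) = G_LOAD %p :: (load (s16))
// becomes a 16-bit load into a new fpr(s16) vreg followed by
//   %v = SUBREG_TO_REG 0, %new, hsub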
3089 if (RB.getID() == AArch64::FPRRegBankID) {
3090 unsigned SubReg;
3091 LLT MemTy = LdSt.getMMO().getMemoryType();
3092 auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB);
3093 if (!getSubRegForClass(RC, TRI, SubReg))
3094 return false;
3095 Register OldDst = LdSt.getReg(Idx: 0);
3096 Register NewDst =
3097 MRI.createGenericVirtualRegister(Ty: LdSt.getMMO().getMemoryType());
3098 LdSt.getOperand(i: 0).setReg(NewDst);
3099 MRI.setRegBank(Reg: NewDst, RegBank: RB);
3100 // Generate a SUBREG_TO_REG to extend it.
3101 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LdSt.getIterator()));
3102 MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {OldDst}, SrcOps: {})
3103 .addImm(Val: 0)
3104 .addUse(RegNo: NewDst)
3105 .addImm(Val: SubReg);
3106 auto SubRegRC = getRegClassForTypeOnBank(Ty: MRI.getType(Reg: OldDst), RB);
3107 RBI.constrainGenericRegister(Reg: OldDst, RC: *SubRegRC, MRI);
3108 MIB.setInstr(LdSt);
3109 ValTy = MemTy; // This is no longer an extending load.
3110 }
3111 }
3112
3113 // Helper lambda for partially selecting I. Either returns the original
3114 // instruction with an updated opcode, or a new instruction.
3115 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
3116 bool IsStore = isa<GStore>(Val: I);
3117 const unsigned NewOpc =
3118 selectLoadStoreUIOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize: MemSizeInBits);
3119 if (NewOpc == I.getOpcode())
3120 return nullptr;
3121 // Check if we can fold anything into the addressing mode.
3122 auto AddrModeFns =
3123 selectAddrModeIndexed(Root&: I.getOperand(i: 1), Size: MemSizeInBytes);
3124 if (!AddrModeFns) {
3125 // Can't fold anything. Use the original instruction.
3126 I.setDesc(TII.get(Opcode: NewOpc));
3127 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
3128 return &I;
3129 }
3130
3131 // Folded something. Create a new instruction and return it.
3132 auto NewInst = MIB.buildInstr(Opc: NewOpc, DstOps: {}, SrcOps: {}, Flags: I.getFlags());
3133 Register CurValReg = I.getOperand(i: 0).getReg();
3134 IsStore ? NewInst.addUse(RegNo: CurValReg) : NewInst.addDef(RegNo: CurValReg);
3135 NewInst.cloneMemRefs(OtherMI: I);
3136 for (auto &Fn : *AddrModeFns)
3137 Fn(NewInst);
3138 I.eraseFromParent();
3139 return &*NewInst;
3140 };
3141
3142 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3143 if (!LoadStore)
3144 return false;
3145
3146 // If we're storing a 0, use WZR/XZR.
3147 if (Opcode == TargetOpcode::G_STORE) {
3148 auto CVal = getIConstantVRegValWithLookThrough(
3149 VReg: LoadStore->getOperand(i: 0).getReg(), MRI);
3150 if (CVal && CVal->Value == 0) {
3151 switch (LoadStore->getOpcode()) {
3152 case AArch64::STRWui:
3153 case AArch64::STRHHui:
3154 case AArch64::STRBBui:
3155 LoadStore->getOperand(i: 0).setReg(AArch64::WZR);
3156 break;
3157 case AArch64::STRXui:
3158 LoadStore->getOperand(i: 0).setReg(AArch64::XZR);
3159 break;
3160 }
3161 }
3162 }
3163
3164 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3165 ValTy == LLT::scalar(SizeInBits: 64) && MemSizeInBits == 32)) {
3166 // The any/zextload from a smaller type to i32 should be handled by the
3167 // importer.
3168 if (MRI.getType(Reg: LoadStore->getOperand(i: 0).getReg()).getSizeInBits() != 64)
3169 return false;
3170 // If we have an extending load then change the load's type to be a
3171 // narrower reg and zero_extend with SUBREG_TO_REG.
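// E.g., roughly (placeholder vregs): a zextload of 32 bits into an s64 becomes
// a 32-bit load into a GPR32 vreg (which already zeroes the upper 32 bits)
// followed by
//   %dst = SUBREG_TO_REG 0, %ld, sub_32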
3172 Register LdReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3173 Register DstReg = LoadStore->getOperand(i: 0).getReg();
3174 LoadStore->getOperand(i: 0).setReg(LdReg);
3175
3176 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LoadStore->getIterator()));
3177 MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DstReg}, SrcOps: {})
3178 .addImm(Val: 0)
3179 .addUse(RegNo: LdReg)
3180 .addImm(Val: AArch64::sub_32);
3181 constrainSelectedInstRegOperands(I&: *LoadStore, TII, TRI, RBI);
3182 return RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64allRegClass,
3183 MRI);
3184 }
3185 return constrainSelectedInstRegOperands(I&: *LoadStore, TII, TRI, RBI);
3186 }
3187
3188 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3189 case TargetOpcode::G_INDEXED_SEXTLOAD:
3190 return selectIndexedExtLoad(I, MRI);
3191 case TargetOpcode::G_INDEXED_LOAD:
3192 return selectIndexedLoad(I, MRI);
3193 case TargetOpcode::G_INDEXED_STORE:
3194 return selectIndexedStore(I&: cast<GIndexedStore>(Val&: I), MRI);
3195
3196 case TargetOpcode::G_LSHR:
3197 case TargetOpcode::G_ASHR:
3198 if (MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector())
3199 return selectVectorAshrLshr(I, MRI);
3200 [[fallthrough]];
3201 case TargetOpcode::G_SHL:
3202 if (Opcode == TargetOpcode::G_SHL &&
3203 MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector())
3204 return selectVectorSHL(I, MRI);
3205
3206 // These shifts were legalized to have 64 bit shift amounts because we
3207 // want to take advantage of the selection patterns that assume the
3208 // immediates are s64s. However, selectBinaryOp will assume both operands
3209 // have the same bit size.
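// E.g., roughly (placeholder vregs): a 32-bit shift whose amount is an s64
// gets the amount truncated with a subregister copy,
//   %amt32:gpr(s32) = COPY %amt64.sub_32
// and the shift then uses %amt32.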
3210 {
3211 Register SrcReg = I.getOperand(i: 1).getReg();
3212 Register ShiftReg = I.getOperand(i: 2).getReg();
3213 const LLT ShiftTy = MRI.getType(Reg: ShiftReg);
3214 const LLT SrcTy = MRI.getType(Reg: SrcReg);
3215 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3216 ShiftTy.getSizeInBits() == 64) {
3217 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3218 // Insert a subregister copy to implement a 64->32 trunc
3219 auto Trunc = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {SrcTy}, SrcOps: {})
3220 .addReg(RegNo: ShiftReg, flags: 0, SubReg: AArch64::sub_32);
3221 MRI.setRegBank(Reg: Trunc.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::GPRRegBankID));
3222 I.getOperand(i: 2).setReg(Trunc.getReg(Idx: 0));
3223 }
3224 }
3225 [[fallthrough]];
3226 case TargetOpcode::G_OR: {
3227 // Reject the various things we don't support yet.
3228 if (unsupportedBinOp(I, RBI, MRI, TRI))
3229 return false;
3230
3231 const unsigned OpSize = Ty.getSizeInBits();
3232
3233 const Register DefReg = I.getOperand(i: 0).getReg();
3234 const RegisterBank &RB = *RBI.getRegBank(Reg: DefReg, MRI, TRI);
3235
3236 const unsigned NewOpc = selectBinaryOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize);
3237 if (NewOpc == I.getOpcode())
3238 return false;
3239
3240 I.setDesc(TII.get(Opcode: NewOpc));
3241 // FIXME: Should the type always be reset in setDesc?
3242
3243 // Now that we selected an opcode, we need to constrain the register
3244 // operands to use appropriate classes.
3245 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3246 }
3247
3248 case TargetOpcode::G_PTR_ADD: {
3249 emitADD(DefReg: I.getOperand(i: 0).getReg(), LHS&: I.getOperand(i: 1), RHS&: I.getOperand(i: 2), MIRBuilder&: MIB);
3250 I.eraseFromParent();
3251 return true;
3252 }
3253
3254 case TargetOpcode::G_SADDE:
3255 case TargetOpcode::G_UADDE:
3256 case TargetOpcode::G_SSUBE:
3257 case TargetOpcode::G_USUBE:
3258 case TargetOpcode::G_SADDO:
3259 case TargetOpcode::G_UADDO:
3260 case TargetOpcode::G_SSUBO:
3261 case TargetOpcode::G_USUBO:
3262 return selectOverflowOp(I, MRI);
3263
3264 case TargetOpcode::G_PTRMASK: {
3265 Register MaskReg = I.getOperand(i: 2).getReg();
3266 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(VReg: MaskReg, MRI);
3267 // TODO: Implement arbitrary cases
3268 if (!MaskVal || !isShiftedMask_64(Value: *MaskVal))
3269 return false;
3270
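// E.g., roughly: clearing the low four bits of a pointer
// (mask 0xfffffffffffffff0, a shifted run of ones) becomes
//   ANDXri %ptr, <logical-immediate encoding of the mask>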
3271 uint64_t Mask = *MaskVal;
3272 I.setDesc(TII.get(Opcode: AArch64::ANDXri));
3273 I.getOperand(i: 2).ChangeToImmediate(
3274 ImmVal: AArch64_AM::encodeLogicalImmediate(imm: Mask, regSize: 64));
3275
3276 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3277 }
3278 case TargetOpcode::G_PTRTOINT:
3279 case TargetOpcode::G_TRUNC: {
3280 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3281 const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
3282
3283 const Register DstReg = I.getOperand(i: 0).getReg();
3284 const Register SrcReg = I.getOperand(i: 1).getReg();
3285
3286 const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
3287 const RegisterBank &SrcRB = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
3288
3289 if (DstRB.getID() != SrcRB.getID()) {
3290 LLVM_DEBUG(
3291 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3292 return false;
3293 }
3294
3295 if (DstRB.getID() == AArch64::GPRRegBankID) {
3296 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB);
3297 if (!DstRC)
3298 return false;
3299
3300 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(Ty: SrcTy, RB: SrcRB);
3301 if (!SrcRC)
3302 return false;
3303
3304 if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: *SrcRC, MRI) ||
3305 !RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) {
3306 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3307 return false;
3308 }
3309
3310 if (DstRC == SrcRC) {
3311 // Nothing to be done
3312 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(SizeInBits: 32) &&
3313 SrcTy == LLT::scalar(SizeInBits: 64)) {
3314 llvm_unreachable("TableGen can import this case");
3315 return false;
3316 } else if (DstRC == &AArch64::GPR32RegClass &&
3317 SrcRC == &AArch64::GPR64RegClass) {
3318 I.getOperand(i: 1).setSubReg(AArch64::sub_32);
3319 } else {
3320 LLVM_DEBUG(
3321 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3322 return false;
3323 }
3324
3325 I.setDesc(TII.get(Opcode: TargetOpcode::COPY));
3326 return true;
3327 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3328 if (DstTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16) &&
3329 SrcTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
3330 I.setDesc(TII.get(Opcode: AArch64::XTNv4i16));
3331 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3332 return true;
3333 }
3334
3335 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3336 MachineInstr *Extract = emitExtractVectorElt(
3337 DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: DstTy.getSizeInBits()), VecReg: SrcReg, LaneIdx: 0, MIRBuilder&: MIB);
3338 if (!Extract)
3339 return false;
3340 I.eraseFromParent();
3341 return true;
3342 }
3343
3344 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3345 if (Opcode == TargetOpcode::G_PTRTOINT) {
3346 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3347 I.setDesc(TII.get(Opcode: TargetOpcode::COPY));
3348 return selectCopy(I, TII, MRI, TRI, RBI);
3349 }
3350 }
3351
3352 return false;
3353 }
3354
3355 case TargetOpcode::G_ANYEXT: {
3356 if (selectUSMovFromExtend(I, MRI))
3357 return true;
3358
3359 const Register DstReg = I.getOperand(i: 0).getReg();
3360 const Register SrcReg = I.getOperand(i: 1).getReg();
3361
3362 const RegisterBank &RBDst = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
3363 if (RBDst.getID() != AArch64::GPRRegBankID) {
3364 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3365 << ", expected: GPR\n");
3366 return false;
3367 }
3368
3369 const RegisterBank &RBSrc = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
3370 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3371 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3372 << ", expected: GPR\n");
3373 return false;
3374 }
3375
3376 const unsigned DstSize = MRI.getType(Reg: DstReg).getSizeInBits();
3377
3378 if (DstSize == 0) {
3379 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3380 return false;
3381 }
3382
3383 if (DstSize != 64 && DstSize > 32) {
3384 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3385 << ", expected: 32 or 64\n");
3386 return false;
3387 }
3388 // At this point G_ANYEXT is just like a plain COPY, but we need
3389 // to explicitly form the 64-bit value if any.
3390 if (DstSize > 32) {
3391 Register ExtSrc = MRI.createVirtualRegister(RegClass: &AArch64::GPR64allRegClass);
3392 BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG))
3393 .addDef(RegNo: ExtSrc)
3394 .addImm(Val: 0)
3395 .addUse(RegNo: SrcReg)
3396 .addImm(Val: AArch64::sub_32);
3397 I.getOperand(i: 1).setReg(ExtSrc);
3398 }
3399 return selectCopy(I, TII, MRI, TRI, RBI);
3400 }
3401
3402 case TargetOpcode::G_ZEXT:
3403 case TargetOpcode::G_SEXT_INREG:
3404 case TargetOpcode::G_SEXT: {
3405 if (selectUSMovFromExtend(I, MRI))
3406 return true;
3407
3408 unsigned Opcode = I.getOpcode();
3409 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3410 const Register DefReg = I.getOperand(i: 0).getReg();
3411 Register SrcReg = I.getOperand(i: 1).getReg();
3412 const LLT DstTy = MRI.getType(Reg: DefReg);
3413 const LLT SrcTy = MRI.getType(Reg: SrcReg);
3414 unsigned DstSize = DstTy.getSizeInBits();
3415 unsigned SrcSize = SrcTy.getSizeInBits();
3416
3417 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3418 // extended is encoded in the imm.
3419 if (Opcode == TargetOpcode::G_SEXT_INREG)
3420 SrcSize = I.getOperand(i: 2).getImm();
3421
3422 if (DstTy.isVector())
3423 return false; // Should be handled by imported patterns.
3424
3425 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3426 AArch64::GPRRegBankID &&
3427 "Unexpected ext regbank");
3428
3429 MachineInstr *ExtI;
3430
3431 // First, check whether we're extending the result of a load whose dest type
3432 // is smaller than 32 bits; in that case this zext is redundant. GPR32 is the
3433 // smallest GPR register on AArch64, and all loads which are smaller
3434 // automatically zero-extend the upper bits. E.g.
3435 // %v(s8) = G_LOAD %p, :: (load 1)
3436 // %v2(s32) = G_ZEXT %v(s8)
3437 if (!IsSigned) {
3438 auto *LoadMI = getOpcodeDef(Opcode: TargetOpcode::G_LOAD, Reg: SrcReg, MRI);
3439 bool IsGPR =
3440 RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3441 if (LoadMI && IsGPR) {
3442 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3443 unsigned BytesLoaded = MemOp->getSize().getValue();
3444 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3445 return selectCopy(I, TII, MRI, TRI, RBI);
3446 }
3447
3448 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3449 // + SUBREG_TO_REG.
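// E.g., roughly (placeholder vregs):
//   %d:gpr(s64) = G_ZEXT %s:gpr(s32)
// becomes
//   %t = ORRWrs $wzr, %s, 0      ; a 32-bit mov, which clears the upper bits
//   %d = SUBREG_TO_REG 0, %t, sub_32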
3450 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3451 Register SubregToRegSrc =
3452 MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3453 const Register ZReg = AArch64::WZR;
3454 MIB.buildInstr(Opc: AArch64::ORRWrs, DstOps: {SubregToRegSrc}, SrcOps: {ZReg, SrcReg})
3455 .addImm(Val: 0);
3456
3457 MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DefReg}, SrcOps: {})
3458 .addImm(Val: 0)
3459 .addUse(RegNo: SubregToRegSrc)
3460 .addImm(Val: AArch64::sub_32);
3461
3462 if (!RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass,
3463 MRI)) {
3464 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3465 return false;
3466 }
3467
3468 if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: AArch64::GPR32RegClass,
3469 MRI)) {
3470 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3471 return false;
3472 }
3473
3474 I.eraseFromParent();
3475 return true;
3476 }
3477 }
3478
3479 if (DstSize == 64) {
3480 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3481 // FIXME: Can we avoid manually doing this?
3482 if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: AArch64::GPR32RegClass,
3483 MRI)) {
3484 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3485 << " operand\n");
3486 return false;
3487 }
3488 SrcReg = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG,
3489 DstOps: {&AArch64::GPR64RegClass}, SrcOps: {})
3490 .addImm(Val: 0)
3491 .addUse(RegNo: SrcReg)
3492 .addImm(Val: AArch64::sub_32)
3493 .getReg(Idx: 0);
3494 }
3495
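// The extension is a bitfield move with immr = 0 and imms = SrcSize - 1,
// e.g. a G_SEXT from s8 to s64 becomes SBFMXri %x, 0, 7 (the sxtb alias);
// the 32-bit (Wri) form below is analogous.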
3496 ExtI = MIB.buildInstr(Opc: IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3497 DstOps: {DefReg}, SrcOps: {SrcReg})
3498 .addImm(Val: 0)
3499 .addImm(Val: SrcSize - 1);
3500 } else if (DstSize <= 32) {
3501 ExtI = MIB.buildInstr(Opc: IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3502 DstOps: {DefReg}, SrcOps: {SrcReg})
3503 .addImm(Val: 0)
3504 .addImm(Val: SrcSize - 1);
3505 } else {
3506 return false;
3507 }
3508
3509 constrainSelectedInstRegOperands(I&: *ExtI, TII, TRI, RBI);
3510 I.eraseFromParent();
3511 return true;
3512 }
3513
3514 case TargetOpcode::G_SITOFP:
3515 case TargetOpcode::G_UITOFP:
3516 case TargetOpcode::G_FPTOSI:
3517 case TargetOpcode::G_FPTOUI: {
3518 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()),
3519 SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
3520 const unsigned NewOpc = selectFPConvOpc(GenericOpc: Opcode, DstTy, SrcTy);
3521 if (NewOpc == Opcode)
3522 return false;
3523
3524 I.setDesc(TII.get(Opcode: NewOpc));
3525 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3526 I.setFlags(MachineInstr::NoFPExcept);
3527
3528 return true;
3529 }
3530
3531 case TargetOpcode::G_FREEZE:
3532 return selectCopy(I, TII, MRI, TRI, RBI);
3533
3534 case TargetOpcode::G_INTTOPTR:
3535 // The importer is currently unable to import pointer types since they
3536 // didn't exist in SelectionDAG.
3537 return selectCopy(I, TII, MRI, TRI, RBI);
3538
3539 case TargetOpcode::G_BITCAST:
3540 // Imported SelectionDAG rules can handle every bitcast except those that
3541 // bitcast from a type to the same type. Ideally, these shouldn't occur
3542 // but we might not run an optimizer that deletes them. The other exception
3543 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3544 // of them.
3545 return selectCopy(I, TII, MRI, TRI, RBI);
3546
3547 case TargetOpcode::G_SELECT: {
3548 auto &Sel = cast<GSelect>(Val&: I);
3549 const Register CondReg = Sel.getCondReg();
3550 const Register TReg = Sel.getTrueReg();
3551 const Register FReg = Sel.getFalseReg();
3552
3553 if (tryOptSelect(Sel))
3554 return true;
3555
3556 // Make sure to use an unused vreg instead of wzr, so that the peephole
3557 // optimizations will be able to optimize these.
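// Roughly (placeholder vregs): the condition's bit 0 is tested with
//   %dead = ANDSWri %cond, <logical imm 1>   (sets NZCV)
// and emitSelect then produces a conditional select on NE, typically a
// CSEL (or a folded CSINC/CSINV/CSNEG form).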
3558 Register DeadVReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3559 auto TstMI = MIB.buildInstr(Opc: AArch64::ANDSWri, DstOps: {DeadVReg}, SrcOps: {CondReg})
3560 .addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: 1, regSize: 32));
3561 constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI);
3562 if (!emitSelect(Dst: Sel.getReg(Idx: 0), True: TReg, False: FReg, CC: AArch64CC::NE, MIB))
3563 return false;
3564 Sel.eraseFromParent();
3565 return true;
3566 }
3567 case TargetOpcode::G_ICMP: {
3568 if (Ty.isVector())
3569 return false;
3570
3571 if (Ty != LLT::scalar(SizeInBits: 32)) {
3572 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3573 << ", expected: " << LLT::scalar(32) << '\n');
3574 return false;
3575 }
3576
3577 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate());
3578 const AArch64CC::CondCode InvCC =
3579 changeICMPPredToAArch64CC(P: CmpInst::getInversePredicate(pred: Pred));
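// Roughly: emitIntegerCompare sets NZCV (typically via SUBS, possibly with
// folded operands), and the CSINC below with WZR/WZR and the *inverted*
// condition materializes the boolean, i.e. the "cset" idiom:
//   %dst = CSINCWr $wzr, $wzr, <inverted cc>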
3580 emitIntegerCompare(LHS&: I.getOperand(i: 2), RHS&: I.getOperand(i: 3), Predicate&: I.getOperand(i: 1), MIRBuilder&: MIB);
3581 emitCSINC(/*Dst=*/I.getOperand(i: 0).getReg(), /*Src1=*/AArch64::WZR,
3582 /*Src2=*/AArch64::WZR, Pred: InvCC, MIRBuilder&: MIB);
3583 I.eraseFromParent();
3584 return true;
3585 }
3586
3587 case TargetOpcode::G_FCMP: {
3588 CmpInst::Predicate Pred =
3589 static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate());
3590 if (!emitFPCompare(LHS: I.getOperand(i: 2).getReg(), RHS: I.getOperand(i: 3).getReg(), MIRBuilder&: MIB,
3591 Pred) ||
3592 !emitCSetForFCmp(Dst: I.getOperand(i: 0).getReg(), Pred, MIRBuilder&: MIB))
3593 return false;
3594 I.eraseFromParent();
3595 return true;
3596 }
3597 case TargetOpcode::G_VASTART:
3598 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3599 : selectVaStartAAPCS(I, MF, MRI);
3600 case TargetOpcode::G_INTRINSIC:
3601 return selectIntrinsic(I, MRI);
3602 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3603 return selectIntrinsicWithSideEffects(I, MRI);
3604 case TargetOpcode::G_IMPLICIT_DEF: {
3605 I.setDesc(TII.get(Opcode: TargetOpcode::IMPLICIT_DEF));
3606 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3607 const Register DstReg = I.getOperand(i: 0).getReg();
3608 const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
3609 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB);
3610 RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI);
3611 return true;
3612 }
3613 case TargetOpcode::G_BLOCK_ADDR: {
3614 Function *BAFn = I.getOperand(i: 1).getBlockAddress()->getFunction();
3615 if (std::optional<uint16_t> BADisc =
3616 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(ParentFn: *BAFn)) {
3617 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X16}, SrcOps: {});
3618 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
3619 MIB.buildInstr(Opcode: AArch64::MOVaddrPAC)
3620 .addBlockAddress(BA: I.getOperand(i: 1).getBlockAddress())
3621 .addImm(Val: AArch64PACKey::IA)
3622 .addReg(/*AddrDisc=*/RegNo: AArch64::XZR)
3623 .addImm(Val: *BADisc)
3624 .constrainAllUses(TII, TRI, RBI);
3625 MIB.buildCopy(Res: I.getOperand(i: 0).getReg(), Op: Register(AArch64::X16));
3626 RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(),
3627 RC: AArch64::GPR64RegClass, MRI);
3628 I.eraseFromParent();
3629 return true;
3630 }
3631 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3632 materializeLargeCMVal(I, V: I.getOperand(i: 1).getBlockAddress(), OpFlags: 0);
3633 I.eraseFromParent();
3634 return true;
3635 } else {
3636 I.setDesc(TII.get(Opcode: AArch64::MOVaddrBA));
3637 auto MovMI = BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::MOVaddrBA),
3638 DestReg: I.getOperand(i: 0).getReg())
3639 .addBlockAddress(BA: I.getOperand(i: 1).getBlockAddress(),
3640 /* Offset */ 0, TargetFlags: AArch64II::MO_PAGE)
3641 .addBlockAddress(
3642 BA: I.getOperand(i: 1).getBlockAddress(), /* Offset */ 0,
3643 TargetFlags: AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3644 I.eraseFromParent();
3645 return constrainSelectedInstRegOperands(I&: *MovMI, TII, TRI, RBI);
3646 }
3647 }
3648 case AArch64::G_DUP: {
3649 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by the
3650 // imported patterns, so do it manually here. Avoiding generating an s16 gpr
3651 // is difficult because at RegBankSelect we may end up pessimizing the fpr
3652 // case if we decided to add an anyextend to fix this. Manual selection is
3653 // the most robust solution for now.
3654 if (RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI)->getID() !=
3655 AArch64::GPRRegBankID)
3656 return false; // We expect the fpr regbank case to be imported.
3657 LLT VecTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3658 if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8))
3659 I.setDesc(TII.get(Opcode: AArch64::DUPv8i8gpr));
3660 else if (VecTy == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8))
3661 I.setDesc(TII.get(Opcode: AArch64::DUPv16i8gpr));
3662 else if (VecTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16))
3663 I.setDesc(TII.get(Opcode: AArch64::DUPv4i16gpr));
3664 else if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16))
3665 I.setDesc(TII.get(Opcode: AArch64::DUPv8i16gpr));
3666 else
3667 return false;
3668 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3669 }
3670 case TargetOpcode::G_BUILD_VECTOR:
3671 return selectBuildVector(I, MRI);
3672 case TargetOpcode::G_MERGE_VALUES:
3673 return selectMergeValues(I, MRI);
3674 case TargetOpcode::G_UNMERGE_VALUES:
3675 return selectUnmergeValues(I, MRI);
3676 case TargetOpcode::G_SHUFFLE_VECTOR:
3677 return selectShuffleVector(I, MRI);
3678 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3679 return selectExtractElt(I, MRI);
3680 case TargetOpcode::G_CONCAT_VECTORS:
3681 return selectConcatVectors(I, MRI);
3682 case TargetOpcode::G_JUMP_TABLE:
3683 return selectJumpTable(I, MRI);
3684 case TargetOpcode::G_MEMCPY:
3685 case TargetOpcode::G_MEMCPY_INLINE:
3686 case TargetOpcode::G_MEMMOVE:
3687 case TargetOpcode::G_MEMSET:
3688 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3689 return selectMOPS(I, MRI);
3690 }
3691
3692 return false;
3693}
3694
3695bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3696 MachineIRBuilderState OldMIBState = MIB.getState();
3697 bool Success = select(I);
3698 MIB.setState(OldMIBState);
3699 return Success;
3700}
3701
3702bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3703 MachineRegisterInfo &MRI) {
3704 unsigned Mopcode;
3705 switch (GI.getOpcode()) {
3706 case TargetOpcode::G_MEMCPY:
3707 case TargetOpcode::G_MEMCPY_INLINE:
3708 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3709 break;
3710 case TargetOpcode::G_MEMMOVE:
3711 Mopcode = AArch64::MOPSMemoryMovePseudo;
3712 break;
3713 case TargetOpcode::G_MEMSET:
3714 // For tagged memset see llvm.aarch64.mops.memset.tag
3715 Mopcode = AArch64::MOPSMemorySetPseudo;
3716 break;
3717 }
3718
3719 auto &DstPtr = GI.getOperand(i: 0);
3720 auto &SrcOrVal = GI.getOperand(i: 1);
3721 auto &Size = GI.getOperand(i: 2);
3722
3723 // Create copies of the registers that can be clobbered.
3724 const Register DstPtrCopy = MRI.cloneVirtualRegister(VReg: DstPtr.getReg());
3725 const Register SrcValCopy = MRI.cloneVirtualRegister(VReg: SrcOrVal.getReg());
3726 const Register SizeCopy = MRI.cloneVirtualRegister(VReg: Size.getReg());
3727
3728 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3729 const auto &SrcValRegClass =
3730 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3731
3732 // Constrain to specific registers
3733 RBI.constrainGenericRegister(Reg: DstPtrCopy, RC: AArch64::GPR64commonRegClass, MRI);
3734 RBI.constrainGenericRegister(Reg: SrcValCopy, RC: SrcValRegClass, MRI);
3735 RBI.constrainGenericRegister(Reg: SizeCopy, RC: AArch64::GPR64RegClass, MRI);
3736
3737 MIB.buildCopy(Res: DstPtrCopy, Op: DstPtr);
3738 MIB.buildCopy(Res: SrcValCopy, Op: SrcOrVal);
3739 MIB.buildCopy(Res: SizeCopy, Op: Size);
3740
3741 // New instruction uses the copied registers because it must update them.
3742 // The defs are not used since they don't exist in G_MEM*. They are still
3743 // tied.
3744 // Note: the operand order differs from that of G_MEMSET, G_MEMCPY and G_MEMMOVE.
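// Sketch (assuming the usual MOPS lowering, e.g. for G_MEMCPY): the pseudo
// defines updated dst/src/size registers tied to the copied inputs and is
// expanded later into the FEAT_MOPS prologue/main/epilogue instruction
// triple (CPYFP*/CPYFM*/CPYFE* for memcpy, SETP*/SETM*/SETE* for memset).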
3745 Register DefDstPtr = MRI.createVirtualRegister(RegClass: &AArch64::GPR64commonRegClass);
3746 Register DefSize = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
3747 if (IsSet) {
3748 MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSize},
3749 SrcOps: {DstPtrCopy, SizeCopy, SrcValCopy});
3750 } else {
3751 Register DefSrcPtr = MRI.createVirtualRegister(RegClass: &SrcValRegClass);
3752 MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSrcPtr, DefSize},
3753 SrcOps: {DstPtrCopy, SrcValCopy, SizeCopy});
3754 }
3755
3756 GI.eraseFromParent();
3757 return true;
3758}
3759
3760bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3761 MachineRegisterInfo &MRI) {
3762 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3763 Register JTAddr = I.getOperand(i: 0).getReg();
3764 unsigned JTI = I.getOperand(i: 1).getIndex();
3765 Register Index = I.getOperand(i: 2).getReg();
3766
3767 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(Idx: JTI, Size: 4, PCRelSym: nullptr);
3768
3769 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3770 // sequence later, to guarantee the integrity of the intermediate values.
3771 if (MF->getFunction().hasFnAttribute(Kind: "aarch64-jump-table-hardening")) {
3772 CodeModel::Model CM = TM.getCodeModel();
3773 if (STI.isTargetMachO()) {
3774 if (CM != CodeModel::Small && CM != CodeModel::Large)
3775 report_fatal_error(reason: "Unsupported code-model for hardened jump-table");
3776 } else {
3777 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3778 assert(STI.isTargetELF() &&
3779 "jump table hardening only supported on MachO/ELF");
3780 if (CM != CodeModel::Small)
3781 report_fatal_error(reason: "Unsupported code-model for hardened jump-table");
3782 }
3783
3784 MIB.buildCopy(Res: {AArch64::X16}, Op: I.getOperand(i: 2).getReg());
3785 MIB.buildInstr(Opcode: AArch64::BR_JumpTable)
3786 .addJumpTableIndex(Idx: I.getOperand(i: 1).getIndex());
3787 I.eraseFromParent();
3788 return true;
3789 }
3790
3791 Register TargetReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
3792 Register ScratchReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64spRegClass);
3793
3794 auto JumpTableInst = MIB.buildInstr(Opc: AArch64::JumpTableDest32,
3795 DstOps: {TargetReg, ScratchReg}, SrcOps: {JTAddr, Index})
3796 .addJumpTableIndex(Idx: JTI);
3797 // Save the jump table info.
3798 MIB.buildInstr(Opc: TargetOpcode::JUMP_TABLE_DEBUG_INFO, DstOps: {},
3799 SrcOps: {static_cast<int64_t>(JTI)});
3800 // Build the indirect branch.
3801 MIB.buildInstr(Opc: AArch64::BR, DstOps: {}, SrcOps: {TargetReg});
3802 I.eraseFromParent();
3803 return constrainSelectedInstRegOperands(I&: *JumpTableInst, TII, TRI, RBI);
3804}
3805
3806bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3807 MachineRegisterInfo &MRI) {
3808 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3809 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3810
3811 Register DstReg = I.getOperand(i: 0).getReg();
3812 unsigned JTI = I.getOperand(i: 1).getIndex();
3813 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
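// Sketch of the eventual expansion (label name is illustrative):
//   adrp xN, .LJTI0_0
//   add  xN, xN, :lo12:.LJTI0_0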
3814 auto MovMI =
3815 MIB.buildInstr(Opc: AArch64::MOVaddrJT, DstOps: {DstReg}, SrcOps: {})
3816 .addJumpTableIndex(Idx: JTI, TargetFlags: AArch64II::MO_PAGE)
3817 .addJumpTableIndex(Idx: JTI, TargetFlags: AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3818 I.eraseFromParent();
3819 return constrainSelectedInstRegOperands(I&: *MovMI, TII, TRI, RBI);
3820}
3821
3822bool AArch64InstructionSelector::selectTLSGlobalValue(
3823 MachineInstr &I, MachineRegisterInfo &MRI) {
3824 if (!STI.isTargetMachO())
3825 return false;
3826 MachineFunction &MF = *I.getParent()->getParent();
3827 MF.getFrameInfo().setAdjustsStack(true);
3828
3829 const auto &GlobalOp = I.getOperand(i: 1);
3830 assert(GlobalOp.getOffset() == 0 &&
3831 "Shouldn't have an offset on TLS globals!");
3832 const GlobalValue &GV = *GlobalOp.getGlobal();
3833
3834 auto LoadGOT =
3835 MIB.buildInstr(Opc: AArch64::LOADgot, DstOps: {&AArch64::GPR64commonRegClass}, SrcOps: {})
3836 .addGlobalAddress(GV: &GV, Offset: 0, TargetFlags: AArch64II::MO_TLS);
3837
3838 auto Load = MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {&AArch64::GPR64commonRegClass},
3839 SrcOps: {LoadGOT.getReg(Idx: 0)})
3840 .addImm(Val: 0);
3841
3842 MIB.buildCopy(Res: Register(AArch64::X0), Op: LoadGOT.getReg(Idx: 0));
3843 // TLS calls preserve all registers except those that absolutely must be
3844 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3845 // silly).
3846 unsigned Opcode = getBLRCallOpcode(MF);
3847
3848 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3849 if (MF.getFunction().hasFnAttribute(Kind: "ptrauth-calls")) {
3850 assert(Opcode == AArch64::BLR);
3851 Opcode = AArch64::BLRAAZ;
3852 }
3853
3854 MIB.buildInstr(Opc: Opcode, DstOps: {}, SrcOps: {Load})
3855 .addUse(RegNo: AArch64::X0, Flags: RegState::Implicit)
3856 .addDef(RegNo: AArch64::X0, Flags: RegState::Implicit)
3857 .addRegMask(Mask: TRI.getTLSCallPreservedMask());
3858
3859 MIB.buildCopy(Res: I.getOperand(i: 0).getReg(), Op: Register(AArch64::X0));
3860 RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(), RC: AArch64::GPR64RegClass,
3861 MRI);
3862 I.eraseFromParent();
3863 return true;
3864}
3865
3866MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3867 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3868 MachineIRBuilder &MIRBuilder) const {
3869 auto Undef = MIRBuilder.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstRC}, SrcOps: {});
3870
3871 auto BuildFn = [&](unsigned SubregIndex) {
3872 auto Ins =
3873 MIRBuilder
3874 .buildInstr(Opc: TargetOpcode::INSERT_SUBREG, DstOps: {DstRC}, SrcOps: {Undef, Scalar})
3875 .addImm(Val: SubregIndex);
3876 constrainSelectedInstRegOperands(I&: *Undef, TII, TRI, RBI);
3877 constrainSelectedInstRegOperands(I&: *Ins, TII, TRI, RBI);
3878 return &*Ins;
3879 };
3880
3881 switch (EltSize) {
3882 case 8:
3883 return BuildFn(AArch64::bsub);
3884 case 16:
3885 return BuildFn(AArch64::hsub);
3886 case 32:
3887 return BuildFn(AArch64::ssub);
3888 case 64:
3889 return BuildFn(AArch64::dsub);
3890 default:
3891 return nullptr;
3892 }
3893}
3894
3895MachineInstr *
3896AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3897 MachineIRBuilder &MIB,
3898 MachineRegisterInfo &MRI) const {
3899 LLT DstTy = MRI.getType(Reg: DstReg);
3900 const TargetRegisterClass *RC =
3901 getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: SrcReg, MRI, TRI));
3902 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3903 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3904 return nullptr;
3905 }
3906 unsigned SubReg = 0;
3907 if (!getSubRegForClass(RC, TRI, SubReg))
3908 return nullptr;
3909 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3910 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3911 << DstTy.getSizeInBits() << "\n");
3912 return nullptr;
3913 }
3914 auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {})
3915 .addReg(RegNo: SrcReg, flags: 0, SubReg);
3916 RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
3917 return Copy;
3918}
3919
3920bool AArch64InstructionSelector::selectMergeValues(
3921 MachineInstr &I, MachineRegisterInfo &MRI) {
3922 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3923 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3924 const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
3925 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3926 const RegisterBank &RB = *RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI);
3927
3928 if (I.getNumOperands() != 3)
3929 return false;
3930
3931 // Merging 2 s64s into an s128.
3932 if (DstTy == LLT::scalar(SizeInBits: 128)) {
3933 if (SrcTy.getSizeInBits() != 64)
3934 return false;
3935 Register DstReg = I.getOperand(i: 0).getReg();
3936 Register Src1Reg = I.getOperand(i: 1).getReg();
3937 Register Src2Reg = I.getOperand(i: 2).getReg();
3938 auto Tmp = MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstTy}, SrcOps: {});
3939 MachineInstr *InsMI = emitLaneInsert(DstReg: std::nullopt, SrcReg: Tmp.getReg(Idx: 0), EltReg: Src1Reg,
3940 /* LaneIdx */ 0, RB, MIRBuilder&: MIB);
3941 if (!InsMI)
3942 return false;
3943 MachineInstr *Ins2MI = emitLaneInsert(DstReg, SrcReg: InsMI->getOperand(i: 0).getReg(),
3944 EltReg: Src2Reg, /* LaneIdx */ 1, RB, MIRBuilder&: MIB);
3945 if (!Ins2MI)
3946 return false;
3947 constrainSelectedInstRegOperands(I&: *InsMI, TII, TRI, RBI);
3948 constrainSelectedInstRegOperands(I&: *Ins2MI, TII, TRI, RBI);
3949 I.eraseFromParent();
3950 return true;
3951 }
3952
3953 if (RB.getID() != AArch64::GPRRegBankID)
3954 return false;
3955
3956 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3957 return false;
3958
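// Merge two s32 GPRs into an s64: any-extend both operands to 64 bits with
// SUBREG_TO_REG, then insert the second one into bits [32, 64) with
//   BFMXri %lo64, %hi64, 32, 31
// so that, roughly, the result is lo | (hi << 32).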
3959 auto *DstRC = &AArch64::GPR64RegClass;
3960 Register SubToRegDef = MRI.createVirtualRegister(RegClass: DstRC);
3961 MachineInstr &SubRegMI = *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(),
3962 MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG))
3963 .addDef(RegNo: SubToRegDef)
3964 .addImm(Val: 0)
3965 .addUse(RegNo: I.getOperand(i: 1).getReg())
3966 .addImm(Val: AArch64::sub_32);
3967 Register SubToRegDef2 = MRI.createVirtualRegister(RegClass: DstRC);
3968 // Need to anyext the second scalar before we can use bfm
3969 MachineInstr &SubRegMI2 = *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(),
3970 MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG))
3971 .addDef(RegNo: SubToRegDef2)
3972 .addImm(Val: 0)
3973 .addUse(RegNo: I.getOperand(i: 2).getReg())
3974 .addImm(Val: AArch64::sub_32);
3975 MachineInstr &BFM =
3976 *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::BFMXri))
3977 .addDef(RegNo: I.getOperand(i: 0).getReg())
3978 .addUse(RegNo: SubToRegDef)
3979 .addUse(RegNo: SubToRegDef2)
3980 .addImm(Val: 32)
3981 .addImm(Val: 31);
3982 constrainSelectedInstRegOperands(I&: SubRegMI, TII, TRI, RBI);
3983 constrainSelectedInstRegOperands(I&: SubRegMI2, TII, TRI, RBI);
3984 constrainSelectedInstRegOperands(I&: BFM, TII, TRI, RBI);
3985 I.eraseFromParent();
3986 return true;
3987}
3988
3989static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3990 const unsigned EltSize) {
3991 // Choose a lane copy opcode and subregister based off of the size of the
3992 // vector's elements.
3993 switch (EltSize) {
3994 case 8:
3995 CopyOpc = AArch64::DUPi8;
3996 ExtractSubReg = AArch64::bsub;
3997 break;
3998 case 16:
3999 CopyOpc = AArch64::DUPi16;
4000 ExtractSubReg = AArch64::hsub;
4001 break;
4002 case 32:
4003 CopyOpc = AArch64::DUPi32;
4004 ExtractSubReg = AArch64::ssub;
4005 break;
4006 case 64:
4007 CopyOpc = AArch64::DUPi64;
4008 ExtractSubReg = AArch64::dsub;
4009 break;
4010 default:
4011 // Unknown size, bail out.
4012 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
4013 return false;
4014 }
4015 return true;
4016}
4017
4018MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
4019 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
4020 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
4021 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4022 unsigned CopyOpc = 0;
4023 unsigned ExtractSubReg = 0;
4024 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: ScalarTy.getSizeInBits())) {
4025 LLVM_DEBUG(
4026 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
4027 return nullptr;
4028 }
4029
4030 const TargetRegisterClass *DstRC =
4031 getRegClassForTypeOnBank(Ty: ScalarTy, RB: DstRB, GetAllRegSet: true);
4032 if (!DstRC) {
4033 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
4034 return nullptr;
4035 }
4036
4037 const RegisterBank &VecRB = *RBI.getRegBank(Reg: VecReg, MRI, TRI);
4038 const LLT &VecTy = MRI.getType(Reg: VecReg);
4039 const TargetRegisterClass *VecRC =
4040 getRegClassForTypeOnBank(Ty: VecTy, RB: VecRB, GetAllRegSet: true);
4041 if (!VecRC) {
4042 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4043 return nullptr;
4044 }
4045
4046 // The register that we're going to copy into.
4047 Register InsertReg = VecReg;
4048 if (!DstReg)
4049 DstReg = MRI.createVirtualRegister(RegClass: DstRC);
4050 // If the lane index is 0, we just use a subregister COPY.
4051 if (LaneIdx == 0) {
4052 auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {*DstReg}, SrcOps: {})
4053 .addReg(RegNo: VecReg, flags: 0, SubReg: ExtractSubReg);
4054 RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI);
4055 return &*Copy;
4056 }
4057
4058 // Lane copies require 128-bit wide registers. If we're dealing with an
4059 // unpacked vector, then we need to move up to that width. Insert an implicit
4060 // def and a subregister insert to get us there.
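// E.g. (placeholder vregs), extracting lane 1 of a 64-bit <2 x s32>: the
// vector is first widened with
//   %wide:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %vec, dsub
// and the lane copy is then DUPi32 %dst, %wide, 1.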
4061 if (VecTy.getSizeInBits() != 128) {
4062 MachineInstr *ScalarToVector = emitScalarToVector(
4063 EltSize: VecTy.getSizeInBits(), DstRC: &AArch64::FPR128RegClass, Scalar: VecReg, MIRBuilder);
4064 if (!ScalarToVector)
4065 return nullptr;
4066 InsertReg = ScalarToVector->getOperand(i: 0).getReg();
4067 }
4068
4069 MachineInstr *LaneCopyMI =
4070 MIRBuilder.buildInstr(Opc: CopyOpc, DstOps: {*DstReg}, SrcOps: {InsertReg}).addImm(Val: LaneIdx);
4071 constrainSelectedInstRegOperands(I&: *LaneCopyMI, TII, TRI, RBI);
4072
4073 // Make sure that we actually constrain the initial copy.
4074 RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI);
4075 return LaneCopyMI;
4076}
4077
4078bool AArch64InstructionSelector::selectExtractElt(
4079 MachineInstr &I, MachineRegisterInfo &MRI) {
4080 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4081 "unexpected opcode!");
4082 Register DstReg = I.getOperand(i: 0).getReg();
4083 const LLT NarrowTy = MRI.getType(Reg: DstReg);
4084 const Register SrcReg = I.getOperand(i: 1).getReg();
4085 const LLT WideTy = MRI.getType(Reg: SrcReg);
4086 (void)WideTy;
4087 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4088 "source register size too small!");
4089 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4090
4091 // Need the lane index to determine the correct copy opcode.
4092 MachineOperand &LaneIdxOp = I.getOperand(i: 2);
4093 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4094
4095 if (RBI.getRegBank(Reg: DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4096 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4097 return false;
4098 }
4099
4100 // Find the index to extract from.
4101 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: LaneIdxOp.getReg(), MRI);
4102 if (!VRegAndVal)
4103 return false;
4104 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4105
4106
4107 const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
4108 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg,
4109 LaneIdx, MIRBuilder&: MIB);
4110 if (!Extract)
4111 return false;
4112
4113 I.eraseFromParent();
4114 return true;
4115}
4116
4117bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4118 MachineInstr &I, MachineRegisterInfo &MRI) {
4119 unsigned NumElts = I.getNumOperands() - 1;
4120 Register SrcReg = I.getOperand(i: NumElts).getReg();
4121 const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
4122 const LLT SrcTy = MRI.getType(Reg: SrcReg);
4123
4124 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4125 if (SrcTy.getSizeInBits() > 128) {
4126 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge.\n");
4127 return false;
4128 }
4129
4130 // We implement a split vector operation by treating the sub-vectors as
4131 // scalars and extracting them.
4132 const RegisterBank &DstRB =
4133 *RBI.getRegBank(Reg: I.getOperand(i: 0).getReg(), MRI, TRI);
4134 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4135 Register Dst = I.getOperand(i: OpIdx).getReg();
4136 MachineInstr *Extract =
4137 emitExtractVectorElt(DstReg: Dst, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg, LaneIdx: OpIdx, MIRBuilder&: MIB);
4138 if (!Extract)
4139 return false;
4140 }
4141 I.eraseFromParent();
4142 return true;
4143}
4144
4145bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4146 MachineRegisterInfo &MRI) {
4147 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4148 "unexpected opcode");
4149
4150 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4151 if (RBI.getRegBank(Reg: I.getOperand(i: 0).getReg(), MRI, TRI)->getID() !=
4152 AArch64::FPRRegBankID ||
4153 RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI)->getID() !=
4154 AArch64::FPRRegBankID) {
4155 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4156 "currently unsupported.\n");
4157 return false;
4158 }
4159
4160 // The last operand is the vector source register, and every other operand is
4161 // a register to unpack into.
4162 unsigned NumElts = I.getNumOperands() - 1;
4163 Register SrcReg = I.getOperand(i: NumElts).getReg();
4164 const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
4165 const LLT WideTy = MRI.getType(Reg: SrcReg);
4166 (void)WideTy;
4167 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4168 "can only unmerge from vector or s128 types!");
4169 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4170 "source register size too small!");
4171
4172 if (!NarrowTy.isScalar())
4173 return selectSplitVectorUnmerge(I, MRI);
4174
4175 // Choose a lane copy opcode and subregister based off of the size of the
4176 // vector's elements.
4177 unsigned CopyOpc = 0;
4178 unsigned ExtractSubReg = 0;
4179 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: NarrowTy.getSizeInBits()))
4180 return false;
4181
4182 // Set up for the lane copies.
4183 MachineBasicBlock &MBB = *I.getParent();
4184
4185 // Stores the registers we'll be copying from.
4186 SmallVector<Register, 4> InsertRegs;
4187
4188 // We'll use the first register twice, so we only need NumElts-1 registers.
4189 unsigned NumInsertRegs = NumElts - 1;
4190
4191 // If our elements fit into exactly 128 bits, then we can copy from the source
4192 // directly. Otherwise, we need to do a bit of setup with some subregister
4193 // inserts.
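// E.g. unmerging a <4 x s32> (128 bits) into four s32s: result 0 is a plain
// ssub subregister COPY from the source, and results 1-3 are DUPi32 lane
// copies with lane indices 1, 2 and 3.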
4194 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4195 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4196 } else {
4197 // No. We have to perform subregister inserts. For each insert, create an
4198 // implicit def and a subregister insert, and save the register we create.
4199 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4200 Ty: LLT::fixed_vector(NumElements: NumElts, ScalarSizeInBits: WideTy.getScalarSizeInBits()),
4201 RB: *RBI.getRegBank(Reg: SrcReg, MRI, TRI));
4202 unsigned SubReg = 0;
4203 bool Found = getSubRegForClass(RC, TRI, SubReg);
4204 (void)Found;
4205 assert(Found && "expected to find last operand's subreg idx");
4206 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4207 Register ImpDefReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass);
4208 MachineInstr &ImpDefMI =
4209 *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: TargetOpcode::IMPLICIT_DEF),
4210 DestReg: ImpDefReg);
4211
4212 // Now, create the subregister insert from SrcReg.
4213 Register InsertReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass);
4214 MachineInstr &InsMI =
4215 *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(),
4216 MCID: TII.get(Opcode: TargetOpcode::INSERT_SUBREG), DestReg: InsertReg)
4217 .addUse(RegNo: ImpDefReg)
4218 .addUse(RegNo: SrcReg)
4219 .addImm(Val: SubReg);
4220
4221 constrainSelectedInstRegOperands(I&: ImpDefMI, TII, TRI, RBI);
4222 constrainSelectedInstRegOperands(I&: InsMI, TII, TRI, RBI);
4223
4224 // Save the register so that we can copy from it after.
4225 InsertRegs.push_back(Elt: InsertReg);
4226 }
4227 }
4228
4229 // Now that we've created any necessary subregister inserts, we can
4230 // create the copies.
4231 //
4232 // Perform the first copy separately as a subregister copy.
4233 Register CopyTo = I.getOperand(i: 0).getReg();
4234 auto FirstCopy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {CopyTo}, SrcOps: {})
4235 .addReg(RegNo: InsertRegs[0], flags: 0, SubReg: ExtractSubReg);
4236 constrainSelectedInstRegOperands(I&: *FirstCopy, TII, TRI, RBI);
4237
4238 // Now, perform the remaining copies as vector lane copies.
4239 unsigned LaneIdx = 1;
4240 for (Register InsReg : InsertRegs) {
4241 Register CopyTo = I.getOperand(i: LaneIdx).getReg();
4242 MachineInstr &CopyInst =
4243 *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: CopyOpc), DestReg: CopyTo)
4244 .addUse(RegNo: InsReg)
4245 .addImm(Val: LaneIdx);
4246 constrainSelectedInstRegOperands(I&: CopyInst, TII, TRI, RBI);
4247 ++LaneIdx;
4248 }
4249
4250 // Separately constrain the first copy's destination. Because of the
4251 // limitation in constrainOperandRegClass, we can't guarantee that this will
4252 // actually be constrained. So, do it ourselves using the second operand.
4253 const TargetRegisterClass *RC =
4254 MRI.getRegClassOrNull(Reg: I.getOperand(i: 1).getReg());
4255 if (!RC) {
4256 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4257 return false;
4258 }
4259
4260 RBI.constrainGenericRegister(Reg: CopyTo, RC: *RC, MRI);
4261 I.eraseFromParent();
4262 return true;
4263}
4264
4265bool AArch64InstructionSelector::selectConcatVectors(
4266 MachineInstr &I, MachineRegisterInfo &MRI) {
4267 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4268 "Unexpected opcode");
4269 Register Dst = I.getOperand(i: 0).getReg();
4270 Register Op1 = I.getOperand(i: 1).getReg();
4271 Register Op2 = I.getOperand(i: 2).getReg();
4272 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder&: MIB);
4273 if (!ConcatMI)
4274 return false;
4275 I.eraseFromParent();
4276 return true;
4277}
4278
4279unsigned
4280AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4281 MachineFunction &MF) const {
4282 Type *CPTy = CPVal->getType();
4283 Align Alignment = MF.getDataLayout().getPrefTypeAlign(Ty: CPTy);
4284
4285 MachineConstantPool *MCP = MF.getConstantPool();
4286 return MCP->getConstantPoolIndex(C: CPVal, Alignment);
4287}
4288
4289MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4290 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4291 const TargetRegisterClass *RC;
4292 unsigned Opc;
4293 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4294 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(Ty: CPVal->getType());
4295 switch (Size) {
4296 case 16:
4297 RC = &AArch64::FPR128RegClass;
4298 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4299 break;
4300 case 8:
4301 RC = &AArch64::FPR64RegClass;
4302 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4303 break;
4304 case 4:
4305 RC = &AArch64::FPR32RegClass;
4306 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4307 break;
4308 case 2:
4309 RC = &AArch64::FPR16RegClass;
4310 Opc = AArch64::LDRHui;
4311 break;
4312 default:
4313 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4314 << *CPVal->getType());
4315 return nullptr;
4316 }
4317
4318 MachineInstr *LoadMI = nullptr;
4319 auto &MF = MIRBuilder.getMF();
4320 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4321 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4322 // Use load(literal) for tiny code model.
4323 LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {}).addConstantPoolIndex(Idx: CPIdx);
4324 } else {
4325 auto Adrp =
4326 MIRBuilder.buildInstr(Opc: AArch64::ADRP, DstOps: {&AArch64::GPR64RegClass}, SrcOps: {})
4327 .addConstantPoolIndex(Idx: CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGE);
4328
4329 LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {Adrp})
4330 .addConstantPoolIndex(
4331 Idx: CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4332
4333 constrainSelectedInstRegOperands(I&: *Adrp, TII, TRI, RBI);
4334 }
4335
4336 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4337 LoadMI->addMemOperand(MF, MO: MF.getMachineMemOperand(PtrInfo,
4338 F: MachineMemOperand::MOLoad,
4339 Size, BaseAlignment: Align(Size)));
4340 constrainSelectedInstRegOperands(I&: *LoadMI, TII, TRI, RBI);
4341 return LoadMI;
4342}
4343
4344 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4345/// size and RB.
4346static std::pair<unsigned, unsigned>
4347getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4348 unsigned Opc, SubregIdx;
4349 if (RB.getID() == AArch64::GPRRegBankID) {
4350 if (EltSize == 8) {
4351 Opc = AArch64::INSvi8gpr;
4352 SubregIdx = AArch64::bsub;
4353 } else if (EltSize == 16) {
4354 Opc = AArch64::INSvi16gpr;
4355 SubregIdx = AArch64::ssub;
4356 } else if (EltSize == 32) {
4357 Opc = AArch64::INSvi32gpr;
4358 SubregIdx = AArch64::ssub;
4359 } else if (EltSize == 64) {
4360 Opc = AArch64::INSvi64gpr;
4361 SubregIdx = AArch64::dsub;
4362 } else {
4363 llvm_unreachable("invalid elt size!");
4364 }
4365 } else {
4366 if (EltSize == 8) {
4367 Opc = AArch64::INSvi8lane;
4368 SubregIdx = AArch64::bsub;
4369 } else if (EltSize == 16) {
4370 Opc = AArch64::INSvi16lane;
4371 SubregIdx = AArch64::hsub;
4372 } else if (EltSize == 32) {
4373 Opc = AArch64::INSvi32lane;
4374 SubregIdx = AArch64::ssub;
4375 } else if (EltSize == 64) {
4376 Opc = AArch64::INSvi64lane;
4377 SubregIdx = AArch64::dsub;
4378 } else {
4379 llvm_unreachable("invalid elt size!");
4380 }
4381 }
4382 return std::make_pair(x&: Opc, y&: SubregIdx);
4383}
4384
4385MachineInstr *AArch64InstructionSelector::emitInstr(
4386 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4387 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4388 const ComplexRendererFns &RenderFns) const {
4389 assert(Opcode && "Expected an opcode?");
4390 assert(!isPreISelGenericOpcode(Opcode) &&
4391 "Function should only be used to produce selected instructions!");
4392 auto MI = MIRBuilder.buildInstr(Opc: Opcode, DstOps, SrcOps);
4393 if (RenderFns)
4394 for (auto &Fn : *RenderFns)
4395 Fn(MI);
4396 constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI);
4397 return &*MI;
4398}
4399
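// The AddrModeAndSizeToOpcode table passed to emitAddSub below is indexed as
// follows; each row holds the {64-bit, 32-bit} opcodes:
//   [0] register + positive arithmetic immediate (INSTRri)
//   [1] register + shifted register              (INSTRrs)
//   [2] register + register                      (INSTRrr)
//   [3] inverse op + negated immediate           (INSTRri of the inverse op)
//   [4] register + extended register             (INSTRrx)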
4400MachineInstr *AArch64InstructionSelector::emitAddSub(
4401 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4402 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4403 MachineIRBuilder &MIRBuilder) const {
4404 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4405 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4406 auto Ty = MRI.getType(Reg: LHS.getReg());
4407 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4408 unsigned Size = Ty.getSizeInBits();
4409 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4410 bool Is32Bit = Size == 32;
4411
4412 // INSTRri form with positive arithmetic immediate.
4413 if (auto Fns = selectArithImmed(Root&: RHS))
4414 return emitInstr(Opcode: AddrModeAndSizeToOpcode[0][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4415 MIRBuilder, RenderFns: Fns);
4416
4417 // INSTRri form with negative arithmetic immediate.
4418 if (auto Fns = selectNegArithImmed(Root&: RHS))
4419 return emitInstr(Opcode: AddrModeAndSizeToOpcode[3][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4420 MIRBuilder, RenderFns: Fns);
4421
4422 // INSTRrx form.
4423 if (auto Fns = selectArithExtendedRegister(Root&: RHS))
4424 return emitInstr(Opcode: AddrModeAndSizeToOpcode[4][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4425 MIRBuilder, RenderFns: Fns);
4426
4427 // INSTRrs form.
4428 if (auto Fns = selectShiftedRegister(Root&: RHS))
4429 return emitInstr(Opcode: AddrModeAndSizeToOpcode[1][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4430 MIRBuilder, RenderFns: Fns);
4431 return emitInstr(Opcode: AddrModeAndSizeToOpcode[2][Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS},
4432 MIRBuilder);
4433}
4434
4435MachineInstr *
4436AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4437 MachineOperand &RHS,
4438 MachineIRBuilder &MIRBuilder) const {
4439 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4440 ._M_elems: {{AArch64::ADDXri, AArch64::ADDWri},
4441 {AArch64::ADDXrs, AArch64::ADDWrs},
4442 {AArch64::ADDXrr, AArch64::ADDWrr},
4443 {AArch64::SUBXri, AArch64::SUBWri},
4444 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4445 return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst: DefReg, LHS, RHS, MIRBuilder);
4446}
4447
4448MachineInstr *
4449AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4450 MachineOperand &RHS,
4451 MachineIRBuilder &MIRBuilder) const {
4452 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4453 ._M_elems: {{AArch64::ADDSXri, AArch64::ADDSWri},
4454 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4455 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4456 {AArch64::SUBSXri, AArch64::SUBSWri},
4457 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4458 return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder);
4459}
4460
4461MachineInstr *
4462AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4463 MachineOperand &RHS,
4464 MachineIRBuilder &MIRBuilder) const {
4465 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4466 ._M_elems: {{AArch64::SUBSXri, AArch64::SUBSWri},
4467 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4468 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4469 {AArch64::ADDSXri, AArch64::ADDSWri},
4470 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4471 return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder);
4472}
4473
4474MachineInstr *
4475AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4476 MachineOperand &RHS,
4477 MachineIRBuilder &MIRBuilder) const {
4478 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4479 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4480 bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32);
4481 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4482 return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder);
4483}
4484
4485MachineInstr *
4486AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4487 MachineOperand &RHS,
4488 MachineIRBuilder &MIRBuilder) const {
4489 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4490 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4491 bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32);
4492 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4493 return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder);
4494}
4495
4496MachineInstr *
4497AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4498 MachineIRBuilder &MIRBuilder) const {
4499 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4500 bool Is32Bit = (MRI.getType(Reg: LHS.getReg()).getSizeInBits() == 32);
4501 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4502 return emitADDS(Dst: MRI.createVirtualRegister(RegClass: RC), LHS, RHS, MIRBuilder);
4503}
4504
4505MachineInstr *
4506AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4507 MachineIRBuilder &MIRBuilder) const {
4508 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4509 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4510 LLT Ty = MRI.getType(Reg: LHS.getReg());
4511 unsigned RegSize = Ty.getSizeInBits();
4512 bool Is32Bit = (RegSize == 32);
4513 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4514 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4515 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4516 // ANDS needs a logical immediate for its immediate form. Check if we can
4517 // fold one in.
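// E.g. when folding a compare of (x & 0xff) against zero, the constant fits
// the logical-immediate form and the result is equivalent to 'tst w0, #0xff'
// (register chosen for illustration).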
4518 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI)) {
4519 int64_t Imm = ValAndVReg->Value.getSExtValue();
4520
4521 if (AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize)) {
4522 auto TstMI = MIRBuilder.buildInstr(Opc: OpcTable[0][Is32Bit], DstOps: {Ty}, SrcOps: {LHS});
4523 TstMI.addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: Imm, regSize: RegSize));
4524 constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI);
4525 return &*TstMI;
4526 }
4527 }
4528
4529 if (auto Fns = selectLogicalShiftedRegister(Root&: RHS))
4530 return emitInstr(Opcode: OpcTable[1][Is32Bit], DstOps: {Ty}, SrcOps: {LHS}, MIRBuilder, RenderFns: Fns);
4531 return emitInstr(Opcode: OpcTable[2][Is32Bit], DstOps: {Ty}, SrcOps: {LHS, RHS}, MIRBuilder);
4532}
4533
4534MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4535 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4536 MachineIRBuilder &MIRBuilder) const {
4537 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4538 assert(Predicate.isPredicate() && "Expected predicate?");
4539 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4540 LLT CmpTy = MRI.getType(Reg: LHS.getReg());
4541 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4542 unsigned Size = CmpTy.getSizeInBits();
4543 (void)Size;
4544 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4545 // Fold the compare into a cmn or tst if possible.
4546 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4547 return FoldCmp;
4548 auto Dst = MRI.cloneVirtualRegister(VReg: LHS.getReg());
4549 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4550}
4551
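// Some FP predicates need two AArch64 condition codes (e.g. FCMP_ONE tests
// 'mi || gt'). In that case, materialize each bit with a CSINC (a CSET of the
// un-inverted code) and OR them together; otherwise a single CSINC suffices.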
4552MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4553 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4554 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4555#ifndef NDEBUG
4556 LLT Ty = MRI.getType(Dst);
4557 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4558 "Expected a 32-bit scalar register?");
4559#endif
4560 const Register ZReg = AArch64::WZR;
4561 AArch64CC::CondCode CC1, CC2;
4562 changeFCMPPredToAArch64CC(P: Pred, CondCode&: CC1, CondCode2&: CC2);
4563 auto InvCC1 = AArch64CC::getInvertedCondCode(Code: CC1);
4564 if (CC2 == AArch64CC::AL)
4565 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1,
4566 MIRBuilder);
4567 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4568 Register Def1Reg = MRI.createVirtualRegister(RegClass: RC);
4569 Register Def2Reg = MRI.createVirtualRegister(RegClass: RC);
4570 auto InvCC2 = AArch64CC::getInvertedCondCode(Code: CC2);
4571 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1, MIRBuilder);
4572 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC2, MIRBuilder);
4573 auto OrMI = MIRBuilder.buildInstr(Opc: AArch64::ORRWrr, DstOps: {Dst}, SrcOps: {Def1Reg, Def2Reg});
4574 constrainSelectedInstRegOperands(I&: *OrMI, TII, TRI, RBI);
4575 return &*OrMI;
4576}
4577
4578MachineInstr *AArch64InstructionSelector::emitFPCompare(
4579 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4580 std::optional<CmpInst::Predicate> Pred) const {
4581 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4582 LLT Ty = MRI.getType(Reg: LHS);
4583 if (Ty.isVector())
4584 return nullptr;
4585 unsigned OpSize = Ty.getSizeInBits();
4586 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4587
4588 // If this is a compare against +0.0, then we don't have
4589 // to explicitly materialize a constant.
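// E.g. emit 'fcmp s0, #0.0' rather than loading +0.0 into a register first.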
4590 const ConstantFP *FPImm = getConstantFPVRegVal(VReg: RHS, MRI);
4591 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4592
4593 auto IsEqualityPred = [](CmpInst::Predicate P) {
4594 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4595 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4596 };
4597 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4598 // Try commuting the operands.
4599 const ConstantFP *LHSImm = getConstantFPVRegVal(VReg: LHS, MRI);
4600 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4601 ShouldUseImm = true;
4602 std::swap(a&: LHS, b&: RHS);
4603 }
4604 }
4605 unsigned CmpOpcTbl[2][3] = {
4606 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4607 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4608 unsigned CmpOpc =
4609 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4610
4611 // Partially build the compare. Decide if we need to add a use for the
4612 // third operand based on whether or not we're comparing against 0.0.
4613 auto CmpMI = MIRBuilder.buildInstr(Opcode: CmpOpc).addUse(RegNo: LHS);
4614 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4615 if (!ShouldUseImm)
4616 CmpMI.addUse(RegNo: RHS);
4617 constrainSelectedInstRegOperands(I&: *CmpMI, TII, TRI, RBI);
4618 return &*CmpMI;
4619}
4620
4621MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4622 std::optional<Register> Dst, Register Op1, Register Op2,
4623 MachineIRBuilder &MIRBuilder) const {
4624 // We implement a vector concat by:
4625 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4626 // 2. Insert the upper vector into the destination's upper element
4627 // TODO: some of this code is common with G_BUILD_VECTOR handling.
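// For example, when concatenating two <2 x s32> values into a <4 x s32>, each
// 64-bit operand is first placed in the low d-subregister of a 128-bit
// register, and the second is then inserted into lane 1, which corresponds
// roughly to 'mov v0.d[1], v1.d[0]' (registers for illustration only).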
4628 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4629
4630 const LLT Op1Ty = MRI.getType(Reg: Op1);
4631 const LLT Op2Ty = MRI.getType(Reg: Op2);
4632
4633 if (Op1Ty != Op2Ty) {
4634 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4635 return nullptr;
4636 }
4637 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4638
4639 if (Op1Ty.getSizeInBits() >= 128) {
4640 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4641 return nullptr;
4642 }
4643
4644 // At the moment we just support 64 bit vector concats.
4645 if (Op1Ty.getSizeInBits() != 64) {
4646 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4647 return nullptr;
4648 }
4649
4650 const LLT ScalarTy = LLT::scalar(SizeInBits: Op1Ty.getSizeInBits());
4651 const RegisterBank &FPRBank = *RBI.getRegBank(Reg: Op1, MRI, TRI);
4652 const TargetRegisterClass *DstRC =
4653 getRegClassForTypeOnBank(Ty: Op1Ty.multiplyElements(Factor: 2), RB: FPRBank);
4654
4655 MachineInstr *WidenedOp1 =
4656 emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op1, MIRBuilder);
4657 MachineInstr *WidenedOp2 =
4658 emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op2, MIRBuilder);
4659 if (!WidenedOp1 || !WidenedOp2) {
4660 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4661 return nullptr;
4662 }
4663
4664 // Now do the insert of the upper element.
4665 unsigned InsertOpc, InsSubRegIdx;
4666 std::tie(args&: InsertOpc, args&: InsSubRegIdx) =
4667 getInsertVecEltOpInfo(RB: FPRBank, EltSize: ScalarTy.getSizeInBits());
4668
4669 if (!Dst)
4670 Dst = MRI.createVirtualRegister(RegClass: DstRC);
4671 auto InsElt =
4672 MIRBuilder
4673 .buildInstr(Opc: InsertOpc, DstOps: {*Dst}, SrcOps: {WidenedOp1->getOperand(i: 0).getReg()})
4674 .addImm(Val: 1) /* Lane index */
4675 .addUse(RegNo: WidenedOp2->getOperand(i: 0).getReg())
4676 .addImm(Val: 0);
4677 constrainSelectedInstRegOperands(I&: *InsElt, TII, TRI, RBI);
4678 return &*InsElt;
4679}
4680
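// Note that CSINC Dst, WZR/XZR, WZR/XZR, Pred produces 1 when Pred is *false*
// and 0 when it is true (it is the CSET alias of the inverted condition), so
// callers that want a plain CSET pass the inverted condition code.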
4681MachineInstr *
4682AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4683 Register Src2, AArch64CC::CondCode Pred,
4684 MachineIRBuilder &MIRBuilder) const {
4685 auto &MRI = *MIRBuilder.getMRI();
4686 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg: Dst);
4687 // If we used a register class, then this won't necessarily have an LLT.
4688 // Compute the size based on whether we have a register class or a bank.
4689 unsigned Size;
4690 if (const auto *RC = dyn_cast<const TargetRegisterClass *>(Val: RegClassOrBank))
4691 Size = TRI.getRegSizeInBits(RC: *RC);
4692 else
4693 Size = MRI.getType(Reg: Dst).getSizeInBits();
4694 // Some opcodes use s1.
4695 assert(Size <= 64 && "Expected 64 bits or less only!");
4696 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4697 unsigned Opc = OpcTable[Size == 64];
4698 auto CSINC = MIRBuilder.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Src1, Src2}).addImm(Val: Pred);
4699 constrainSelectedInstRegOperands(I&: *CSINC, TII, TRI, RBI);
4700 return &*CSINC;
4701}
4702
4703MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4704 Register CarryReg) {
4705 MachineRegisterInfo *MRI = MIB.getMRI();
4706 unsigned Opcode = I.getOpcode();
4707
4708 // If the instruction is a SUB, we need to negate the carry,
4709 // because borrowing is indicated by carry-flag == 0.
4710 bool NeedsNegatedCarry =
4711 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4712
4713 // If the previous instruction will already produce the correct carry, do not
4714 // emit a carry-generating instruction. E.g. for G_UADDE/G_USUBE sequences
4715 // generated during legalization of wide add/sub. This optimization depends on
4716 // these sequences not being interrupted by other instructions.
4717 // We have to select the previous instruction before the carry-using
4718 // instruction is deleted by the calling function, otherwise the previous
4719 // instruction might become dead and would get deleted.
4720 MachineInstr *SrcMI = MRI->getVRegDef(Reg: CarryReg);
4721 if (SrcMI == I.getPrevNode()) {
4722 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(Val: SrcMI)) {
4723 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4724 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4725 CarrySrcMI->isUnsigned() &&
4726 CarrySrcMI->getCarryOutReg() == CarryReg &&
4727 selectAndRestoreState(I&: *SrcMI))
4728 return nullptr;
4729 }
4730 }
4731
4732 Register DeadReg = MRI->createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
4733
4734 if (NeedsNegatedCarry) {
4735 // (0 - Carry) sets !C in NZCV when Carry == 1
4736 Register ZReg = AArch64::WZR;
4737 return emitInstr(Opcode: AArch64::SUBSWrr, DstOps: {DeadReg}, SrcOps: {ZReg, CarryReg}, MIRBuilder&: MIB);
4738 }
4739
4740 // (Carry - 1) sets !C in NZCV when Carry == 0
4741 auto Fns = select12BitValueWithLeftShift(Immed: 1);
4742 return emitInstr(Opcode: AArch64::SUBSWri, DstOps: {DeadReg}, SrcOps: {CarryReg}, MIRBuilder&: MIB, RenderFns: Fns);
4743}
4744
4745bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4746 MachineRegisterInfo &MRI) {
4747 auto &CarryMI = cast<GAddSubCarryOut>(Val&: I);
4748
4749 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(Val: &I)) {
4750 // Set NZCV carry according to carry-in VReg
4751 emitCarryIn(I, CarryReg: CarryInMI->getCarryInReg());
4752 }
4753
4754 // Emit the operation and get the correct condition code.
4755 auto OpAndCC = emitOverflowOp(Opcode: I.getOpcode(), Dst: CarryMI.getDstReg(),
4756 LHS&: CarryMI.getLHS(), RHS&: CarryMI.getRHS(), MIRBuilder&: MIB);
4757
4758 Register CarryOutReg = CarryMI.getCarryOutReg();
4759
4760 // Don't convert carry-out to VReg if it is never used
4761 if (!MRI.use_nodbg_empty(RegNo: CarryOutReg)) {
4762 // Now, put the overflow result in the register given by the first operand
4763 // to the overflow op. CSINC increments the result when the predicate is
4764 // false, so to get the increment when it's true, we need to use the
4765 // inverse. In this case, we want to increment when carry is set.
4766 Register ZReg = AArch64::WZR;
4767 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4768 Pred: getInvertedCondCode(Code: OpAndCC.second), MIRBuilder&: MIB);
4769 }
4770
4771 I.eraseFromParent();
4772 return true;
4773}
4774
4775std::pair<MachineInstr *, AArch64CC::CondCode>
4776AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4777 MachineOperand &LHS,
4778 MachineOperand &RHS,
4779 MachineIRBuilder &MIRBuilder) const {
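// The returned condition code tests the overflow/carry-out of the emitted
// flag-setting instruction:
//   VS - signed overflow (V set)
//   HS - unsigned add carry-out (C set)
//   LO - unsigned subtract borrow (C clear)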
4780 switch (Opcode) {
4781 default:
4782 llvm_unreachable("Unexpected opcode!");
4783 case TargetOpcode::G_SADDO:
4784 return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4785 case TargetOpcode::G_UADDO:
4786 return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS);
4787 case TargetOpcode::G_SSUBO:
4788 return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4789 case TargetOpcode::G_USUBO:
4790 return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO);
4791 case TargetOpcode::G_SADDE:
4792 return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4793 case TargetOpcode::G_UADDE:
4794 return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS);
4795 case TargetOpcode::G_SSUBE:
4796 return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4797 case TargetOpcode::G_USUBE:
4798 return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO);
4799 }
4800}
4801
4802/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4803/// expressed as a conjunction.
4804/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4805/// changing the conditions on the CMP tests.
4806/// (this means we can call emitConjunctionRec() with
4807/// Negate==true on this sub-tree)
4808/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4809/// cannot do the negation naturally. We are required to
4810/// emit the subtree first in this case.
4811 /// \param WillNegate Set to true if we are called when the result of this
4812/// subexpression must be negated. This happens when the
4813/// outer expression is an OR. We can use this fact to know
4814/// that we have a double negation (or (or ...) ...) that
4815/// can be implemented for free.
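///
/// As a sketch (the exact NZCV immediate is chosen by
/// getNZCVToSatisfyCondCode), the conjunction
///   (icmp slt a, b) & (icmp eq c, d)
/// can be emitted as a CCMP chain:
///   cmp  c, d
///   ccmp a, b, #nzcv, eq  ; only compare a, b if the first test passed;
///                         ; otherwise set flags so that 'lt' fails
///   cset w0, lt           ; if the result feeds a boolean use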
4816static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4817 bool WillNegate, MachineRegisterInfo &MRI,
4818 unsigned Depth = 0) {
4819 if (!MRI.hasOneNonDBGUse(RegNo: Val))
4820 return false;
4821 MachineInstr *ValDef = MRI.getVRegDef(Reg: Val);
4822 unsigned Opcode = ValDef->getOpcode();
4823 if (isa<GAnyCmp>(Val: ValDef)) {
4824 CanNegate = true;
4825 MustBeFirst = false;
4826 return true;
4827 }
4828 // Protect against exponential runtime and stack overflow.
4829 if (Depth > 6)
4830 return false;
4831 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4832 bool IsOR = Opcode == TargetOpcode::G_OR;
4833 Register O0 = ValDef->getOperand(i: 1).getReg();
4834 Register O1 = ValDef->getOperand(i: 2).getReg();
4835 bool CanNegateL;
4836 bool MustBeFirstL;
4837 if (!canEmitConjunction(Val: O0, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI, Depth: Depth + 1))
4838 return false;
4839 bool CanNegateR;
4840 bool MustBeFirstR;
4841 if (!canEmitConjunction(Val: O1, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI, Depth: Depth + 1))
4842 return false;
4843
4844 if (MustBeFirstL && MustBeFirstR)
4845 return false;
4846
4847 if (IsOR) {
4848 // For an OR expression we need to be able to naturally negate at least
4849 // one side or we cannot do the transformation at all.
4850 if (!CanNegateL && !CanNegateR)
4851 return false;
4852 // If the result of the OR will be negated and we can naturally negate
4853 // the leaves, then this sub-tree as a whole negates naturally.
4854 CanNegate = WillNegate && CanNegateL && CanNegateR;
4855 // If we cannot naturally negate the whole sub-tree, then this must be
4856 // emitted first.
4857 MustBeFirst = !CanNegate;
4858 } else {
4859 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4860 // We cannot naturally negate an AND operation.
4861 CanNegate = false;
4862 MustBeFirst = MustBeFirstL || MustBeFirstR;
4863 }
4864 return true;
4865 }
4866 return false;
4867}
4868
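// Emit a conditional compare (CCMP/CCMN/FCCMP): if 'Predicate' holds, the
// operands are compared as usual; otherwise NZCV is set to an immediate chosen
// so that 'OutCC' evaluates to false. This is what chains the conjunction
// together.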
4869MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4870 Register LHS, Register RHS, CmpInst::Predicate CC,
4871 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4872 MachineIRBuilder &MIB) const {
4873 auto &MRI = *MIB.getMRI();
4874 LLT OpTy = MRI.getType(Reg: LHS);
4875 unsigned CCmpOpc;
4876 std::optional<ValueAndVReg> C;
4877 if (CmpInst::isIntPredicate(P: CC)) {
4878 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4879 C = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
4880 if (!C || C->Value.sgt(RHS: 31) || C->Value.slt(RHS: -31))
4881 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4882 else if (C->Value.ule(RHS: 31))
4883 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4884 else
4885 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4886 } else {
4887 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4888 OpTy.getSizeInBits() == 64);
4889 switch (OpTy.getSizeInBits()) {
4890 case 16:
4891 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4892 CCmpOpc = AArch64::FCCMPHrr;
4893 break;
4894 case 32:
4895 CCmpOpc = AArch64::FCCMPSrr;
4896 break;
4897 case 64:
4898 CCmpOpc = AArch64::FCCMPDrr;
4899 break;
4900 default:
4901 return nullptr;
4902 }
4903 }
4904 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(Code: OutCC);
4905 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(Code: InvOutCC);
4906 auto CCmp =
4907 MIB.buildInstr(Opc: CCmpOpc, DstOps: {}, SrcOps: {LHS});
4908 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4909 CCmp.addImm(Val: C->Value.getZExtValue());
4910 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4911 CCmp.addImm(Val: C->Value.abs().getZExtValue());
4912 else
4913 CCmp.addReg(RegNo: RHS);
4914 CCmp.addImm(Val: NZCV).addImm(Val: Predicate);
4915 constrainSelectedInstRegOperands(I&: *CCmp, TII, TRI, RBI);
4916 return &*CCmp;
4917}
4918
4919MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4920 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4921 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4922 // We're at a tree leaf, produce a conditional comparison operation.
4923 auto &MRI = *MIB.getMRI();
4924 MachineInstr *ValDef = MRI.getVRegDef(Reg: Val);
4925 unsigned Opcode = ValDef->getOpcode();
4926 if (auto *Cmp = dyn_cast<GAnyCmp>(Val: ValDef)) {
4927 Register LHS = Cmp->getLHSReg();
4928 Register RHS = Cmp->getRHSReg();
4929 CmpInst::Predicate CC = Cmp->getCond();
4930 if (Negate)
4931 CC = CmpInst::getInversePredicate(pred: CC);
4932 if (isa<GICmp>(Val: Cmp)) {
4933 OutCC = changeICMPPredToAArch64CC(P: CC);
4934 } else {
4935 // Handle special FP cases.
4936 AArch64CC::CondCode ExtraCC;
4937 changeFPCCToANDAArch64CC(CC, CondCode&: OutCC, CondCode2&: ExtraCC);
4938 // Some floating point conditions can't be tested with a single condition
4939 // code. Construct an additional comparison in this case.
4940 if (ExtraCC != AArch64CC::AL) {
4941 MachineInstr *ExtraCmp;
4942 if (!CCOp)
4943 ExtraCmp = emitFPCompare(LHS, RHS, MIRBuilder&: MIB, Pred: CC);
4944 else
4945 ExtraCmp =
4946 emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC: ExtraCC, MIB);
4947 CCOp = ExtraCmp->getOperand(i: 0).getReg();
4948 Predicate = ExtraCC;
4949 }
4950 }
4951
4952 // Produce a normal comparison if we are first in the chain
4953 if (!CCOp) {
4954 auto Dst = MRI.cloneVirtualRegister(VReg: LHS);
4955 if (isa<GICmp>(Val: Cmp))
4956 return emitSUBS(Dst, LHS&: Cmp->getOperand(i: 2), RHS&: Cmp->getOperand(i: 3), MIRBuilder&: MIB);
4957 return emitFPCompare(LHS: Cmp->getOperand(i: 2).getReg(),
4958 RHS: Cmp->getOperand(i: 3).getReg(), MIRBuilder&: MIB);
4959 }
4960 // Otherwise produce a ccmp.
4961 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4962 }
4963 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4964
4965 bool IsOR = Opcode == TargetOpcode::G_OR;
4966
4967 Register LHS = ValDef->getOperand(i: 1).getReg();
4968 bool CanNegateL;
4969 bool MustBeFirstL;
4970 bool ValidL = canEmitConjunction(Val: LHS, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI);
4971 assert(ValidL && "Valid conjunction/disjunction tree");
4972 (void)ValidL;
4973
4974 Register RHS = ValDef->getOperand(i: 2).getReg();
4975 bool CanNegateR;
4976 bool MustBeFirstR;
4977 bool ValidR = canEmitConjunction(Val: RHS, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI);
4978 assert(ValidR && "Valid conjunction/disjunction tree");
4979 (void)ValidR;
4980
4981 // Swap sub-tree that must come first to the right side.
4982 if (MustBeFirstL) {
4983 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4984 std::swap(a&: LHS, b&: RHS);
4985 std::swap(a&: CanNegateL, b&: CanNegateR);
4986 std::swap(a&: MustBeFirstL, b&: MustBeFirstR);
4987 }
4988
4989 bool NegateR;
4990 bool NegateAfterR;
4991 bool NegateL;
4992 bool NegateAfterAll;
4993 if (Opcode == TargetOpcode::G_OR) {
4994 // Swap the sub-tree that we can negate naturally to the left.
4995 if (!CanNegateL) {
4996 assert(CanNegateR && "at least one side must be negatable");
4997 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4998 assert(!Negate);
4999 std::swap(a&: LHS, b&: RHS);
5000 NegateR = false;
5001 NegateAfterR = true;
5002 } else {
5003 // Negate the left sub-tree if possible, otherwise negate the result.
5004 NegateR = CanNegateR;
5005 NegateAfterR = !CanNegateR;
5006 }
5007 NegateL = true;
5008 NegateAfterAll = !Negate;
5009 } else {
5010 assert(Opcode == TargetOpcode::G_AND &&
5011 "Valid conjunction/disjunction tree");
5012 assert(!Negate && "Valid conjunction/disjunction tree");
5013
5014 NegateL = false;
5015 NegateR = false;
5016 NegateAfterR = false;
5017 NegateAfterAll = false;
5018 }
5019
5020 // Emit sub-trees.
5021 AArch64CC::CondCode RHSCC;
5022 MachineInstr *CmpR =
5023 emitConjunctionRec(Val: RHS, OutCC&: RHSCC, Negate: NegateR, CCOp, Predicate, MIB);
5024 if (NegateAfterR)
5025 RHSCC = AArch64CC::getInvertedCondCode(Code: RHSCC);
5026 MachineInstr *CmpL = emitConjunctionRec(
5027 Val: LHS, OutCC, Negate: NegateL, CCOp: CmpR->getOperand(i: 0).getReg(), Predicate: RHSCC, MIB);
5028 if (NegateAfterAll)
5029 OutCC = AArch64CC::getInvertedCondCode(Code: OutCC);
5030 return CmpL;
5031}
5032
5033MachineInstr *AArch64InstructionSelector::emitConjunction(
5034 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
5035 bool DummyCanNegate;
5036 bool DummyMustBeFirst;
5037 if (!canEmitConjunction(Val, CanNegate&: DummyCanNegate, MustBeFirst&: DummyMustBeFirst, WillNegate: false,
5038 MRI&: *MIB.getMRI()))
5039 return nullptr;
5040 return emitConjunctionRec(Val, OutCC, Negate: false, CCOp: Register(), Predicate: AArch64CC::AL, MIB);
5041}
5042
5043bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
5044 MachineInstr &CondMI) {
5045 AArch64CC::CondCode AArch64CC;
5046 MachineInstr *ConjMI = emitConjunction(Val: SelI.getCondReg(), OutCC&: AArch64CC, MIB);
5047 if (!ConjMI)
5048 return false;
5049
5050 emitSelect(Dst: SelI.getReg(Idx: 0), True: SelI.getTrueReg(), False: SelI.getFalseReg(), CC: AArch64CC, MIB);
5051 SelI.eraseFromParent();
5052 return true;
5053}
5054
5055bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
5056 MachineRegisterInfo &MRI = *MIB.getMRI();
5057 // We want to recognize this pattern:
5058 //
5059 // $z = G_FCMP pred, $x, $y
5060 // ...
5061 // $w = G_SELECT $z, $a, $b
5062 //
5063 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
5064 // some copies/truncs in between.)
5065 //
5066 // If we see this, then we can emit something like this:
5067 //
5068 // fcmp $x, $y
5069 // fcsel $w, $a, $b, pred
5070 //
5071 // Rather than emitting both of the rather long sequences in the standard
5072 // G_FCMP/G_SELECT select methods.
5073
5074 // First, check if the condition is defined by a compare.
5075 MachineInstr *CondDef = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg());
5076
5077 // We can only fold if all of the defs have one use.
5078 Register CondDefReg = CondDef->getOperand(i: 0).getReg();
5079 if (!MRI.hasOneNonDBGUse(RegNo: CondDefReg)) {
5080 // Unless it's another select.
5081 for (const MachineInstr &UI : MRI.use_nodbg_instructions(Reg: CondDefReg)) {
5082 if (CondDef == &UI)
5083 continue;
5084 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5085 return false;
5086 }
5087 }
5088
5089 // Is the condition defined by a compare?
5090 unsigned CondOpc = CondDef->getOpcode();
5091 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5092 if (tryOptSelectConjunction(SelI&: I, CondMI&: *CondDef))
5093 return true;
5094 return false;
5095 }
5096
5097 AArch64CC::CondCode CondCode;
5098 if (CondOpc == TargetOpcode::G_ICMP) {
5099 auto Pred =
5100 static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate());
5101 CondCode = changeICMPPredToAArch64CC(P: Pred);
5102 emitIntegerCompare(LHS&: CondDef->getOperand(i: 2), RHS&: CondDef->getOperand(i: 3),
5103 Predicate&: CondDef->getOperand(i: 1), MIRBuilder&: MIB);
5104 } else {
5105 // Get the condition code for the select.
5106 auto Pred =
5107 static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate());
5108 AArch64CC::CondCode CondCode2;
5109 changeFCMPPredToAArch64CC(P: Pred, CondCode, CondCode2);
5110
5111 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5112 // instructions to emit the comparison.
5113 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5114 // unnecessary.
5115 if (CondCode2 != AArch64CC::AL)
5116 return false;
5117
5118 if (!emitFPCompare(LHS: CondDef->getOperand(i: 2).getReg(),
5119 RHS: CondDef->getOperand(i: 3).getReg(), MIRBuilder&: MIB)) {
5120 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5121 return false;
5122 }
5123 }
5124
5125 // Emit the select.
5126 emitSelect(Dst: I.getOperand(i: 0).getReg(), True: I.getOperand(i: 2).getReg(),
5127 False: I.getOperand(i: 3).getReg(), CC: CondCode, MIB);
5128 I.eraseFromParent();
5129 return true;
5130}
5131
5132MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5133 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5134 MachineIRBuilder &MIRBuilder) const {
5135 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5136 "Unexpected MachineOperand");
5137 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5138 // We want to find this sort of thing:
5139 // x = G_SUB 0, y
5140 // G_ICMP z, x
5141 //
5142 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5143 // e.g:
5144 //
5145 // cmn z, y
5146
5147 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5148 MachineInstr *LHSDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI);
5149 MachineInstr *RHSDef = getDefIgnoringCopies(Reg: RHS.getReg(), MRI);
5150 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5151 // Given this:
5152 //
5153 // x = G_SUB 0, y
5154 // G_ICMP x, z
5155 //
5156 // Produce this:
5157 //
5158 // cmn y, z
5159 if (isCMN(MaybeSub: LHSDef, Pred: P, MRI))
5160 return emitCMN(LHS&: LHSDef->getOperand(i: 2), RHS, MIRBuilder);
5161
5162 // Same idea here, but with the RHS of the compare instead:
5163 //
5164 // Given this:
5165 //
5166 // x = G_SUB 0, y
5167 // G_ICMP z, x
5168 //
5169 // Produce this:
5170 //
5171 // cmn z, y
5172 if (isCMN(MaybeSub: RHSDef, Pred: P, MRI))
5173 return emitCMN(LHS, RHS&: RHSDef->getOperand(i: 2), MIRBuilder);
5174
5175 // Given this:
5176 //
5177 // z = G_AND x, y
5178 // G_ICMP z, 0
5179 //
5180 // Produce this if the compare is not unsigned (i.e. signed or equality):
5181 //
5182 // tst x, y
5183 if (!CmpInst::isUnsigned(predicate: P) && LHSDef &&
5184 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5185 // Make sure that the RHS is 0.
5186 auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI);
5187 if (!ValAndVReg || ValAndVReg->Value != 0)
5188 return nullptr;
5189
5190 return emitTST(LHS&: LHSDef->getOperand(i: 1),
5191 RHS&: LHSDef->getOperand(i: 2), MIRBuilder);
5192 }
5193
5194 return nullptr;
5195}
5196
5197bool AArch64InstructionSelector::selectShuffleVector(
5198 MachineInstr &I, MachineRegisterInfo &MRI) {
5199 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
5200 Register Src1Reg = I.getOperand(i: 1).getReg();
5201 const LLT Src1Ty = MRI.getType(Reg: Src1Reg);
5202 Register Src2Reg = I.getOperand(i: 2).getReg();
5203 const LLT Src2Ty = MRI.getType(Reg: Src2Reg);
5204 ArrayRef<int> Mask = I.getOperand(i: 3).getShuffleMask();
5205
5206 MachineBasicBlock &MBB = *I.getParent();
5207 MachineFunction &MF = *MBB.getParent();
5208 LLVMContext &Ctx = MF.getFunction().getContext();
5209
5210 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5211 // it originated from a <1 x T> type. Those should have been lowered into
5212 // G_BUILD_VECTOR earlier.
5213 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5214 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5215 return false;
5216 }
5217
5218 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5219
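// Expand each mask element into BytesPerElt consecutive byte indices for TBL.
// E.g. with 32-bit elements (BytesPerElt == 4), a mask entry of 2 expands to
// the byte indices 8, 9, 10, 11.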
5220 SmallVector<Constant *, 64> CstIdxs;
5221 for (int Val : Mask) {
5222 // For now, we just assume any undef index to be 0. This should be
5223 // optimized in the future, e.g. to select DUP etc.
5224 Val = Val < 0 ? 0 : Val;
5225 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5226 unsigned Offset = Byte + Val * BytesPerElt;
5227 CstIdxs.emplace_back(Args: ConstantInt::get(Ty: Type::getInt8Ty(C&: Ctx), V: Offset));
5228 }
5229 }
5230
5231 // Use a constant pool to load the index vector for TBL.
5232 Constant *CPVal = ConstantVector::get(V: CstIdxs);
5233 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder&: MIB);
5234 if (!IndexLoad) {
5235 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5236 return false;
5237 }
5238
5239 if (DstTy.getSizeInBits() != 128) {
5240 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5241 // This case can be done with TBL1.
5242 MachineInstr *Concat =
5243 emitVectorConcat(Dst: std::nullopt, Op1: Src1Reg, Op2: Src2Reg, MIRBuilder&: MIB);
5244 if (!Concat) {
5245 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5246 return false;
5247 }
5248
5249 // The constant pool load will be 64 bits, so we need to convert to an FPR128 reg.
5250 IndexLoad = emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass,
5251 Scalar: IndexLoad->getOperand(i: 0).getReg(), MIRBuilder&: MIB);
5252
5253 auto TBL1 = MIB.buildInstr(
5254 Opc: AArch64::TBLv16i8One, DstOps: {&AArch64::FPR128RegClass},
5255 SrcOps: {Concat->getOperand(i: 0).getReg(), IndexLoad->getOperand(i: 0).getReg()});
5256 constrainSelectedInstRegOperands(I&: *TBL1, TII, TRI, RBI);
5257
5258 auto Copy =
5259 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {})
5260 .addReg(RegNo: TBL1.getReg(Idx: 0), flags: 0, SubReg: AArch64::dsub);
5261 RBI.constrainGenericRegister(Reg: Copy.getReg(Idx: 0), RC: AArch64::FPR64RegClass, MRI);
5262 I.eraseFromParent();
5263 return true;
5264 }
5265
5266 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5267 // Q registers for regalloc.
5268 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5269 auto RegSeq = createQTuple(Regs, MIB);
5270 auto TBL2 = MIB.buildInstr(Opc: AArch64::TBLv16i8Two, DstOps: {I.getOperand(i: 0)},
5271 SrcOps: {RegSeq, IndexLoad->getOperand(i: 0)});
5272 constrainSelectedInstRegOperands(I&: *TBL2, TII, TRI, RBI);
5273 I.eraseFromParent();
5274 return true;
5275}
5276
5277MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5278 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5279 unsigned LaneIdx, const RegisterBank &RB,
5280 MachineIRBuilder &MIRBuilder) const {
5281 MachineInstr *InsElt = nullptr;
5282 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5283 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5284
5285 // Create a register to define with the insert if one wasn't passed in.
5286 if (!DstReg)
5287 DstReg = MRI.createVirtualRegister(RegClass: DstRC);
5288
5289 unsigned EltSize = MRI.getType(Reg: EltReg).getSizeInBits();
5290 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5291
5292 if (RB.getID() == AArch64::FPRRegBankID) {
5293 auto InsSub = emitScalarToVector(EltSize, DstRC, Scalar: EltReg, MIRBuilder);
5294 InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg})
5295 .addImm(Val: LaneIdx)
5296 .addUse(RegNo: InsSub->getOperand(i: 0).getReg())
5297 .addImm(Val: 0);
5298 } else {
5299 InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg})
5300 .addImm(Val: LaneIdx)
5301 .addUse(RegNo: EltReg);
5302 }
5303
5304 constrainSelectedInstRegOperands(I&: *InsElt, TII, TRI, RBI);
5305 return InsElt;
5306}
5307
5308bool AArch64InstructionSelector::selectUSMovFromExtend(
5309 MachineInstr &MI, MachineRegisterInfo &MRI) {
5310 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5311 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5312 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5313 return false;
5314 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5315 const Register DefReg = MI.getOperand(i: 0).getReg();
5316 const LLT DstTy = MRI.getType(Reg: DefReg);
5317 unsigned DstSize = DstTy.getSizeInBits();
5318
5319 if (DstSize != 32 && DstSize != 64)
5320 return false;
5321
5322 MachineInstr *Extract = getOpcodeDef(Opcode: TargetOpcode::G_EXTRACT_VECTOR_ELT,
5323 Reg: MI.getOperand(i: 1).getReg(), MRI);
5324 int64_t Lane;
5325 if (!Extract || !mi_match(R: Extract->getOperand(i: 2).getReg(), MRI, P: m_ICst(Cst&: Lane)))
5326 return false;
5327 Register Src0 = Extract->getOperand(i: 1).getReg();
5328
5329 const LLT VecTy = MRI.getType(Reg: Src0);
5330 if (VecTy.isScalableVector())
5331 return false;
5332
5333 if (VecTy.getSizeInBits() != 128) {
5334 const MachineInstr *ScalarToVector = emitScalarToVector(
5335 EltSize: VecTy.getSizeInBits(), DstRC: &AArch64::FPR128RegClass, Scalar: Src0, MIRBuilder&: MIB);
5336 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5337 Src0 = ScalarToVector->getOperand(i: 0).getReg();
5338 }
5339
5340 unsigned Opcode;
5341 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5342 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5343 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5344 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5345 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5346 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5347 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5348 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5349 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5350 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5351 else
5352 llvm_unreachable("Unexpected type combo for S/UMov!");
5353
5354 // We may need to generate one of these, depending on the type and sign of the
5355 // input:
5356 // DstReg = SMOV Src0, Lane;
5357 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5358 MachineInstr *ExtI = nullptr;
5359 if (DstSize == 64 && !IsSigned) {
5360 Register NewReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
5361 MIB.buildInstr(Opc: Opcode, DstOps: {NewReg}, SrcOps: {Src0}).addImm(Val: Lane);
5362 ExtI = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DefReg}, SrcOps: {})
5363 .addImm(Val: 0)
5364 .addUse(RegNo: NewReg)
5365 .addImm(Val: AArch64::sub_32);
5366 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
5367 } else
5368 ExtI = MIB.buildInstr(Opc: Opcode, DstOps: {DefReg}, SrcOps: {Src0}).addImm(Val: Lane);
5369
5370 constrainSelectedInstRegOperands(I&: *ExtI, TII, TRI, RBI);
5371 MI.eraseFromParent();
5372 return true;
5373}
5374
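// The tryAdvSIMDModImm* helpers below try to materialize a splat constant with
// a single MOVI/MVNI/FMOV (vector, immediate). For instance, a v4i32 splat of
// 0x00ff0000 fits the shifted-immediate form and can be emitted as
// 'movi v0.4s, #0xff, lsl #16' (register chosen for illustration).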
5375MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5376 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5377 unsigned int Op;
5378 if (DstSize == 128) {
5379 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5380 return nullptr;
5381 Op = AArch64::MOVIv16b_ns;
5382 } else {
5383 Op = AArch64::MOVIv8b_ns;
5384 }
5385
5386 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5387
5388 if (AArch64_AM::isAdvSIMDModImmType9(Imm: Val)) {
5389 Val = AArch64_AM::encodeAdvSIMDModImmType9(Imm: Val);
5390 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5391 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5392 return &*Mov;
5393 }
5394 return nullptr;
5395}
5396
5397MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5398 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5399 bool Inv) {
5400
5401 unsigned int Op;
5402 if (DstSize == 128) {
5403 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5404 return nullptr;
5405 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5406 } else {
5407 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5408 }
5409
5410 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5411 uint64_t Shift;
5412
5413 if (AArch64_AM::isAdvSIMDModImmType5(Imm: Val)) {
5414 Val = AArch64_AM::encodeAdvSIMDModImmType5(Imm: Val);
5415 Shift = 0;
5416 } else if (AArch64_AM::isAdvSIMDModImmType6(Imm: Val)) {
5417 Val = AArch64_AM::encodeAdvSIMDModImmType6(Imm: Val);
5418 Shift = 8;
5419 } else
5420 return nullptr;
5421
5422 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5423 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5424 return &*Mov;
5425}
5426
5427MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5428 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5429 bool Inv) {
5430
5431 unsigned int Op;
5432 if (DstSize == 128) {
5433 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5434 return nullptr;
5435 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5436 } else {
5437 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5438 }
5439
5440 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5441 uint64_t Shift;
5442
5443 if ((AArch64_AM::isAdvSIMDModImmType1(Imm: Val))) {
5444 Val = AArch64_AM::encodeAdvSIMDModImmType1(Imm: Val);
5445 Shift = 0;
5446 } else if ((AArch64_AM::isAdvSIMDModImmType2(Imm: Val))) {
5447 Val = AArch64_AM::encodeAdvSIMDModImmType2(Imm: Val);
5448 Shift = 8;
5449 } else if ((AArch64_AM::isAdvSIMDModImmType3(Imm: Val))) {
5450 Val = AArch64_AM::encodeAdvSIMDModImmType3(Imm: Val);
5451 Shift = 16;
5452 } else if ((AArch64_AM::isAdvSIMDModImmType4(Imm: Val))) {
5453 Val = AArch64_AM::encodeAdvSIMDModImmType4(Imm: Val);
5454 Shift = 24;
5455 } else
5456 return nullptr;
5457
5458 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5459 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5460 return &*Mov;
5461}
5462
5463MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5464 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5465
5466 unsigned int Op;
5467 if (DstSize == 128) {
5468 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5469 return nullptr;
5470 Op = AArch64::MOVIv2d_ns;
5471 } else {
5472 Op = AArch64::MOVID;
5473 }
5474
5475 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5476 if (AArch64_AM::isAdvSIMDModImmType10(Imm: Val)) {
5477 Val = AArch64_AM::encodeAdvSIMDModImmType10(Imm: Val);
5478 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5479 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5480 return &*Mov;
5481 }
5482 return nullptr;
5483}
5484
5485MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5486 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5487 bool Inv) {
5488
5489 unsigned int Op;
5490 if (DstSize == 128) {
5491 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5492 return nullptr;
5493 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5494 } else {
5495 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5496 }
5497
5498 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5499 uint64_t Shift;
5500
5501 if (AArch64_AM::isAdvSIMDModImmType7(Imm: Val)) {
5502 Val = AArch64_AM::encodeAdvSIMDModImmType7(Imm: Val);
5503 Shift = 264;
5504 } else if (AArch64_AM::isAdvSIMDModImmType8(Imm: Val)) {
5505 Val = AArch64_AM::encodeAdvSIMDModImmType8(Imm: Val);
5506 Shift = 272;
5507 } else
5508 return nullptr;
5509
5510 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5511 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5512 return &*Mov;
5513}
5514
5515MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5516 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5517
5518 unsigned int Op;
5519 bool IsWide = false;
5520 if (DstSize == 128) {
5521 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5522 return nullptr;
5523 Op = AArch64::FMOVv4f32_ns;
5524 IsWide = true;
5525 } else {
5526 Op = AArch64::FMOVv2f32_ns;
5527 }
5528
5529 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5530
5531 if (AArch64_AM::isAdvSIMDModImmType11(Imm: Val)) {
5532 Val = AArch64_AM::encodeAdvSIMDModImmType11(Imm: Val);
5533 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Imm: Val)) {
5534 Val = AArch64_AM::encodeAdvSIMDModImmType12(Imm: Val);
5535 Op = AArch64::FMOVv2f64_ns;
5536 } else
5537 return nullptr;
5538
5539 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5540 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5541 return &*Mov;
5542}
5543
5544bool AArch64InstructionSelector::selectIndexedExtLoad(
5545 MachineInstr &MI, MachineRegisterInfo &MRI) {
5546 auto &ExtLd = cast<GIndexedAnyExtLoad>(Val&: MI);
5547 Register Dst = ExtLd.getDstReg();
5548 Register WriteBack = ExtLd.getWritebackReg();
5549 Register Base = ExtLd.getBaseReg();
5550 Register Offset = ExtLd.getOffsetReg();
5551 LLT Ty = MRI.getType(Reg: Dst);
5552 assert(Ty.getSizeInBits() <= 64); // Scalar destinations only (at most 64 bits).
5553 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5554 bool IsPre = ExtLd.isPre();
5555 bool IsSExt = isa<GIndexedSExtLoad>(Val: ExtLd);
5556 unsigned InsertIntoSubReg = 0;
5557 bool IsDst64 = Ty.getSizeInBits() == 64;
5558
5559 // ZExt/SExt loads should be on the GPR bank, but we can also handle extloads
5560 // and zextloads of FPR values, as long as they are scalar.
5561 bool IsFPR = RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
5562 if ((IsSExt && IsFPR) || Ty.isVector())
5563 return false;
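// For example, a post-indexed zero-extending 8-bit load into a 64-bit GPR
// destination selects LDRBBpost (which defines a 32-bit register) and then
// widens the result with a SUBREG_TO_REG into sub_32.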
5564
5565 unsigned Opc = 0;
5566 LLT NewLdDstTy;
5567 LLT s32 = LLT::scalar(SizeInBits: 32);
5568 LLT s64 = LLT::scalar(SizeInBits: 64);
5569
5570 if (MemSizeBits == 8) {
5571 if (IsSExt) {
5572 if (IsDst64)
5573 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5574 else
5575 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5576 NewLdDstTy = IsDst64 ? s64 : s32;
5577 } else if (IsFPR) {
5578 Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5579 InsertIntoSubReg = AArch64::bsub;
5580 NewLdDstTy = LLT::scalar(SizeInBits: MemSizeBits);
5581 } else {
5582 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5583 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5584 NewLdDstTy = s32;
5585 }
5586 } else if (MemSizeBits == 16) {
5587 if (IsSExt) {
5588 if (IsDst64)
5589 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5590 else
5591 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5592 NewLdDstTy = IsDst64 ? s64 : s32;
5593 } else if (IsFPR) {
5594 Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5595 InsertIntoSubReg = AArch64::hsub;
5596 NewLdDstTy = LLT::scalar(SizeInBits: MemSizeBits);
5597 } else {
5598 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5599 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5600 NewLdDstTy = s32;
5601 }
5602 } else if (MemSizeBits == 32) {
5603 if (IsSExt) {
5604 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5605 NewLdDstTy = s64;
5606 } else if (IsFPR) {
5607 Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5608 InsertIntoSubReg = AArch64::ssub;
5609 NewLdDstTy = LLT::scalar(SizeInBits: MemSizeBits);
5610 } else {
5611 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5612 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5613 NewLdDstTy = s32;
5614 }
5615 } else {
5616 llvm_unreachable("Unexpected size for indexed load");
5617 }
5618
5619 auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5620 if (!Cst)
5621 return false; // Shouldn't happen, but just in case.
5622
5623 auto LdMI = MIB.buildInstr(Opc, DstOps: {WriteBack, NewLdDstTy}, SrcOps: {Base})
5624 .addImm(Val: Cst->getSExtValue());
5625 LdMI.cloneMemRefs(OtherMI: ExtLd);
5626 constrainSelectedInstRegOperands(I&: *LdMI, TII, TRI, RBI);
5627 // Make sure to select the load with the MemTy as the dest type, and then
5628 // insert into a larger reg if needed.
5629 if (InsertIntoSubReg) {
5630 // Generate a SUBREG_TO_REG.
5631 auto SubToReg = MIB.buildInstr(Opc: TargetOpcode::SUBREG_TO_REG, DstOps: {Dst}, SrcOps: {})
5632 .addImm(Val: 0)
5633 .addUse(RegNo: LdMI.getReg(Idx: 1))
5634 .addImm(Val: InsertIntoSubReg);
5635 RBI.constrainGenericRegister(
5636 Reg: SubToReg.getReg(Idx: 0),
5637 RC: *getRegClassForTypeOnBank(Ty: MRI.getType(Reg: Dst),
5638 RB: *RBI.getRegBank(Reg: Dst, MRI, TRI)),
5639 MRI);
5640 } else {
5641 auto Copy = MIB.buildCopy(Res: Dst, Op: LdMI.getReg(Idx: 1));
5642 selectCopy(I&: *Copy, TII, MRI, TRI, RBI);
5643 }
5644 MI.eraseFromParent();
5645
5646 return true;
5647}
5648
5649bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5650 MachineRegisterInfo &MRI) {
5651 auto &Ld = cast<GIndexedLoad>(Val&: MI);
5652 Register Dst = Ld.getDstReg();
5653 Register WriteBack = Ld.getWritebackReg();
5654 Register Base = Ld.getBaseReg();
5655 Register Offset = Ld.getOffsetReg();
5656 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5657 "Unexpected type for indexed load");
5658 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5659
5660 if (MemSize < MRI.getType(Reg: Dst).getSizeInBytes())
5661 return selectIndexedExtLoad(MI, MRI);
5662
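// The opcode tables below are indexed by log2 of the access size in bytes
// (B, H, W/S, X/D, Q).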
5663 unsigned Opc = 0;
5664 if (Ld.isPre()) {
5665 static constexpr unsigned GPROpcodes[] = {
5666 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5667 AArch64::LDRXpre};
5668 static constexpr unsigned FPROpcodes[] = {
5669 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5670 AArch64::LDRQpre};
5671 if (RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5672 Opc = FPROpcodes[Log2_32(Value: MemSize)];
5673 else
5674 Opc = GPROpcodes[Log2_32(Value: MemSize)];
5675 } else {
5676 static constexpr unsigned GPROpcodes[] = {
5677 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5678 AArch64::LDRXpost};
5679 static constexpr unsigned FPROpcodes[] = {
5680 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5681 AArch64::LDRDpost, AArch64::LDRQpost};
5682 if (RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5683 Opc = FPROpcodes[Log2_32(Value: MemSize)];
5684 else
5685 Opc = GPROpcodes[Log2_32(Value: MemSize)];
5686 }
5687 auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5688 if (!Cst)
5689 return false; // Shouldn't happen, but just in case.
5690 auto LdMI =
5691 MIB.buildInstr(Opc, DstOps: {WriteBack, Dst}, SrcOps: {Base}).addImm(Val: Cst->getSExtValue());
5692 LdMI.cloneMemRefs(OtherMI: Ld);
5693 constrainSelectedInstRegOperands(I&: *LdMI, TII, TRI, RBI);
5694 MI.eraseFromParent();
5695 return true;
5696}
5697
5698bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5699 MachineRegisterInfo &MRI) {
5700 Register Dst = I.getWritebackReg();
5701 Register Val = I.getValueReg();
5702 Register Base = I.getBaseReg();
5703 Register Offset = I.getOffsetReg();
5704 LLT ValTy = MRI.getType(Reg: Val);
5705 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5706
5707 unsigned Opc = 0;
5708 if (I.isPre()) {
5709 static constexpr unsigned GPROpcodes[] = {
5710 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5711 AArch64::STRXpre};
5712 static constexpr unsigned FPROpcodes[] = {
5713 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5714 AArch64::STRQpre};
5715
5716 if (RBI.getRegBank(Reg: Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5717 Opc = FPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
5718 else
5719 Opc = GPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
5720 } else {
5721 static constexpr unsigned GPROpcodes[] = {
5722 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5723 AArch64::STRXpost};
5724 static constexpr unsigned FPROpcodes[] = {
5725 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5726 AArch64::STRDpost, AArch64::STRQpost};
5727
5728 if (RBI.getRegBank(Reg: Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5729 Opc = FPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
5730 else
5731 Opc = GPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
5732 }
5733
5734 auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5735 if (!Cst)
5736 return false; // Shouldn't happen, but just in case.
5737 auto Str =
5738 MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Val, Base}).addImm(Val: Cst->getSExtValue());
5739 Str.cloneMemRefs(OtherMI: I);
5740 constrainSelectedInstRegOperands(I&: *Str, TII, TRI, RBI);
5741 I.eraseFromParent();
5742 return true;
5743}
5744
5745MachineInstr *
5746AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5747 MachineIRBuilder &MIRBuilder,
5748 MachineRegisterInfo &MRI) {
5749 LLT DstTy = MRI.getType(Reg: Dst);
5750 unsigned DstSize = DstTy.getSizeInBits();
5751 if (CV->isNullValue()) {
5752 if (DstSize == 128) {
5753 auto Mov =
5754 MIRBuilder.buildInstr(Opc: AArch64::MOVIv2d_ns, DstOps: {Dst}, SrcOps: {}).addImm(Val: 0);
5755 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5756 return &*Mov;
5757 }
5758
5759 if (DstSize == 64) {
5760 auto Mov =
5761 MIRBuilder
5762 .buildInstr(Opc: AArch64::MOVIv2d_ns, DstOps: {&AArch64::FPR128RegClass}, SrcOps: {})
5763 .addImm(Val: 0);
5764 auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {Dst}, SrcOps: {})
5765 .addReg(RegNo: Mov.getReg(Idx: 0), flags: 0, SubReg: AArch64::dsub);
5766 RBI.constrainGenericRegister(Reg: Dst, RC: AArch64::FPR64RegClass, MRI);
5767 return &*Copy;
5768 }
5769 }
5770
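  // For splat constants, first try to materialize the value with a single SIMD
  // modified-immediate instruction (or an fneg of one) before falling back to a
  // constant-pool load.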
5771 if (Constant *SplatValue = CV->getSplatValue()) {
5772 APInt SplatValueAsInt =
5773 isa<ConstantFP>(Val: SplatValue)
5774 ? cast<ConstantFP>(Val: SplatValue)->getValueAPF().bitcastToAPInt()
5775 : SplatValue->getUniqueInteger();
5776 APInt DefBits = APInt::getSplat(
5777 NewLen: DstSize, V: SplatValueAsInt.trunc(width: DstTy.getScalarSizeInBits()));
5778 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5779 MachineInstr *NewOp;
5780 bool Inv = false;
5781 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) ||
5782 (NewOp =
5783 tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5784 (NewOp =
5785 tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5786 (NewOp =
5787 tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5788 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) ||
5789 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)))
5790 return NewOp;
5791
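      // No direct encoding matched; try the bitwise-inverted pattern, which
      // corresponds to the inverted (MVNI-style) forms.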
5792 DefBits = ~DefBits;
5793 Inv = true;
5794 if ((NewOp =
5795 tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5796 (NewOp =
5797 tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5798 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)))
5799 return NewOp;
5800 return nullptr;
5801 };
5802
5803 if (auto *NewOp = TryMOVIWithBits(DefBits))
5804 return NewOp;
5805
5806 // See if an fneg of the constant can be materialized with a MOVI, etc.
5807 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5808 unsigned NegOpc) -> MachineInstr * {
5809 // Flip the sign bit of each NumBits-wide sub-element of the constant.
5810 APInt Neg = APInt::getHighBitsSet(numBits: NumBits, hiBitsSet: 1).zext(width: DstSize);
5811 APInt NegBits(DstSize, 0);
5812 unsigned NumElts = DstSize / NumBits;
5813 for (unsigned i = 0; i < NumElts; i++)
5814 NegBits |= Neg << (NumBits * i);
5815 NegBits = DefBits ^ NegBits;
5816
5817 // Try to create the new constants with MOVI, and if so generate a fneg
5818 // for it.
5819 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5820 Register NewDst = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass);
5821 NewOp->getOperand(i: 0).setReg(NewDst);
5822 return MIRBuilder.buildInstr(Opc: NegOpc, DstOps: {Dst}, SrcOps: {NewDst});
5823 }
5824 return nullptr;
5825 };
5826 MachineInstr *R;
5827 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5828 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5829 (STI.hasFullFP16() &&
5830 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5831 return R;
5832 }
5833
5834 auto *CPLoad = emitLoadFromConstantPool(CPVal: CV, MIRBuilder);
5835 if (!CPLoad) {
5836 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!\n");
5837 return nullptr;
5838 }
5839
5840 auto Copy = MIRBuilder.buildCopy(Res: Dst, Op: CPLoad->getOperand(i: 0));
5841 RBI.constrainGenericRegister(
5842 Reg: Dst, RC: *MRI.getRegClass(Reg: CPLoad->getOperand(i: 0).getReg()), MRI);
5843 return &*Copy;
5844}
5845
5846bool AArch64InstructionSelector::tryOptConstantBuildVec(
5847 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5848 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5849 unsigned DstSize = DstTy.getSizeInBits();
5850 assert(DstSize <= 128 && "Unexpected build_vec type!");
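  // Leave vectors narrower than 32 bits to the normal build_vector selection.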
5851 if (DstSize < 32)
5852 return false;
5853 // Check if we're building a constant vector, in which case we want to
5854 // generate a constant pool load instead of a vector insert sequence.
5855 SmallVector<Constant *, 16> Csts;
5856 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5857 // Try to find G_CONSTANT or G_FCONSTANT
5858 auto *OpMI =
5859 getOpcodeDef(Opcode: TargetOpcode::G_CONSTANT, Reg: I.getOperand(i: Idx).getReg(), MRI);
5860 if (OpMI)
5861 Csts.emplace_back(
5862 Args: const_cast<ConstantInt *>(OpMI->getOperand(i: 1).getCImm()));
5863 else if ((OpMI = getOpcodeDef(Opcode: TargetOpcode::G_FCONSTANT,
5864 Reg: I.getOperand(i: Idx).getReg(), MRI)))
5865 Csts.emplace_back(
5866 Args: const_cast<ConstantFP *>(OpMI->getOperand(i: 1).getFPImm()));
5867 else
5868 return false;
5869 }
5870 Constant *CV = ConstantVector::get(V: Csts);
5871 if (!emitConstantVector(Dst: I.getOperand(i: 0).getReg(), CV, MIRBuilder&: MIB, MRI))
5872 return false;
5873 I.eraseFromParent();
5874 return true;
5875}
5876
5877bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5878 MachineInstr &I, MachineRegisterInfo &MRI) {
5879 // Given:
5880 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5881 //
5882 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5883 Register Dst = I.getOperand(i: 0).getReg();
5884 Register EltReg = I.getOperand(i: 1).getReg();
5885 LLT EltTy = MRI.getType(Reg: EltReg);
5886 // If the destination isn't on the same register bank as its element, then this
5887 // can't be a SUBREG_TO_REG.
5888 const RegisterBank &EltRB = *RBI.getRegBank(Reg: EltReg, MRI, TRI);
5889 const RegisterBank &DstRB = *RBI.getRegBank(Reg: Dst, MRI, TRI);
5890 if (EltRB != DstRB)
5891 return false;
5892 if (any_of(Range: drop_begin(RangeOrContainer: I.operands(), N: 2), P: [&MRI](const MachineOperand &Op) {
5893 return !getOpcodeDef(Opcode: TargetOpcode::G_IMPLICIT_DEF, Reg: Op.getReg(), MRI);
5894 }))
5895 return false;
5896 unsigned SubReg;
5897 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(Ty: EltTy, RB: EltRB);
5898 if (!EltRC)
5899 return false;
5900 const TargetRegisterClass *DstRC =
5901 getRegClassForTypeOnBank(Ty: MRI.getType(Reg: Dst), RB: DstRB);
5902 if (!DstRC)
5903 return false;
5904 if (!getSubRegForClass(RC: EltRC, TRI, SubReg))
5905 return false;
5906 auto SubregToReg = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {Dst}, SrcOps: {})
5907 .addImm(Val: 0)
5908 .addUse(RegNo: EltReg)
5909 .addImm(Val: SubReg);
5910 I.eraseFromParent();
5911 constrainSelectedInstRegOperands(I&: *SubregToReg, TII, TRI, RBI);
5912 return RBI.constrainGenericRegister(Reg: Dst, RC: *DstRC, MRI);
5913}
5914
5915bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5916 MachineRegisterInfo &MRI) {
5917 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5918 // Until we port more of the optimized selections, for now just use a vector
5919 // insert sequence.
5920 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
5921 const LLT EltTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
5922 unsigned EltSize = EltTy.getSizeInBits();
5923
5924 if (tryOptConstantBuildVec(I, DstTy, MRI))
5925 return true;
5926 if (tryOptBuildVecToSubregToReg(I, MRI))
5927 return true;
5928
5929 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5930 return false; // Don't support all element types yet.
5931 const RegisterBank &RB = *RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI);
5932
5933 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5934 MachineInstr *ScalarToVec =
5935 emitScalarToVector(EltSize: DstTy.getElementType().getSizeInBits(), DstRC,
5936 Scalar: I.getOperand(i: 1).getReg(), MIRBuilder&: MIB);
5937 if (!ScalarToVec)
5938 return false;
5939
5940 Register DstVec = ScalarToVec->getOperand(i: 0).getReg();
5941 unsigned DstSize = DstTy.getSizeInBits();
5942
5943 // Keep track of the last MI we inserted. Later on, we might be able to save
5944 // a copy using it.
5945 MachineInstr *PrevMI = ScalarToVec;
5946 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5947 // Note that if we don't do a subregister copy, we can end up making an
5948 // extra register.
5949 Register OpReg = I.getOperand(i).getReg();
5950 // Do not emit inserts for undefs
5951 if (!getOpcodeDef<GImplicitDef>(Reg: OpReg, MRI)) {
5952 PrevMI = &*emitLaneInsert(DstReg: std::nullopt, SrcReg: DstVec, EltReg: OpReg, LaneIdx: i - 1, RB, MIRBuilder&: MIB);
5953 DstVec = PrevMI->getOperand(i: 0).getReg();
5954 }
5955 }
5956
5957 // If DstTy's size in bits is less than 128, then emit a subregister copy
5958 // from DstVec to the last register we've defined.
5959 if (DstSize < 128) {
5960 // Force this to be FPR using the destination vector.
5961 const TargetRegisterClass *RC =
5962 getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: DstVec, MRI, TRI));
5963 if (!RC)
5964 return false;
5965 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5966 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5967 return false;
5968 }
5969
5970 unsigned SubReg = 0;
5971 if (!getSubRegForClass(RC, TRI, SubReg))
5972 return false;
5973 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5974 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5975 << ")\n");
5976 return false;
5977 }
5978
5979 Register Reg = MRI.createVirtualRegister(RegClass: RC);
5980 Register DstReg = I.getOperand(i: 0).getReg();
5981
5982 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {}).addReg(RegNo: DstVec, flags: 0, SubReg);
5983 MachineOperand &RegOp = I.getOperand(i: 1);
5984 RegOp.setReg(Reg);
5985 RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
5986 } else {
5987 // We either have a vector with all elements (except the first one) undef or
5988 // at least one non-undef non-first element. In the first case, we need to
5989 // constrain the output register ourselves as we may have generated an
5990 // INSERT_SUBREG operation which is a generic operation for which the
5991 // output regclass cannot be automatically chosen.
5992 //
5993 // In the second case, there is no need to do this as it may generate an
5994 // instruction like INSvi32gpr where the regclass can be automatically
5995 // chosen.
5996 //
5997 // Also, we save a copy by re-using the destination register on the final
5998 // insert.
5999 PrevMI->getOperand(i: 0).setReg(I.getOperand(i: 0).getReg());
6000 constrainSelectedInstRegOperands(I&: *PrevMI, TII, TRI, RBI);
6001
6002 Register DstReg = PrevMI->getOperand(i: 0).getReg();
6003 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
6004 const TargetRegisterClass *RC =
6005 getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: DstVec, MRI, TRI));
6006 RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
6007 }
6008 }
6009
6010 I.eraseFromParent();
6011 return true;
6012}
6013
6014bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
6015 unsigned NumVecs,
6016 MachineInstr &I) {
6017 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
6018 assert(Opc && "Expected an opcode?");
6019 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
6020 auto &MRI = *MIB.getMRI();
6021 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6022 unsigned Size = Ty.getSizeInBits();
6023 assert((Size == 64 || Size == 128) &&
6024 "Destination must be 64 bits or 128 bits?");
6025 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
6026 auto Ptr = I.getOperand(i: I.getNumOperands() - 1).getReg();
6027 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
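  // The LDn instruction defines a register tuple; each destination vector is
  // then extracted from it below with a subregister copy (dsub0/qsub0 + Idx).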
6028 auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {Ptr});
6029 Load.cloneMemRefs(OtherMI: I);
6030 constrainSelectedInstRegOperands(I&: *Load, TII, TRI, RBI);
6031 Register SelectedLoadDst = Load->getOperand(i: 0).getReg();
6032 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6033 auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: Idx)}, SrcOps: {})
6034 .addReg(RegNo: SelectedLoadDst, flags: 0, SubReg: SubReg + Idx);
6035 // Emit the subreg copies and immediately select them.
6036 // FIXME: We should refactor our copy code into an emitCopy helper and
6037 // clean up uses of this pattern elsewhere in the selector.
6038 selectCopy(I&: *Vec, TII, MRI, TRI, RBI);
6039 }
6040 return true;
6041}
6042
6043bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
6044 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
6045 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
6046 assert(Opc && "Expected an opcode?");
6047 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
6048 auto &MRI = *MIB.getMRI();
6049 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6050 bool Narrow = Ty.getSizeInBits() == 64;
6051
6052 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
6053 SmallVector<Register, 4> Regs(NumVecs);
6054 std::transform(first: FirstSrcRegIt, last: FirstSrcRegIt + NumVecs, result: Regs.begin(),
6055 unary_op: [](auto MO) { return MO.getReg(); });
6056
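  // The LDn lane instructions operate on Q-register tuples, so 64-bit (D-sized)
  // source vectors are widened to 128 bits here and the results are narrowed
  // back to 64 bits after the load.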
6057 if (Narrow) {
6058 transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) {
6059 return emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass, Scalar: Reg, MIRBuilder&: MIB)
6060 ->getOperand(i: 0)
6061 .getReg();
6062 });
6063 Ty = Ty.multiplyElements(Factor: 2);
6064 }
6065
6066 Register Tuple = createQTuple(Regs, MIB);
6067 auto LaneNo = getIConstantVRegVal(VReg: (FirstSrcRegIt + NumVecs)->getReg(), MRI);
6068 if (!LaneNo)
6069 return false;
6070
6071 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
6072 auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {})
6073 .addReg(RegNo: Tuple)
6074 .addImm(Val: LaneNo->getZExtValue())
6075 .addReg(RegNo: Ptr);
6076 Load.cloneMemRefs(OtherMI: I);
6077 constrainSelectedInstRegOperands(I&: *Load, TII, TRI, RBI);
6078 Register SelectedLoadDst = Load->getOperand(i: 0).getReg();
6079 unsigned SubReg = AArch64::qsub0;
6080 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6081 auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY,
6082 DstOps: {Narrow ? DstOp(&AArch64::FPR128RegClass)
6083 : DstOp(I.getOperand(i: Idx).getReg())},
6084 SrcOps: {})
6085 .addReg(RegNo: SelectedLoadDst, flags: 0, SubReg: SubReg + Idx);
6086 Register WideReg = Vec.getReg(Idx: 0);
6087 // Emit the subreg copies and immediately select them.
6088 selectCopy(I&: *Vec, TII, MRI, TRI, RBI);
6089 if (Narrow &&
6090 !emitNarrowVector(DstReg: I.getOperand(i: Idx).getReg(), SrcReg: WideReg, MIB, MRI))
6091 return false;
6092 }
6093 return true;
6094}
6095
6096void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6097 unsigned NumVecs,
6098 unsigned Opc) {
6099 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6100 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6101 Register Ptr = I.getOperand(i: 1 + NumVecs).getReg();
6102
6103 SmallVector<Register, 2> Regs(NumVecs);
6104 std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs,
6105 result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); });
6106
6107 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6108 : createDTuple(Regs, MIB);
6109 auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {Tuple, Ptr});
6110 Store.cloneMemRefs(OtherMI: I);
6111 constrainSelectedInstRegOperands(I&: *Store, TII, TRI, RBI);
6112}
6113
6114bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6115 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6116 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6117 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6118 bool Narrow = Ty.getSizeInBits() == 64;
6119
6120 SmallVector<Register, 2> Regs(NumVecs);
6121 std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs,
6122 result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); });
6123
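  // As with the lane loads, the STn lane instructions take Q-register tuples,
  // so widen 64-bit source vectors to 128 bits first.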
6124 if (Narrow)
6125 transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) {
6126 return emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass, Scalar: Reg, MIRBuilder&: MIB)
6127 ->getOperand(i: 0)
6128 .getReg();
6129 });
6130
6131 Register Tuple = createQTuple(Regs, MIB);
6132
6133 auto LaneNo = getIConstantVRegVal(VReg: I.getOperand(i: 1 + NumVecs).getReg(), MRI);
6134 if (!LaneNo)
6135 return false;
6136 Register Ptr = I.getOperand(i: 1 + NumVecs + 1).getReg();
6137 auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {})
6138 .addReg(RegNo: Tuple)
6139 .addImm(Val: LaneNo->getZExtValue())
6140 .addReg(RegNo: Ptr);
6141 Store.cloneMemRefs(OtherMI: I);
6142 constrainSelectedInstRegOperands(I&: *Store, TII, TRI, RBI);
6143 return true;
6144}
6145
6146bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6147 MachineInstr &I, MachineRegisterInfo &MRI) {
6148 // Find the intrinsic ID.
6149 unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID();
6150
6151 const LLT S8 = LLT::scalar(SizeInBits: 8);
6152 const LLT S16 = LLT::scalar(SizeInBits: 16);
6153 const LLT S32 = LLT::scalar(SizeInBits: 32);
6154 const LLT S64 = LLT::scalar(SizeInBits: 64);
6155 const LLT P0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
6156 // Select the instruction.
6157 switch (IntrinID) {
6158 default:
6159 return false;
6160 case Intrinsic::aarch64_ldxp:
6161 case Intrinsic::aarch64_ldaxp: {
6162 auto NewI = MIB.buildInstr(
6163 Opc: IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6164 DstOps: {I.getOperand(i: 0).getReg(), I.getOperand(i: 1).getReg()},
6165 SrcOps: {I.getOperand(i: 3)});
6166 NewI.cloneMemRefs(OtherMI: I);
6167 constrainSelectedInstRegOperands(I&: *NewI, TII, TRI, RBI);
6168 break;
6169 }
6170 case Intrinsic::aarch64_neon_ld1x2: {
6171 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6172 unsigned Opc = 0;
6173 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6174 Opc = AArch64::LD1Twov8b;
6175 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6176 Opc = AArch64::LD1Twov16b;
6177 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6178 Opc = AArch64::LD1Twov4h;
6179 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6180 Opc = AArch64::LD1Twov8h;
6181 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6182 Opc = AArch64::LD1Twov2s;
6183 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6184 Opc = AArch64::LD1Twov4s;
6185 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6186 Opc = AArch64::LD1Twov2d;
6187 else if (Ty == S64 || Ty == P0)
6188 Opc = AArch64::LD1Twov1d;
6189 else
6190 llvm_unreachable("Unexpected type for ld1x2!");
6191 selectVectorLoadIntrinsic(Opc, NumVecs: 2, I);
6192 break;
6193 }
6194 case Intrinsic::aarch64_neon_ld1x3: {
6195 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6196 unsigned Opc = 0;
6197 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6198 Opc = AArch64::LD1Threev8b;
6199 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6200 Opc = AArch64::LD1Threev16b;
6201 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6202 Opc = AArch64::LD1Threev4h;
6203 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6204 Opc = AArch64::LD1Threev8h;
6205 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6206 Opc = AArch64::LD1Threev2s;
6207 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6208 Opc = AArch64::LD1Threev4s;
6209 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6210 Opc = AArch64::LD1Threev2d;
6211 else if (Ty == S64 || Ty == P0)
6212 Opc = AArch64::LD1Threev1d;
6213 else
6214 llvm_unreachable("Unexpected type for ld1x3!");
6215 selectVectorLoadIntrinsic(Opc, NumVecs: 3, I);
6216 break;
6217 }
6218 case Intrinsic::aarch64_neon_ld1x4: {
6219 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6220 unsigned Opc = 0;
6221 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6222 Opc = AArch64::LD1Fourv8b;
6223 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6224 Opc = AArch64::LD1Fourv16b;
6225 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6226 Opc = AArch64::LD1Fourv4h;
6227 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6228 Opc = AArch64::LD1Fourv8h;
6229 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6230 Opc = AArch64::LD1Fourv2s;
6231 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6232 Opc = AArch64::LD1Fourv4s;
6233 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6234 Opc = AArch64::LD1Fourv2d;
6235 else if (Ty == S64 || Ty == P0)
6236 Opc = AArch64::LD1Fourv1d;
6237 else
6238 llvm_unreachable("Unexpected type for ld1x4!");
6239 selectVectorLoadIntrinsic(Opc, NumVecs: 4, I);
6240 break;
6241 }
6242 case Intrinsic::aarch64_neon_ld2: {
6243 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6244 unsigned Opc = 0;
6245 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6246 Opc = AArch64::LD2Twov8b;
6247 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6248 Opc = AArch64::LD2Twov16b;
6249 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6250 Opc = AArch64::LD2Twov4h;
6251 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6252 Opc = AArch64::LD2Twov8h;
6253 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6254 Opc = AArch64::LD2Twov2s;
6255 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6256 Opc = AArch64::LD2Twov4s;
6257 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6258 Opc = AArch64::LD2Twov2d;
6259 else if (Ty == S64 || Ty == P0)
6260 Opc = AArch64::LD1Twov1d;
6261 else
6262 llvm_unreachable("Unexpected type for ld2!");
6263 selectVectorLoadIntrinsic(Opc, NumVecs: 2, I);
6264 break;
6265 }
6266 case Intrinsic::aarch64_neon_ld2lane: {
6267 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6268 unsigned Opc;
6269 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6270 Opc = AArch64::LD2i8;
6271 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6272 Opc = AArch64::LD2i16;
6273 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6274 Opc = AArch64::LD2i32;
6275 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6276 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6277 Opc = AArch64::LD2i64;
6278 else
6279 llvm_unreachable("Unexpected type for st2lane!");
6280 if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 2, I))
6281 return false;
6282 break;
6283 }
6284 case Intrinsic::aarch64_neon_ld2r: {
6285 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6286 unsigned Opc = 0;
6287 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6288 Opc = AArch64::LD2Rv8b;
6289 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6290 Opc = AArch64::LD2Rv16b;
6291 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6292 Opc = AArch64::LD2Rv4h;
6293 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6294 Opc = AArch64::LD2Rv8h;
6295 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6296 Opc = AArch64::LD2Rv2s;
6297 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6298 Opc = AArch64::LD2Rv4s;
6299 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6300 Opc = AArch64::LD2Rv2d;
6301 else if (Ty == S64 || Ty == P0)
6302 Opc = AArch64::LD2Rv1d;
6303 else
6304 llvm_unreachable("Unexpected type for ld2r!");
6305 selectVectorLoadIntrinsic(Opc, NumVecs: 2, I);
6306 break;
6307 }
6308 case Intrinsic::aarch64_neon_ld3: {
6309 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6310 unsigned Opc = 0;
6311 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6312 Opc = AArch64::LD3Threev8b;
6313 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6314 Opc = AArch64::LD3Threev16b;
6315 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6316 Opc = AArch64::LD3Threev4h;
6317 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6318 Opc = AArch64::LD3Threev8h;
6319 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6320 Opc = AArch64::LD3Threev2s;
6321 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6322 Opc = AArch64::LD3Threev4s;
6323 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6324 Opc = AArch64::LD3Threev2d;
6325 else if (Ty == S64 || Ty == P0)
6326 Opc = AArch64::LD1Threev1d;
6327 else
6328 llvm_unreachable("Unexpected type for ld3!");
6329 selectVectorLoadIntrinsic(Opc, NumVecs: 3, I);
6330 break;
6331 }
6332 case Intrinsic::aarch64_neon_ld3lane: {
6333 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6334 unsigned Opc;
6335 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6336 Opc = AArch64::LD3i8;
6337 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6338 Opc = AArch64::LD3i16;
6339 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6340 Opc = AArch64::LD3i32;
6341 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6342 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6343 Opc = AArch64::LD3i64;
6344 else
6345 llvm_unreachable("Unexpected type for st3lane!");
6346 if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 3, I))
6347 return false;
6348 break;
6349 }
6350 case Intrinsic::aarch64_neon_ld3r: {
6351 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6352 unsigned Opc = 0;
6353 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6354 Opc = AArch64::LD3Rv8b;
6355 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6356 Opc = AArch64::LD3Rv16b;
6357 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6358 Opc = AArch64::LD3Rv4h;
6359 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6360 Opc = AArch64::LD3Rv8h;
6361 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6362 Opc = AArch64::LD3Rv2s;
6363 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6364 Opc = AArch64::LD3Rv4s;
6365 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6366 Opc = AArch64::LD3Rv2d;
6367 else if (Ty == S64 || Ty == P0)
6368 Opc = AArch64::LD3Rv1d;
6369 else
6370 llvm_unreachable("Unexpected type for ld3r!");
6371 selectVectorLoadIntrinsic(Opc, NumVecs: 3, I);
6372 break;
6373 }
6374 case Intrinsic::aarch64_neon_ld4: {
6375 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6376 unsigned Opc = 0;
6377 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6378 Opc = AArch64::LD4Fourv8b;
6379 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6380 Opc = AArch64::LD4Fourv16b;
6381 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6382 Opc = AArch64::LD4Fourv4h;
6383 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6384 Opc = AArch64::LD4Fourv8h;
6385 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6386 Opc = AArch64::LD4Fourv2s;
6387 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6388 Opc = AArch64::LD4Fourv4s;
6389 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6390 Opc = AArch64::LD4Fourv2d;
6391 else if (Ty == S64 || Ty == P0)
6392 Opc = AArch64::LD1Fourv1d;
6393 else
6394 llvm_unreachable("Unexpected type for ld4!");
6395 selectVectorLoadIntrinsic(Opc, NumVecs: 4, I);
6396 break;
6397 }
6398 case Intrinsic::aarch64_neon_ld4lane: {
6399 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6400 unsigned Opc;
6401 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6402 Opc = AArch64::LD4i8;
6403 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6404 Opc = AArch64::LD4i16;
6405 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6406 Opc = AArch64::LD4i32;
6407 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6408 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6409 Opc = AArch64::LD4i64;
6410 else
6411 llvm_unreachable("Unexpected type for st4lane!");
6412 if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 4, I))
6413 return false;
6414 break;
6415 }
6416 case Intrinsic::aarch64_neon_ld4r: {
6417 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6418 unsigned Opc = 0;
6419 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6420 Opc = AArch64::LD4Rv8b;
6421 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6422 Opc = AArch64::LD4Rv16b;
6423 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6424 Opc = AArch64::LD4Rv4h;
6425 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6426 Opc = AArch64::LD4Rv8h;
6427 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6428 Opc = AArch64::LD4Rv2s;
6429 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6430 Opc = AArch64::LD4Rv4s;
6431 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6432 Opc = AArch64::LD4Rv2d;
6433 else if (Ty == S64 || Ty == P0)
6434 Opc = AArch64::LD4Rv1d;
6435 else
6436 llvm_unreachable("Unexpected type for ld4r!");
6437 selectVectorLoadIntrinsic(Opc, NumVecs: 4, I);
6438 break;
6439 }
6440 case Intrinsic::aarch64_neon_st1x2: {
6441 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6442 unsigned Opc;
6443 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6444 Opc = AArch64::ST1Twov8b;
6445 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6446 Opc = AArch64::ST1Twov16b;
6447 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6448 Opc = AArch64::ST1Twov4h;
6449 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6450 Opc = AArch64::ST1Twov8h;
6451 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6452 Opc = AArch64::ST1Twov2s;
6453 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6454 Opc = AArch64::ST1Twov4s;
6455 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6456 Opc = AArch64::ST1Twov2d;
6457 else if (Ty == S64 || Ty == P0)
6458 Opc = AArch64::ST1Twov1d;
6459 else
6460 llvm_unreachable("Unexpected type for st1x2!");
6461 selectVectorStoreIntrinsic(I, NumVecs: 2, Opc);
6462 break;
6463 }
6464 case Intrinsic::aarch64_neon_st1x3: {
6465 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6466 unsigned Opc;
6467 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6468 Opc = AArch64::ST1Threev8b;
6469 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6470 Opc = AArch64::ST1Threev16b;
6471 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6472 Opc = AArch64::ST1Threev4h;
6473 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6474 Opc = AArch64::ST1Threev8h;
6475 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6476 Opc = AArch64::ST1Threev2s;
6477 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6478 Opc = AArch64::ST1Threev4s;
6479 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6480 Opc = AArch64::ST1Threev2d;
6481 else if (Ty == S64 || Ty == P0)
6482 Opc = AArch64::ST1Threev1d;
6483 else
6484 llvm_unreachable("Unexpected type for st1x3!");
6485 selectVectorStoreIntrinsic(I, NumVecs: 3, Opc);
6486 break;
6487 }
6488 case Intrinsic::aarch64_neon_st1x4: {
6489 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6490 unsigned Opc;
6491 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6492 Opc = AArch64::ST1Fourv8b;
6493 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6494 Opc = AArch64::ST1Fourv16b;
6495 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6496 Opc = AArch64::ST1Fourv4h;
6497 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6498 Opc = AArch64::ST1Fourv8h;
6499 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6500 Opc = AArch64::ST1Fourv2s;
6501 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6502 Opc = AArch64::ST1Fourv4s;
6503 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6504 Opc = AArch64::ST1Fourv2d;
6505 else if (Ty == S64 || Ty == P0)
6506 Opc = AArch64::ST1Fourv1d;
6507 else
6508 llvm_unreachable("Unexpected type for st1x4!");
6509 selectVectorStoreIntrinsic(I, NumVecs: 4, Opc);
6510 break;
6511 }
6512 case Intrinsic::aarch64_neon_st2: {
6513 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6514 unsigned Opc;
6515 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6516 Opc = AArch64::ST2Twov8b;
6517 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6518 Opc = AArch64::ST2Twov16b;
6519 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6520 Opc = AArch64::ST2Twov4h;
6521 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6522 Opc = AArch64::ST2Twov8h;
6523 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6524 Opc = AArch64::ST2Twov2s;
6525 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6526 Opc = AArch64::ST2Twov4s;
6527 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6528 Opc = AArch64::ST2Twov2d;
6529 else if (Ty == S64 || Ty == P0)
6530 Opc = AArch64::ST1Twov1d;
6531 else
6532 llvm_unreachable("Unexpected type for st2!");
6533 selectVectorStoreIntrinsic(I, NumVecs: 2, Opc);
6534 break;
6535 }
6536 case Intrinsic::aarch64_neon_st3: {
6537 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6538 unsigned Opc;
6539 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6540 Opc = AArch64::ST3Threev8b;
6541 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6542 Opc = AArch64::ST3Threev16b;
6543 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6544 Opc = AArch64::ST3Threev4h;
6545 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6546 Opc = AArch64::ST3Threev8h;
6547 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6548 Opc = AArch64::ST3Threev2s;
6549 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6550 Opc = AArch64::ST3Threev4s;
6551 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6552 Opc = AArch64::ST3Threev2d;
6553 else if (Ty == S64 || Ty == P0)
6554 Opc = AArch64::ST1Threev1d;
6555 else
6556 llvm_unreachable("Unexpected type for st3!");
6557 selectVectorStoreIntrinsic(I, NumVecs: 3, Opc);
6558 break;
6559 }
6560 case Intrinsic::aarch64_neon_st4: {
6561 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6562 unsigned Opc;
6563 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6564 Opc = AArch64::ST4Fourv8b;
6565 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6566 Opc = AArch64::ST4Fourv16b;
6567 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6568 Opc = AArch64::ST4Fourv4h;
6569 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6570 Opc = AArch64::ST4Fourv8h;
6571 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6572 Opc = AArch64::ST4Fourv2s;
6573 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6574 Opc = AArch64::ST4Fourv4s;
6575 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6576 Opc = AArch64::ST4Fourv2d;
6577 else if (Ty == S64 || Ty == P0)
6578 Opc = AArch64::ST1Fourv1d;
6579 else
6580 llvm_unreachable("Unexpected type for st4!");
6581 selectVectorStoreIntrinsic(I, NumVecs: 4, Opc);
6582 break;
6583 }
6584 case Intrinsic::aarch64_neon_st2lane: {
6585 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6586 unsigned Opc;
6587 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6588 Opc = AArch64::ST2i8;
6589 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6590 Opc = AArch64::ST2i16;
6591 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6592 Opc = AArch64::ST2i32;
6593 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6594 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6595 Opc = AArch64::ST2i64;
6596 else
6597 llvm_unreachable("Unexpected type for st2lane!");
6598 if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 2, Opc))
6599 return false;
6600 break;
6601 }
6602 case Intrinsic::aarch64_neon_st3lane: {
6603 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6604 unsigned Opc;
6605 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6606 Opc = AArch64::ST3i8;
6607 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6608 Opc = AArch64::ST3i16;
6609 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6610 Opc = AArch64::ST3i32;
6611 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6612 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6613 Opc = AArch64::ST3i64;
6614 else
6615 llvm_unreachable("Unexpected type for st3lane!");
6616 if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 3, Opc))
6617 return false;
6618 break;
6619 }
6620 case Intrinsic::aarch64_neon_st4lane: {
6621 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6622 unsigned Opc;
6623 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6624 Opc = AArch64::ST4i8;
6625 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6626 Opc = AArch64::ST4i16;
6627 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6628 Opc = AArch64::ST4i32;
6629 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6630 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6631 Opc = AArch64::ST4i64;
6632 else
6633 llvm_unreachable("Unexpected type for st4lane!");
6634 if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 4, Opc))
6635 return false;
6636 break;
6637 }
6638 case Intrinsic::aarch64_mops_memset_tag: {
6639 // Transform
6640 //   %dst:gpr(p0) = G_INTRINSIC_W_SIDE_EFFECTS
6641 //       intrinsic(@llvm.aarch64.mops.memset.tag),
6642 //       %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6643 // where %dst is updated, into
6644 //   %Rd:GPR64common, %Rn:GPR64 =
6645 //       MOPSMemorySetTaggingPseudo
6646 //       %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6647 // where Rd and Rn are tied.
6648 // It is expected that %val has been extended to s64 in legalization.
6649 // Note that the order of the size/value operands is swapped.
6650
6651 Register DstDef = I.getOperand(i: 0).getReg();
6652 // I.getOperand(1) is the intrinsic function
6653 Register DstUse = I.getOperand(i: 2).getReg();
6654 Register ValUse = I.getOperand(i: 3).getReg();
6655 Register SizeUse = I.getOperand(i: 4).getReg();
6656
6657 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6658 // Therefore an additional virtual register is required for the updated size
6659 // operand. This value is not accessible via the semantics of the intrinsic.
6660 Register SizeDef = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64));
6661
6662 auto Memset = MIB.buildInstr(Opc: AArch64::MOPSMemorySetTaggingPseudo,
6663 DstOps: {DstDef, SizeDef}, SrcOps: {DstUse, SizeUse, ValUse});
6664 Memset.cloneMemRefs(OtherMI: I);
6665 constrainSelectedInstRegOperands(I&: *Memset, TII, TRI, RBI);
6666 break;
6667 }
6668 }
6669
6670 I.eraseFromParent();
6671 return true;
6672}
6673
6674bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6675 MachineRegisterInfo &MRI) {
6676 unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID();
6677
6678 switch (IntrinID) {
6679 default:
6680 break;
6681 case Intrinsic::aarch64_crypto_sha1h: {
6682 Register DstReg = I.getOperand(i: 0).getReg();
6683 Register SrcReg = I.getOperand(i: 2).getReg();
6684
6685 // FIXME: Should this be an assert?
6686 if (MRI.getType(Reg: DstReg).getSizeInBits() != 32 ||
6687 MRI.getType(Reg: SrcReg).getSizeInBits() != 32)
6688 return false;
6689
6690 // The operation has to happen on FPRs. Set up some new FPR registers for
6691 // the source and destination if they are on GPRs.
6692 if (RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
6693 SrcReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR32RegClass);
6694 MIB.buildCopy(Res: {SrcReg}, Op: {I.getOperand(i: 2)});
6695
6696 // Make sure the copy ends up getting constrained properly.
6697 RBI.constrainGenericRegister(Reg: I.getOperand(i: 2).getReg(),
6698 RC: AArch64::GPR32RegClass, MRI);
6699 }
6700
6701 if (RBI.getRegBank(Reg: DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
6702 DstReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR32RegClass);
6703
6704 // Actually insert the instruction.
6705 auto SHA1Inst = MIB.buildInstr(Opc: AArch64::SHA1Hrr, DstOps: {DstReg}, SrcOps: {SrcReg});
6706 constrainSelectedInstRegOperands(I&: *SHA1Inst, TII, TRI, RBI);
6707
6708 // Did we create a new register for the destination?
6709 if (DstReg != I.getOperand(i: 0).getReg()) {
6710 // Yep. Copy the result of the instruction back into the original
6711 // destination.
6712 MIB.buildCopy(Res: {I.getOperand(i: 0)}, Op: {DstReg});
6713 RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(),
6714 RC: AArch64::GPR32RegClass, MRI);
6715 }
6716
6717 I.eraseFromParent();
6718 return true;
6719 }
6720 case Intrinsic::ptrauth_resign: {
6721 Register DstReg = I.getOperand(i: 0).getReg();
6722 Register ValReg = I.getOperand(i: 2).getReg();
6723 uint64_t AUTKey = I.getOperand(i: 3).getImm();
6724 Register AUTDisc = I.getOperand(i: 4).getReg();
6725 uint64_t PACKey = I.getOperand(i: 5).getImm();
6726 Register PACDisc = I.getOperand(i: 6).getReg();
6727
6728 Register AUTAddrDisc = AUTDisc;
6729 uint16_t AUTConstDiscC = 0;
6730 std::tie(args&: AUTConstDiscC, args&: AUTAddrDisc) =
6731 extractPtrauthBlendDiscriminators(Disc: AUTDisc, MRI);
6732
6733 Register PACAddrDisc = PACDisc;
6734 uint16_t PACConstDiscC = 0;
6735 std::tie(args&: PACConstDiscC, args&: PACAddrDisc) =
6736 extractPtrauthBlendDiscriminators(Disc: PACDisc, MRI);
6737
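    // The AUTPAC pseudo operates on the value in X16 and uses X17 as scratch,
    // so move the input into X16 and mark X17 as defined before building it.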
6738 MIB.buildCopy(Res: {AArch64::X16}, Op: {ValReg});
6739 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
6740 MIB.buildInstr(Opcode: AArch64::AUTPAC)
6741 .addImm(Val: AUTKey)
6742 .addImm(Val: AUTConstDiscC)
6743 .addUse(RegNo: AUTAddrDisc)
6744 .addImm(Val: PACKey)
6745 .addImm(Val: PACConstDiscC)
6746 .addUse(RegNo: PACAddrDisc)
6747 .constrainAllUses(TII, TRI, RBI);
6748 MIB.buildCopy(Res: {DstReg}, Op: Register(AArch64::X16));
6749
6750 RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
6751 I.eraseFromParent();
6752 return true;
6753 }
6754 case Intrinsic::ptrauth_auth: {
6755 Register DstReg = I.getOperand(i: 0).getReg();
6756 Register ValReg = I.getOperand(i: 2).getReg();
6757 uint64_t AUTKey = I.getOperand(i: 3).getImm();
6758 Register AUTDisc = I.getOperand(i: 4).getReg();
6759
6760 Register AUTAddrDisc = AUTDisc;
6761 uint16_t AUTConstDiscC = 0;
6762 std::tie(args&: AUTConstDiscC, args&: AUTAddrDisc) =
6763 extractPtrauthBlendDiscriminators(Disc: AUTDisc, MRI);
6764
6765 MIB.buildCopy(Res: {AArch64::X16}, Op: {ValReg});
6766 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
6767 MIB.buildInstr(Opcode: AArch64::AUT)
6768 .addImm(Val: AUTKey)
6769 .addImm(Val: AUTConstDiscC)
6770 .addUse(RegNo: AUTAddrDisc)
6771 .constrainAllUses(TII, TRI, RBI);
6772 MIB.buildCopy(Res: {DstReg}, Op: Register(AArch64::X16));
6773
6774 RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
6775 I.eraseFromParent();
6776 return true;
6777 }
6778 case Intrinsic::frameaddress:
6779 case Intrinsic::returnaddress: {
6780 MachineFunction &MF = *I.getParent()->getParent();
6781 MachineFrameInfo &MFI = MF.getFrameInfo();
6782
6783 unsigned Depth = I.getOperand(i: 2).getImm();
6784 Register DstReg = I.getOperand(i: 0).getReg();
6785 RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
6786
6787 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6788 if (!MFReturnAddr) {
6789 // Insert the copy from LR/X30 into the entry block, before it can be
6790 // clobbered by anything.
6791 MFI.setReturnAddressIsTaken(true);
6792 MFReturnAddr = getFunctionLiveInPhysReg(
6793 MF, TII, PhysReg: AArch64::LR, RC: AArch64::GPR64RegClass, DL: I.getDebugLoc());
6794 }
6795
6796 if (STI.hasPAuth()) {
6797 MIB.buildInstr(Opc: AArch64::XPACI, DstOps: {DstReg}, SrcOps: {MFReturnAddr});
6798 } else {
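        // XPACLRI implicitly operates on LR, so shuttle the value through LR.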
6799 MIB.buildCopy(Res: {Register(AArch64::LR)}, Op: {MFReturnAddr});
6800 MIB.buildInstr(Opcode: AArch64::XPACLRI);
6801 MIB.buildCopy(Res: {DstReg}, Op: {Register(AArch64::LR)});
6802 }
6803
6804 I.eraseFromParent();
6805 return true;
6806 }
6807
6808 MFI.setFrameAddressIsTaken(true);
6809 Register FrameAddr(AArch64::FP);
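    // Each frame record stores the previous frame pointer at offset 0, so chase
    // the chain Depth times.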
6810 while (Depth--) {
6811 Register NextFrame = MRI.createVirtualRegister(RegClass: &AArch64::GPR64spRegClass);
6812 auto Ldr =
6813 MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {NextFrame}, SrcOps: {FrameAddr}).addImm(Val: 0);
6814 constrainSelectedInstRegOperands(I&: *Ldr, TII, TRI, RBI);
6815 FrameAddr = NextFrame;
6816 }
6817
6818 if (IntrinID == Intrinsic::frameaddress)
6819 MIB.buildCopy(Res: {DstReg}, Op: {FrameAddr});
6820 else {
6821 MFI.setReturnAddressIsTaken(true);
6822
6823 if (STI.hasPAuth()) {
6824 Register TmpReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
6825 MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {TmpReg}, SrcOps: {FrameAddr}).addImm(Val: 1);
6826 MIB.buildInstr(Opc: AArch64::XPACI, DstOps: {DstReg}, SrcOps: {TmpReg});
6827 } else {
6828 MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {Register(AArch64::LR)}, SrcOps: {FrameAddr})
6829 .addImm(Val: 1);
6830 MIB.buildInstr(Opcode: AArch64::XPACLRI);
6831 MIB.buildCopy(Res: {DstReg}, Op: {Register(AArch64::LR)});
6832 }
6833 }
6834
6835 I.eraseFromParent();
6836 return true;
6837 }
6838 case Intrinsic::aarch64_neon_tbl2:
6839 SelectTable(I, MRI, NumVecs: 2, Opc1: AArch64::TBLv8i8Two, Opc2: AArch64::TBLv16i8Two, isExt: false);
6840 return true;
6841 case Intrinsic::aarch64_neon_tbl3:
6842 SelectTable(I, MRI, NumVecs: 3, Opc1: AArch64::TBLv8i8Three, Opc2: AArch64::TBLv16i8Three,
6843 isExt: false);
6844 return true;
6845 case Intrinsic::aarch64_neon_tbl4:
6846 SelectTable(I, MRI, NumVecs: 4, Opc1: AArch64::TBLv8i8Four, Opc2: AArch64::TBLv16i8Four, isExt: false);
6847 return true;
6848 case Intrinsic::aarch64_neon_tbx2:
6849 SelectTable(I, MRI, NumVecs: 2, Opc1: AArch64::TBXv8i8Two, Opc2: AArch64::TBXv16i8Two, isExt: true);
6850 return true;
6851 case Intrinsic::aarch64_neon_tbx3:
6852 SelectTable(I, MRI, NumVecs: 3, Opc1: AArch64::TBXv8i8Three, Opc2: AArch64::TBXv16i8Three, isExt: true);
6853 return true;
6854 case Intrinsic::aarch64_neon_tbx4:
6855 SelectTable(I, MRI, NumVecs: 4, Opc1: AArch64::TBXv8i8Four, Opc2: AArch64::TBXv16i8Four, isExt: true);
6856 return true;
6857 case Intrinsic::swift_async_context_addr:
6858 auto Sub = MIB.buildInstr(Opc: AArch64::SUBXri, DstOps: {I.getOperand(i: 0).getReg()},
6859 SrcOps: {Register(AArch64::FP)})
6860 .addImm(Val: 8)
6861 .addImm(Val: 0);
6862 constrainSelectedInstRegOperands(I&: *Sub, TII, TRI, RBI);
6863
6864 MF->getFrameInfo().setFrameAddressIsTaken(true);
6865 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6866 I.eraseFromParent();
6867 return true;
6868 }
6869 return false;
6870}
6871
6872// G_PTRAUTH_GLOBAL_VALUE lowering
6873//
6874// We have 3 lowering alternatives to choose from:
6875// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6876// If the GV doesn't need a GOT load (i.e., is locally defined)
6877// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6878//
6879// - LOADgotPAC: similar to LOADgot, with added PAC.
6880// If the GV needs a GOT load, materialize the pointer using the usual
6881// GOT adrp+ldr, +pac. Pointers in GOT are assumed to be not signed, the GOT
6882// section is assumed to be read-only (for example, via relro mechanism). See
6883// LowerMOVaddrPAC.
6884//
6885 // - LOADauthptrstatic: similar to LOADgot, but uses a
6886 // special stub slot instead of a GOT slot.
6887// Load a signed pointer for symbol 'sym' from a stub slot named
6888 // 'sym$auth_ptr$key$disc', filled by the dynamic linker during relocation
6889// resolving. This usually lowers to adrp+ldr, but also emits an entry into
6890// .data with an
6891// @AUTH relocation. See LowerLOADauthptrstatic.
6892//
6893 // All 3 are pseudos that are expanded late into longer sequences: this lets us
6894// provide integrity guarantees on the to-be-signed intermediate values.
6895//
6896// LOADauthptrstatic is undesirable because it requires a large section filled
6897// with often similarly-signed pointers, making it a good harvesting target.
6898// Thus, it's only used for ptrauth references to extern_weak to avoid null
6899// checks.
6900
6901bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6902 MachineInstr &I, MachineRegisterInfo &MRI) const {
6903 Register DefReg = I.getOperand(i: 0).getReg();
6904 Register Addr = I.getOperand(i: 1).getReg();
6905 uint64_t Key = I.getOperand(i: 2).getImm();
6906 Register AddrDisc = I.getOperand(i: 3).getReg();
6907 uint64_t Disc = I.getOperand(i: 4).getImm();
6908 int64_t Offset = 0;
6909
6910 if (Key > AArch64PACKey::LAST)
6911 report_fatal_error(reason: "key in ptrauth global out of range [0, " +
6912 Twine((int)AArch64PACKey::LAST) + "]");
6913
6914 // Blend only works if the integer discriminator is 16-bit wide.
6915 if (!isUInt<16>(x: Disc))
6916 report_fatal_error(
6917 reason: "constant discriminator in ptrauth global out of range [0, 0xffff]");
6918
6919 // Choosing between 3 lowering alternatives is target-specific.
6920 if (!STI.isTargetELF() && !STI.isTargetMachO())
6921 report_fatal_error(reason: "ptrauth global lowering only supported on MachO/ELF");
6922
6923 if (!MRI.hasOneDef(RegNo: Addr))
6924 return false;
6925
6926 // First match any offset we take from the real global.
6927 const MachineInstr *DefMI = &*MRI.def_instr_begin(RegNo: Addr);
6928 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6929 Register OffsetReg = DefMI->getOperand(i: 2).getReg();
6930 if (!MRI.hasOneDef(RegNo: OffsetReg))
6931 return false;
6932 const MachineInstr &OffsetMI = *MRI.def_instr_begin(RegNo: OffsetReg);
6933 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6934 return false;
6935
6936 Addr = DefMI->getOperand(i: 1).getReg();
6937 if (!MRI.hasOneDef(RegNo: Addr))
6938 return false;
6939
6940 DefMI = &*MRI.def_instr_begin(RegNo: Addr);
6941 Offset = OffsetMI.getOperand(i: 1).getCImm()->getSExtValue();
6942 }
6943
6944 // We should be left with a genuine unauthenticated GlobalValue.
6945 const GlobalValue *GV;
6946 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6947 GV = DefMI->getOperand(i: 1).getGlobal();
6948 Offset += DefMI->getOperand(i: 1).getOffset();
6949 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6950 GV = DefMI->getOperand(i: 2).getGlobal();
6951 Offset += DefMI->getOperand(i: 2).getOffset();
6952 } else {
6953 return false;
6954 }
6955
6956 MachineIRBuilder MIB(I);
6957
6958 // Classify the reference to determine whether it needs a GOT load.
6959 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6960 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6961 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6962 "unsupported non-GOT op flags on ptrauth global reference");
6963 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6964 "unsupported non-GOT reference to weak ptrauth global");
6965
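  // A constant-zero address discriminator means there is no address diversity;
  // any other value (constant or not) is treated as a live discriminator.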
6966 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(VReg: AddrDisc, MRI);
6967 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6968
6969 // Non-extern_weak:
6970 // - No GOT load needed -> MOVaddrPAC
6971 // - GOT load for non-extern_weak -> LOADgotPAC
6972 // Note that we disallow extern_weak refs to avoid null checks later.
6973 if (!GV->hasExternalWeakLinkage()) {
6974 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X16}, SrcOps: {});
6975 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
6976 MIB.buildInstr(Opcode: NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6977 .addGlobalAddress(GV, Offset)
6978 .addImm(Val: Key)
6979 .addReg(RegNo: HasAddrDisc ? AddrDisc : AArch64::XZR)
6980 .addImm(Val: Disc)
6981 .constrainAllUses(TII, TRI, RBI);
6982 MIB.buildCopy(Res: DefReg, Op: Register(AArch64::X16));
6983 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
6984 I.eraseFromParent();
6985 return true;
6986 }
6987
6988 // extern_weak -> LOADauthptrstatic
6989
6990 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6991 // offset alone as a pointer if the symbol wasn't available, which would
6992 // probably break null checks in users. Ptrauth complicates things further:
6993 // error out.
6994 if (Offset != 0)
6995 report_fatal_error(
6996 reason: "unsupported non-zero offset in weak ptrauth global reference");
6997
6998 if (HasAddrDisc)
6999 report_fatal_error(reason: "unsupported weak addr-div ptrauth global");
7000
7001 MIB.buildInstr(Opc: AArch64::LOADauthptrstatic, DstOps: {DefReg}, SrcOps: {})
7002 .addGlobalAddress(GV, Offset)
7003 .addImm(Val: Key)
7004 .addImm(Val: Disc);
7005 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
7006
7007 I.eraseFromParent();
7008 return true;
7009}
7010
7011void AArch64InstructionSelector::SelectTable(MachineInstr &I,
7012 MachineRegisterInfo &MRI,
7013 unsigned NumVec, unsigned Opc1,
7014 unsigned Opc2, bool isExt) {
7015 Register DstReg = I.getOperand(i: 0).getReg();
7016 unsigned Opc = MRI.getType(Reg: DstReg) == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8) ? Opc1 : Opc2;
7017
7018 // Create the REG_SEQUENCE
7019 SmallVector<Register, 4> Regs;
7020 for (unsigned i = 0; i < NumVec; i++)
7021 Regs.push_back(Elt: I.getOperand(i: i + 2 + isExt).getReg());
7022 Register RegSeq = createQTuple(Regs, MIB);
7023
7024 Register IdxReg = I.getOperand(i: 2 + NumVec + isExt).getReg();
7025 MachineInstrBuilder Instr;
7026 if (isExt) {
7027 Register Reg = I.getOperand(i: 2).getReg();
7028 Instr = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Reg, RegSeq, IdxReg});
7029 } else
7030 Instr = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {RegSeq, IdxReg});
7031 constrainSelectedInstRegOperands(I&: *Instr, TII, TRI, RBI);
7032 I.eraseFromParent();
7033}
7034
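/// The ShiftA/ShiftB renderers below produce the immr/imms immediates used
/// when a constant shift is selected via its bitfield-move (UBFM/SBFM) form:
/// e.g. a 32-bit "lsl #N" is UBFM with immr = (32 - N) & 0x1f and
/// imms = 31 - N, so N = 4 renders (28, 27). The _64 variants do the same
/// with a 0x3f mask and 63.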
7035InstructionSelector::ComplexRendererFns
7036AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
7037 auto MaybeImmed = getImmedFromMO(Root);
7038 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
7039 return std::nullopt;
7040 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
7041 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
7042}
7043
7044InstructionSelector::ComplexRendererFns
7045AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
7046 auto MaybeImmed = getImmedFromMO(Root);
7047 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
7048 return std::nullopt;
7049 uint64_t Enc = 31 - *MaybeImmed;
7050 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
7051}
7052
7053InstructionSelector::ComplexRendererFns
7054AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
7055 auto MaybeImmed = getImmedFromMO(Root);
7056 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7057 return std::nullopt;
7058 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
7059 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
7060}
7061
7062InstructionSelector::ComplexRendererFns
7063AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
7064 auto MaybeImmed = getImmedFromMO(Root);
7065 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7066 return std::nullopt;
7067 uint64_t Enc = 63 - *MaybeImmed;
7068 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
7069}
7070
7071/// Helper to select an immediate value that can be represented as a 12-bit
7072/// value shifted left by either 0 or 12. If it is possible to do so, return
7073/// the immediate and shift value. If not, return std::nullopt.
7074///
7075/// Used by selectArithImmed and selectNegArithImmed.
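/// For example, 0xabc yields (0xabc, LSL #0), 0xabc000 yields (0xabc, LSL #12),
/// and 0xabc123 cannot be represented, so std::nullopt is returned.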
7076InstructionSelector::ComplexRendererFns
7077AArch64InstructionSelector::select12BitValueWithLeftShift(
7078 uint64_t Immed) const {
7079 unsigned ShiftAmt;
7080 if (Immed >> 12 == 0) {
7081 ShiftAmt = 0;
7082 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
7083 ShiftAmt = 12;
7084 Immed = Immed >> 12;
7085 } else
7086 return std::nullopt;
7087
7088 unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt);
7089 return {{
7090 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Immed); },
7091 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShVal); },
7092 }};
7093}
7094
7095/// SelectArithImmed - Select an immediate value that can be represented as
7096/// a 12-bit value shifted left by either 0 or 12. If so, return true with
7097/// Val set to the 12-bit value and Shift set to the shifter operand.
7098InstructionSelector::ComplexRendererFns
7099AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
7100 // This function is called from the addsub_shifted_imm ComplexPattern,
7101 // which lists [imm] as the list of opcodes it's interested in; however,
7102 // we still need to check whether the operand is actually an immediate
7103 // here because the ComplexPattern opcode list is only used in
7104 // root-level opcode matching.
7105 auto MaybeImmed = getImmedFromMO(Root);
7106 if (MaybeImmed == std::nullopt)
7107 return std::nullopt;
7108 return select12BitValueWithLeftShift(Immed: *MaybeImmed);
7109}
7110
7111/// SelectNegArithImmed - As above, but negates the value before trying to
7112/// select it.
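/// E.g. comparing a 64-bit value against -4096: negating gives 4096, which is
/// representable as (#1, LSL #12), so a cmn/adds-style form can be used.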
7113InstructionSelector::ComplexRendererFns
7114AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
7115 // We need a register here, because we need to know if we have a 64 or 32
7116 // bit immediate.
7117 if (!Root.isReg())
7118 return std::nullopt;
7119 auto MaybeImmed = getImmedFromMO(Root);
7120 if (MaybeImmed == std::nullopt)
7121 return std::nullopt;
7122 uint64_t Immed = *MaybeImmed;
7123
7124 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
7125 // have the opposite effect on the C flag, so this pattern mustn't match under
7126 // those circumstances.
7127 if (Immed == 0)
7128 return std::nullopt;
7129
7130 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
7131 // the root.
7132 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7133 if (MRI.getType(Reg: Root.getReg()).getSizeInBits() == 32)
7134 Immed = ~((uint32_t)Immed) + 1;
7135 else
7136 Immed = ~Immed + 1ULL;
7137
7138 if (Immed & 0xFFFFFFFFFF000000ULL)
7139 return std::nullopt;
7140
7141 Immed &= 0xFFFFFFULL;
7142 return select12BitValueWithLeftShift(Immed);
7143}
7144
7145 /// Checks whether we can be sure that folding MI into a load/store
7146 /// addressing mode is beneficial or not.
7147///
7148/// Returns:
7149/// - true if folding MI would be beneficial.
7150/// - false if folding MI would be bad.
7151/// - std::nullopt if it is not sure whether folding MI is beneficial.
7152///
7153/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
7154///
7155/// %13:gpr(s64) = G_CONSTANT i64 1
7156/// %8:gpr(s64) = G_SHL %6, %13(s64)
7157/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
7158/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
7159std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7160 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7161 if (MI.getOpcode() == AArch64::G_SHL) {
7162 // Address operands with shifts are free, except when running on subtargets
7163 // with AddrLSLSlow14.
7164 if (const auto ValAndVReg = getIConstantVRegValWithLookThrough(
7165 VReg: MI.getOperand(i: 2).getReg(), MRI)) {
7166 const APInt ShiftVal = ValAndVReg->Value;
7167
7168 // Don't fold if we know this will be slow.
7169 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7170 }
7171 }
7172 return std::nullopt;
7173}
7174
7175/// Return true if it is worth folding MI into an extended register. That is,
7176/// if it's safe to pull it into the addressing mode of a load or store as a
7177/// shift.
7178/// \p IsAddrOperand whether the def of MI is used as an address operand
7179/// (e.g. feeding into an LDR/STR).
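/// For example, a G_SHL whose only user is a G_LOAD is always folded; a G_SHL
/// with several non-memory users generally is not, since the shift would still
/// have to be computed separately for those users.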
7180bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7181 MachineInstr &MI, const MachineRegisterInfo &MRI,
7182 bool IsAddrOperand) const {
7183
7184 // Always fold if there is one use, or if we're optimizing for size.
7185 Register DefReg = MI.getOperand(i: 0).getReg();
7186 if (MRI.hasOneNonDBGUse(RegNo: DefReg) ||
7187 MI.getParent()->getParent()->getFunction().hasOptSize())
7188 return true;
7189
7190 if (IsAddrOperand) {
7191 // If we are already sure that folding MI is good or bad, return the result.
7192 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7193 return *Worth;
7194
7195 // Fold G_PTR_ADD if its offset operand can be folded
7196 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7197 MachineInstr *OffsetInst =
7198 getDefIgnoringCopies(Reg: MI.getOperand(i: 2).getReg(), MRI);
7199
7200 // Note, we already know G_PTR_ADD is used by at least two instructions.
7201 // If we are also sure about whether folding is beneficial or not,
7202 // return the result.
7203 if (const auto Worth = isWorthFoldingIntoAddrMode(MI&: *OffsetInst, MRI))
7204 return *Worth;
7205 }
7206 }
7207
7208 // FIXME: Consider checking HasALULSLFast as appropriate.
7209
7210 // We have a fastpath, so folding a shift in and potentially computing it
7211 // many times may be beneficial. Check if this is only used in memory ops.
7212 // If it is, then we should fold.
7213 return all_of(Range: MRI.use_nodbg_instructions(Reg: DefReg),
7214 P: [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7215}
7216
7217static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
7218 switch (Type) {
7219 case AArch64_AM::SXTB:
7220 case AArch64_AM::SXTH:
7221 case AArch64_AM::SXTW:
7222 return true;
7223 default:
7224 return false;
7225 }
7226}
7227
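/// Match an offset operand of the form (G_SHL x, log2(SizeInBytes)) or
/// (G_MUL x, SizeInBytes), optionally behind a G_ZEXT when WantsExt is true,
/// and render {Base, x, extend, #1} so the access becomes e.g.
///   ldr x0, [Base, x, lsl #log2(SizeInBytes)]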
7228InstructionSelector::ComplexRendererFns
7229AArch64InstructionSelector::selectExtendedSHL(
7230 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
7231 unsigned SizeInBytes, bool WantsExt) const {
7232 assert(Base.isReg() && "Expected base to be a register operand");
7233 assert(Offset.isReg() && "Expected offset to be a register operand");
7234
7235 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7236 MachineInstr *OffsetInst = MRI.getVRegDef(Reg: Offset.getReg());
7237
7238 unsigned OffsetOpc = OffsetInst->getOpcode();
7239 bool LookedThroughZExt = false;
7240 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7241 // Try to look through a ZEXT.
7242 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7243 return std::nullopt;
7244
7245 OffsetInst = MRI.getVRegDef(Reg: OffsetInst->getOperand(i: 1).getReg());
7246 OffsetOpc = OffsetInst->getOpcode();
7247 LookedThroughZExt = true;
7248
7249 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7250 return std::nullopt;
7251 }
7252 // Make sure that the memory op is a valid size.
7253 int64_t LegalShiftVal = Log2_32(Value: SizeInBytes);
7254 if (LegalShiftVal == 0)
7255 return std::nullopt;
7256 if (!isWorthFoldingIntoExtendedReg(MI&: *OffsetInst, MRI, IsAddrOperand: true))
7257 return std::nullopt;
7258
7259 // Now, try to find the specific G_CONSTANT. Start by assuming that the
7260 // register we will offset is the LHS, and the register containing the
7261 // constant is the RHS.
7262 Register OffsetReg = OffsetInst->getOperand(i: 1).getReg();
7263 Register ConstantReg = OffsetInst->getOperand(i: 2).getReg();
7264 auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
7265 if (!ValAndVReg) {
7266 // We didn't get a constant on the RHS. If the opcode is a shift, then
7267 // we're done.
7268 if (OffsetOpc == TargetOpcode::G_SHL)
7269 return std::nullopt;
7270
7271 // If we have a G_MUL, we can use either register. Try looking at the RHS.
7272 std::swap(a&: OffsetReg, b&: ConstantReg);
7273 ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
7274 if (!ValAndVReg)
7275 return std::nullopt;
7276 }
7277
7278 // The value must fit into 3 bits, and must be positive. Make sure that is
7279 // true.
7280 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7281
7282 // Since we're going to pull this into a shift, the constant value must be
7283 // a power of 2. If we got a multiply, then we need to check this.
7284 if (OffsetOpc == TargetOpcode::G_MUL) {
7285 if (!llvm::has_single_bit<uint32_t>(Value: ImmVal))
7286 return std::nullopt;
7287
7288 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7289 ImmVal = Log2_32(Value: ImmVal);
7290 }
7291
7292 if ((ImmVal & 0x7) != ImmVal)
7293 return std::nullopt;
7294
7295 // We are only allowed to shift by LegalShiftVal. This shift value is built
7296 // into the instruction, so we can't just use whatever we want.
7297 if (ImmVal != LegalShiftVal)
7298 return std::nullopt;
7299
7300 unsigned SignExtend = 0;
7301 if (WantsExt) {
7302 // Check if the offset is defined by an extend, unless we looked through a
7303 // G_ZEXT earlier.
7304 if (!LookedThroughZExt) {
7305 MachineInstr *ExtInst = getDefIgnoringCopies(Reg: OffsetReg, MRI);
7306 auto Ext = getExtendTypeForInst(MI&: *ExtInst, MRI, IsLoadStore: true);
7307 if (Ext == AArch64_AM::InvalidShiftExtend)
7308 return std::nullopt;
7309
7310 SignExtend = isSignExtendShiftType(Type: Ext) ? 1 : 0;
7311 // We only support SXTW for signed extension here.
7312 if (SignExtend && Ext != AArch64_AM::SXTW)
7313 return std::nullopt;
7314 OffsetReg = ExtInst->getOperand(i: 1).getReg();
7315 }
7316
7317 // Need a 32-bit wide register here.
7318 MachineIRBuilder MIB(*MRI.getVRegDef(Reg: Root.getReg()));
7319 OffsetReg = moveScalarRegClass(Reg: OffsetReg, RC: AArch64::GPR32RegClass, MIB);
7320 }
7321
7322 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7323 // offset. Signify that we are shifting by setting the shift flag to 1.
7324 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: Base.getReg()); },
7325 [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: OffsetReg); },
7326 [=](MachineInstrBuilder &MIB) {
7327 // Need to add both immediates here to make sure that they are both
7328 // added to the instruction.
7329 MIB.addImm(Val: SignExtend);
7330 MIB.addImm(Val: 1);
7331 }}};
7332}
7333
7334/// This is used for computing addresses like this:
7335///
7336/// ldr x1, [x2, x3, lsl #3]
7337///
7338/// Where x2 is the base register, and x3 is an offset register. The shift-left
7339/// is a constant value specific to this load instruction. That is, we'll never
7340/// see anything other than a 3 here (which corresponds to the size of the
7341/// element being loaded.)
7342InstructionSelector::ComplexRendererFns
7343AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7344 MachineOperand &Root, unsigned SizeInBytes) const {
7345 if (!Root.isReg())
7346 return std::nullopt;
7347 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7348
7349 // We want to find something like this:
7350 //
7351 // val = G_CONSTANT LegalShiftVal
7352 // shift = G_SHL off_reg val
7353 // ptr = G_PTR_ADD base_reg shift
7354 // x = G_LOAD ptr
7355 //
7356 // And fold it into this addressing mode:
7357 //
7358 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7359
7360 // Check if we can find the G_PTR_ADD.
7361 MachineInstr *PtrAdd =
7362 getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
7363 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI&: *PtrAdd, MRI, IsAddrOperand: true))
7364 return std::nullopt;
7365
7366 // Now, try to match an opcode which will match our specific offset.
7367 // We want a G_SHL or a G_MUL.
7368 MachineInstr *OffsetInst =
7369 getDefIgnoringCopies(Reg: PtrAdd->getOperand(i: 2).getReg(), MRI);
7370 return selectExtendedSHL(Root, Base&: PtrAdd->getOperand(i: 1),
7371 Offset&: OffsetInst->getOperand(i: 0), SizeInBytes,
7372 /*WantsExt=*/false);
7373}
7374
7375/// This is used for computing addresses like this:
7376///
7377/// ldr x1, [x2, x3]
7378///
7379/// Where x2 is the base register, and x3 is an offset register.
7380///
7381/// When possible (or profitable) to fold a G_PTR_ADD into the address
7382/// calculation, this will do so. Otherwise, it will return std::nullopt.
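/// E.g. (a sketch):
///   %ptr:gpr(p0) = G_PTR_ADD %base, %off(s64)
///   %val:gpr(s64) = G_LOAD %ptr(p0) :: (load (s64))
/// becomes "ldr %val, [%base, %off]" when the G_PTR_ADD has no other users.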
7383InstructionSelector::ComplexRendererFns
7384AArch64InstructionSelector::selectAddrModeRegisterOffset(
7385 MachineOperand &Root) const {
7386 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7387
7388 // We need a GEP.
7389 MachineInstr *Gep = MRI.getVRegDef(Reg: Root.getReg());
7390 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7391 return std::nullopt;
7392
7393 // If this is used more than once, let's not bother folding.
7394 // TODO: Check if they are memory ops. If they are, then we can still fold
7395 // without having to recompute anything.
7396 if (!MRI.hasOneNonDBGUse(RegNo: Gep->getOperand(i: 0).getReg()))
7397 return std::nullopt;
7398
7399 // Base is the GEP's LHS, offset is its RHS.
7400 return {{[=](MachineInstrBuilder &MIB) {
7401 MIB.addUse(RegNo: Gep->getOperand(i: 1).getReg());
7402 },
7403 [=](MachineInstrBuilder &MIB) {
7404 MIB.addUse(RegNo: Gep->getOperand(i: 2).getReg());
7405 },
7406 [=](MachineInstrBuilder &MIB) {
7407 // Need to add both immediates here to make sure that they are both
7408 // added to the instruction.
7409 MIB.addImm(Val: 0);
7410 MIB.addImm(Val: 0);
7411 }}};
7412}
7413
7414 /// This is intended to be equivalent to selectAddrModeXRO in
7415 /// AArch64ISelDAGToDAG. It's used for selecting X register offset loads.
7416InstructionSelector::ComplexRendererFns
7417AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7418 unsigned SizeInBytes) const {
7419 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7420 if (!Root.isReg())
7421 return std::nullopt;
7422 MachineInstr *PtrAdd =
7423 getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
7424 if (!PtrAdd)
7425 return std::nullopt;
7426
7427 // Check for an immediate which cannot be encoded in the [base + imm]
7428 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7429 // end up with code like:
7430 //
7431 // mov x0, wide
7432 // add x1, base, x0
7433 // ldr x2, [x1, x0]
7434 //
7435 // In this situation, we can use the [base, xreg] addressing mode to save an
7436 // add/sub:
7437 //
7438 // mov x0, wide
7439 // ldr x2, [base, x0]
7440 auto ValAndVReg =
7441 getIConstantVRegValWithLookThrough(VReg: PtrAdd->getOperand(i: 2).getReg(), MRI);
7442 if (ValAndVReg) {
7443 unsigned Scale = Log2_32(Value: SizeInBytes);
7444 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7445
7446 // Skip immediates that can be selected in the load/store addressing
7447 // mode.
7448 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7449 ImmOff < (0x1000 << Scale))
7450 return std::nullopt;
7451
7452 // Helper lambda to decide whether or not it is preferable to emit an add.
7453 auto isPreferredADD = [](int64_t ImmOff) {
7454 // Constants in [0x0, 0xfff] can be encoded in an add.
7455 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7456 return true;
7457
7458 // Can it be encoded in an add lsl #12?
7459 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7460 return false;
7461
7462 // It can be encoded in an add lsl #12, but we may not want to. If it is
7463 // possible to select this as a single movz, then prefer that. A single
7464 // movz is faster than an add with a shift.
7465 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7466 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7467 };
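    // For example, ImmOff = 0x123000 needs an "add ..., lsl #12" and cannot be
    // materialized by a single movz, so isPreferredADD returns true and we bail
    // out below; ImmOff = 0x230000 can be a single "movz ..., #0x23, lsl #16",
    // so we keep the [base, xreg] form instead.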
7468
7469 // If the immediate can be encoded in a single add/sub, then bail out.
7470 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7471 return std::nullopt;
7472 }
7473
7474 // Try to fold shifts into the addressing mode.
7475 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7476 if (AddrModeFns)
7477 return AddrModeFns;
7478
7479 // If that doesn't work, see if it's possible to fold in registers from
7480 // a GEP.
7481 return selectAddrModeRegisterOffset(Root);
7482}
7483
7484/// This is used for computing addresses like this:
7485///
7486/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7487///
7488/// Where we have a 64-bit base register, a 32-bit offset register, and an
7489/// extend (which may or may not be signed).
7490InstructionSelector::ComplexRendererFns
7491AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7492 unsigned SizeInBytes) const {
7493 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7494
7495 MachineInstr *PtrAdd =
7496 getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
7497 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI&: *PtrAdd, MRI, IsAddrOperand: true))
7498 return std::nullopt;
7499
7500 MachineOperand &LHS = PtrAdd->getOperand(i: 1);
7501 MachineOperand &RHS = PtrAdd->getOperand(i: 2);
7502 MachineInstr *OffsetInst = getDefIgnoringCopies(Reg: RHS.getReg(), MRI);
7503
7504 // The first case is the same as selectAddrModeXRO, except we need an extend.
7505 // In this case, we try to find a shift and extend, and fold them into the
7506 // addressing mode.
7507 //
7508 // E.g.
7509 //
7510 // off_reg = G_Z/S/ANYEXT ext_reg
7511 // val = G_CONSTANT LegalShiftVal
7512 // shift = G_SHL off_reg val
7513 // ptr = G_PTR_ADD base_reg shift
7514 // x = G_LOAD ptr
7515 //
7516 // In this case we can get a load like this:
7517 //
7518 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7519 auto ExtendedShl = selectExtendedSHL(Root, Base&: LHS, Offset&: OffsetInst->getOperand(i: 0),
7520 SizeInBytes, /*WantsExt=*/true);
7521 if (ExtendedShl)
7522 return ExtendedShl;
7523
7524 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7525 //
7526 // e.g.
7527 // ldr something, [base_reg, ext_reg, sxtw]
7528 if (!isWorthFoldingIntoExtendedReg(MI&: *OffsetInst, MRI, IsAddrOperand: true))
7529 return std::nullopt;
7530
7531 // Check if this is an extend. We'll get an extend type if it is.
7532 AArch64_AM::ShiftExtendType Ext =
7533 getExtendTypeForInst(MI&: *OffsetInst, MRI, /*IsLoadStore=*/true);
7534 if (Ext == AArch64_AM::InvalidShiftExtend)
7535 return std::nullopt;
7536
7537 // Need a 32-bit wide register.
7538 MachineIRBuilder MIB(*PtrAdd);
7539 Register ExtReg = moveScalarRegClass(Reg: OffsetInst->getOperand(i: 1).getReg(),
7540 RC: AArch64::GPR32RegClass, MIB);
7541 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7542
7543 // Base is LHS, offset is ExtReg.
7544 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: LHS.getReg()); },
7545 [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); },
7546 [=](MachineInstrBuilder &MIB) {
7547 MIB.addImm(Val: SignExtend);
7548 MIB.addImm(Val: 0);
7549 }}};
7550}
7551
7552/// Select a "register plus unscaled signed 9-bit immediate" address. This
7553/// should only match when there is an offset that is not valid for a scaled
7554/// immediate addressing mode. The "Size" argument is the size in bytes of the
7555/// memory reference, which is needed here to know what is valid for a scaled
7556/// immediate.
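/// E.g. a 64-bit load at offset 17 cannot use the scaled form (17 is not a
/// multiple of 8), but it fits the unscaled signed 9-bit range, giving
/// "ldur x0, [x1, #17]".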
7557InstructionSelector::ComplexRendererFns
7558AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7559 unsigned Size) const {
7560 MachineRegisterInfo &MRI =
7561 Root.getParent()->getParent()->getParent()->getRegInfo();
7562
7563 if (!Root.isReg())
7564 return std::nullopt;
7565
7566 if (!isBaseWithConstantOffset(Root, MRI))
7567 return std::nullopt;
7568
7569 MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg());
7570
7571 MachineOperand &OffImm = RootDef->getOperand(i: 2);
7572 if (!OffImm.isReg())
7573 return std::nullopt;
7574 MachineInstr *RHS = MRI.getVRegDef(Reg: OffImm.getReg());
7575 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7576 return std::nullopt;
7577 int64_t RHSC;
7578 MachineOperand &RHSOp1 = RHS->getOperand(i: 1);
7579 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7580 return std::nullopt;
7581 RHSC = RHSOp1.getCImm()->getSExtValue();
7582
7583 if (RHSC >= -256 && RHSC < 256) {
7584 MachineOperand &Base = RootDef->getOperand(i: 1);
7585 return {{
7586 [=](MachineInstrBuilder &MIB) { MIB.add(MO: Base); },
7587 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC); },
7588 }};
7589 }
7590 return std::nullopt;
7591}
7592
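/// Try to fold a G_ADD_LOW fed by an ADRP into the addressing mode as a
/// :lo12: page-offset, e.g. (a sketch):
///   %page:gpr64 = ADRP @g
///   %addr:gpr(p0) = G_ADD_LOW %page(p0), @g
///   %val = G_LOAD %addr
/// becomes "ldr %val, [%page, :lo12:@g]".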
7593InstructionSelector::ComplexRendererFns
7594AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7595 unsigned Size,
7596 MachineRegisterInfo &MRI) const {
7597 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7598 return std::nullopt;
7599 MachineInstr &Adrp = *MRI.getVRegDef(Reg: RootDef.getOperand(i: 1).getReg());
7600 if (Adrp.getOpcode() != AArch64::ADRP)
7601 return std::nullopt;
7602
7603 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7604 auto Offset = Adrp.getOperand(i: 1).getOffset();
7605 if (Offset % Size != 0)
7606 return std::nullopt;
7607
7608 auto GV = Adrp.getOperand(i: 1).getGlobal();
7609 if (GV->isThreadLocal())
7610 return std::nullopt;
7611
7612 auto &MF = *RootDef.getParent()->getParent();
7613 if (GV->getPointerAlignment(DL: MF.getDataLayout()) < Size)
7614 return std::nullopt;
7615
7616 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM: MF.getTarget());
7617 MachineIRBuilder MIRBuilder(RootDef);
7618 Register AdrpReg = Adrp.getOperand(i: 0).getReg();
7619 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: AdrpReg); },
7620 [=](MachineInstrBuilder &MIB) {
7621 MIB.addGlobalAddress(GV, Offset,
7622 TargetFlags: OpFlags | AArch64II::MO_PAGEOFF |
7623 AArch64II::MO_NC);
7624 }}};
7625}
7626
7627/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7628/// "Size" argument is the size in bytes of the memory reference, which
7629/// determines the scale.
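/// E.g. for an 8-byte access with a constant G_PTR_ADD offset of 16, this
/// renders {base, #2}: the immediate is stored scaled by the access size,
/// i.e. "ldr x0, [base, #16]".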
7630InstructionSelector::ComplexRendererFns
7631AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7632 unsigned Size) const {
7633 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7634 MachineRegisterInfo &MRI = MF.getRegInfo();
7635
7636 if (!Root.isReg())
7637 return std::nullopt;
7638
7639 MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg());
7640 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7641 return {{
7642 [=](MachineInstrBuilder &MIB) { MIB.add(MO: RootDef->getOperand(i: 1)); },
7643 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); },
7644 }};
7645 }
7646
7647 CodeModel::Model CM = MF.getTarget().getCodeModel();
7648 // Check if we can fold in the ADD of a small code model ADRP + ADD address.
7649 // HACK: ld64 on Darwin doesn't support relocations on PRFM, so we can't fold
7650 // globals into the offset.
7651 MachineInstr *RootParent = Root.getParent();
7652 if (CM == CodeModel::Small &&
7653 !(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH &&
7654 STI.isTargetDarwin())) {
7655 auto OpFns = tryFoldAddLowIntoImm(RootDef&: *RootDef, Size, MRI);
7656 if (OpFns)
7657 return OpFns;
7658 }
7659
7660 if (isBaseWithConstantOffset(Root, MRI)) {
7661 MachineOperand &LHS = RootDef->getOperand(i: 1);
7662 MachineOperand &RHS = RootDef->getOperand(i: 2);
7663 MachineInstr *LHSDef = MRI.getVRegDef(Reg: LHS.getReg());
7664 MachineInstr *RHSDef = MRI.getVRegDef(Reg: RHS.getReg());
7665
7666 int64_t RHSC = (int64_t)RHSDef->getOperand(i: 1).getCImm()->getZExtValue();
7667 unsigned Scale = Log2_32(Value: Size);
7668 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7669 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7670 return {{
7671 [=](MachineInstrBuilder &MIB) { MIB.add(MO: LHSDef->getOperand(i: 1)); },
7672 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); },
7673 }};
7674
7675 return {{
7676 [=](MachineInstrBuilder &MIB) { MIB.add(MO: LHS); },
7677 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); },
7678 }};
7679 }
7680 }
7681
7682 // Before falling back to our general case, check if the unscaled
7683 // instructions can handle this. If so, that's preferable.
7684 if (selectAddrModeUnscaled(Root, Size))
7685 return std::nullopt;
7686
7687 return {{
7688 [=](MachineInstrBuilder &MIB) { MIB.add(MO: Root); },
7689 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); },
7690 }};
7691}
7692
7693/// Given a shift instruction, return the correct shift type for that
7694/// instruction.
7695static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
7696 switch (MI.getOpcode()) {
7697 default:
7698 return AArch64_AM::InvalidShiftExtend;
7699 case TargetOpcode::G_SHL:
7700 return AArch64_AM::LSL;
7701 case TargetOpcode::G_LSHR:
7702 return AArch64_AM::LSR;
7703 case TargetOpcode::G_ASHR:
7704 return AArch64_AM::ASR;
7705 case TargetOpcode::G_ROTR:
7706 return AArch64_AM::ROR;
7707 }
7708}
7709
7710/// Select a "shifted register" operand. If the value is not shifted, set the
7711/// shift operand to a default value of "lsl 0".
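/// E.g. a G_ASHR of %x:(s64) by a constant 4 renders as {%x, asr #4}.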
7712InstructionSelector::ComplexRendererFns
7713AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7714 bool AllowROR) const {
7715 if (!Root.isReg())
7716 return std::nullopt;
7717 MachineRegisterInfo &MRI =
7718 Root.getParent()->getParent()->getParent()->getRegInfo();
7719
7720 // Check if the operand is defined by an instruction which corresponds to
7721 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7722 MachineInstr *ShiftInst = MRI.getVRegDef(Reg: Root.getReg());
7723 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(MI&: *ShiftInst);
7724 if (ShType == AArch64_AM::InvalidShiftExtend)
7725 return std::nullopt;
7726 if (ShType == AArch64_AM::ROR && !AllowROR)
7727 return std::nullopt;
7728 if (!isWorthFoldingIntoExtendedReg(MI&: *ShiftInst, MRI, IsAddrOperand: false))
7729 return std::nullopt;
7730
7731 // Need an immediate on the RHS.
7732 MachineOperand &ShiftRHS = ShiftInst->getOperand(i: 2);
7733 auto Immed = getImmedFromMO(Root: ShiftRHS);
7734 if (!Immed)
7735 return std::nullopt;
7736
7737 // We have something that we can fold. Fold in the shift's LHS and RHS into
7738 // the instruction.
7739 MachineOperand &ShiftLHS = ShiftInst->getOperand(i: 1);
7740 Register ShiftReg = ShiftLHS.getReg();
7741
7742 unsigned NumBits = MRI.getType(Reg: ShiftReg).getSizeInBits();
7743 unsigned Val = *Immed & (NumBits - 1);
7744 unsigned ShiftVal = AArch64_AM::getShifterImm(ST: ShType, Imm: Val);
7745
7746 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ShiftReg); },
7747 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShiftVal); }}};
7748}
7749
7750AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7751 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7752 unsigned Opc = MI.getOpcode();
7753
7754 // Handle explicit extend instructions first.
7755 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7756 unsigned Size;
7757 if (Opc == TargetOpcode::G_SEXT)
7758 Size = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
7759 else
7760 Size = MI.getOperand(i: 2).getImm();
7761 assert(Size != 64 && "Extend from 64 bits?");
7762 switch (Size) {
7763 case 8:
7764 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7765 case 16:
7766 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7767 case 32:
7768 return AArch64_AM::SXTW;
7769 default:
7770 return AArch64_AM::InvalidShiftExtend;
7771 }
7772 }
7773
7774 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7775 unsigned Size = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
7776 assert(Size != 64 && "Extend from 64 bits?");
7777 switch (Size) {
7778 case 8:
7779 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7780 case 16:
7781 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7782 case 32:
7783 return AArch64_AM::UXTW;
7784 default:
7785 return AArch64_AM::InvalidShiftExtend;
7786 }
7787 }
7788
7789 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7790 // on the RHS.
7791 if (Opc != TargetOpcode::G_AND)
7792 return AArch64_AM::InvalidShiftExtend;
7793
7794 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(Root: MI.getOperand(i: 2));
7795 if (!MaybeAndMask)
7796 return AArch64_AM::InvalidShiftExtend;
7797 uint64_t AndMask = *MaybeAndMask;
7798 switch (AndMask) {
7799 default:
7800 return AArch64_AM::InvalidShiftExtend;
7801 case 0xFF:
7802 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7803 case 0xFFFF:
7804 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7805 case 0xFFFFFFFF:
7806 return AArch64_AM::UXTW;
7807 }
7808}
7809
7810Register AArch64InstructionSelector::moveScalarRegClass(
7811 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7812 MachineRegisterInfo &MRI = *MIB.getMRI();
7813 auto Ty = MRI.getType(Reg);
7814 assert(!Ty.isVector() && "Expected scalars only!");
7815 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7816 return Reg;
7817
7818 // Create a copy and immediately select it.
7819 // FIXME: We should have an emitCopy function?
7820 auto Copy = MIB.buildCopy(Res: {&RC}, Op: {Reg});
7821 selectCopy(I&: *Copy, TII, MRI, TRI, RBI);
7822 return Copy.getReg(Idx: 0);
7823}
7824
7825/// Select an "extended register" operand. This operand folds in an extend
7826/// followed by an optional left shift.
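/// E.g. (G_SHL (G_SEXT %w:(s32)), 2) folds as {%w, sxtw #2}; a left-shift
/// amount greater than 4 is rejected.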
7827InstructionSelector::ComplexRendererFns
7828AArch64InstructionSelector::selectArithExtendedRegister(
7829 MachineOperand &Root) const {
7830 if (!Root.isReg())
7831 return std::nullopt;
7832 MachineRegisterInfo &MRI =
7833 Root.getParent()->getParent()->getParent()->getRegInfo();
7834
7835 uint64_t ShiftVal = 0;
7836 Register ExtReg;
7837 AArch64_AM::ShiftExtendType Ext;
7838 MachineInstr *RootDef = getDefIgnoringCopies(Reg: Root.getReg(), MRI);
7839 if (!RootDef)
7840 return std::nullopt;
7841
7842 if (!isWorthFoldingIntoExtendedReg(MI&: *RootDef, MRI, IsAddrOperand: false))
7843 return std::nullopt;
7844
7845 // Check if we can fold a shift and an extend.
7846 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7847 // Look for a constant on the RHS of the shift.
7848 MachineOperand &RHS = RootDef->getOperand(i: 2);
7849 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(Root: RHS);
7850 if (!MaybeShiftVal)
7851 return std::nullopt;
7852 ShiftVal = *MaybeShiftVal;
7853 if (ShiftVal > 4)
7854 return std::nullopt;
7855 // Look for a valid extend instruction on the LHS of the shift.
7856 MachineOperand &LHS = RootDef->getOperand(i: 1);
7857 MachineInstr *ExtDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI);
7858 if (!ExtDef)
7859 return std::nullopt;
7860 Ext = getExtendTypeForInst(MI&: *ExtDef, MRI);
7861 if (Ext == AArch64_AM::InvalidShiftExtend)
7862 return std::nullopt;
7863 ExtReg = ExtDef->getOperand(i: 1).getReg();
7864 } else {
7865 // Didn't get a shift. Try just folding an extend.
7866 Ext = getExtendTypeForInst(MI&: *RootDef, MRI);
7867 if (Ext == AArch64_AM::InvalidShiftExtend)
7868 return std::nullopt;
7869 ExtReg = RootDef->getOperand(i: 1).getReg();
7870
7871 // If we have a 32 bit instruction which zeroes out the high half of a
7872 // register, we get an implicit zero extend for free. Check if we have one.
7873 // FIXME: We actually emit the extend right now even though we don't have
7874 // to.
7875 if (Ext == AArch64_AM::UXTW && MRI.getType(Reg: ExtReg).getSizeInBits() == 32) {
7876 MachineInstr *ExtInst = MRI.getVRegDef(Reg: ExtReg);
7877 if (isDef32(MI: *ExtInst))
7878 return std::nullopt;
7879 }
7880 }
7881
7882 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7883 // copy.
7884 MachineIRBuilder MIB(*RootDef);
7885 ExtReg = moveScalarRegClass(Reg: ExtReg, RC: AArch64::GPR32RegClass, MIB);
7886
7887 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); },
7888 [=](MachineInstrBuilder &MIB) {
7889 MIB.addImm(Val: getArithExtendImm(ET: Ext, Imm: ShiftVal));
7890 }}};
7891}
7892
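/// Match a value that is the high 64-bit half of a 128-bit vector: either the
/// second result of a G_UNMERGE_VALUES or lane 1 of a <2 x s64>
/// G_EXTRACT_VECTOR_ELT (looking through little-endian bitcasts), and render
/// the full 128-bit source register.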
7893InstructionSelector::ComplexRendererFns
7894AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7895 if (!Root.isReg())
7896 return std::nullopt;
7897 MachineRegisterInfo &MRI =
7898 Root.getParent()->getParent()->getParent()->getRegInfo();
7899
7900 auto Extract = getDefSrcRegIgnoringCopies(Reg: Root.getReg(), MRI);
7901 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7902 STI.isLittleEndian())
7903 Extract =
7904 getDefSrcRegIgnoringCopies(Reg: Extract->MI->getOperand(i: 1).getReg(), MRI);
7905 if (!Extract)
7906 return std::nullopt;
7907
7908 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7909 if (Extract->Reg == Extract->MI->getOperand(i: 1).getReg()) {
7910 Register ExtReg = Extract->MI->getOperand(i: 2).getReg();
7911 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}};
7912 }
7913 }
7914 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7915 LLT SrcTy = MRI.getType(Reg: Extract->MI->getOperand(i: 1).getReg());
7916 auto LaneIdx = getIConstantVRegValWithLookThrough(
7917 VReg: Extract->MI->getOperand(i: 2).getReg(), MRI);
7918 if (LaneIdx && SrcTy == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) &&
7919 LaneIdx->Value.getSExtValue() == 1) {
7920 Register ExtReg = Extract->MI->getOperand(i: 1).getReg();
7921 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}};
7922 }
7923 }
7924
7925 return std::nullopt;
7926}
7927
7928void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7929 const MachineInstr &MI,
7930 int OpIdx) const {
7931 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7932 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7933 "Expected G_CONSTANT");
7934 std::optional<int64_t> CstVal =
7935 getIConstantVRegSExtVal(VReg: MI.getOperand(i: 0).getReg(), MRI);
7936 assert(CstVal && "Expected constant value");
7937 MIB.addImm(Val: *CstVal);
7938}
7939
7940void AArch64InstructionSelector::renderLogicalImm32(
7941 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7942 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7943 "Expected G_CONSTANT");
7944 uint64_t CstVal = I.getOperand(i: 1).getCImm()->getZExtValue();
7945 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(imm: CstVal, regSize: 32);
7946 MIB.addImm(Val: Enc);
7947}
7948
7949void AArch64InstructionSelector::renderLogicalImm64(
7950 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7951 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7952 "Expected G_CONSTANT");
7953 uint64_t CstVal = I.getOperand(i: 1).getCImm()->getZExtValue();
7954 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(imm: CstVal, regSize: 64);
7955 MIB.addImm(Val: Enc);
7956}
7957
7958void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7959 const MachineInstr &MI,
7960 int OpIdx) const {
7961 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7962 "Expected G_UBSANTRAP");
7963 MIB.addImm(Val: MI.getOperand(i: 0).getImm() | ('U' << 8));
7964}
7965
7966void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7967 const MachineInstr &MI,
7968 int OpIdx) const {
7969 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7970 "Expected G_FCONSTANT");
7971 MIB.addImm(
7972 Val: AArch64_AM::getFP16Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF()));
7973}
7974
7975void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7976 const MachineInstr &MI,
7977 int OpIdx) const {
7978 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7979 "Expected G_FCONSTANT");
7980 MIB.addImm(
7981 Val: AArch64_AM::getFP32Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF()));
7982}
7983
7984void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7985 const MachineInstr &MI,
7986 int OpIdx) const {
7987 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7988 "Expected G_FCONSTANT");
7989 MIB.addImm(
7990 Val: AArch64_AM::getFP64Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF()));
7991}
7992
7993void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7994 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7995 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7996 "Expected G_FCONSTANT");
7997 MIB.addImm(Val: AArch64_AM::encodeAdvSIMDModImmType4(Imm: MI.getOperand(i: 1)
7998 .getFPImm()
7999 ->getValueAPF()
8000 .bitcastToAPInt()
8001 .getZExtValue()));
8002}
8003
8004bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
8005 const MachineInstr &MI, unsigned NumBytes) const {
8006 if (!MI.mayLoadOrStore())
8007 return false;
8008 assert(MI.hasOneMemOperand() &&
8009 "Expected load/store to have only one mem op!");
8010 return (*MI.memoperands_begin())->getSize() == NumBytes;
8011}
8012
8013bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
8014 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
8015 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits() != 32)
8016 return false;
8017
8018 // Only return true if we know the operation will zero-out the high half of
8019 // the 64-bit register. Truncates can be subregister copies, which don't
8020 // zero out the high bits. Copies and other copy-like instructions can be
8021 // fed by truncates, or could be lowered as subregister copies.
8022 switch (MI.getOpcode()) {
8023 default:
8024 return true;
8025 case TargetOpcode::COPY:
8026 case TargetOpcode::G_BITCAST:
8027 case TargetOpcode::G_TRUNC:
8028 case TargetOpcode::G_PHI:
8029 return false;
8030 }
8031}
8032
8034// Perform fixups on the given PHI instruction's operands to force them all
8035// to be the same as the destination regbank.
8036static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
8037 const AArch64RegisterBankInfo &RBI) {
8038 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
8039 Register DstReg = MI.getOperand(i: 0).getReg();
8040 const RegisterBank *DstRB = MRI.getRegBankOrNull(Reg: DstReg);
8041 assert(DstRB && "Expected PHI dst to have regbank assigned");
8042 MachineIRBuilder MIB(MI);
8043
8044 // Go through each operand and ensure it has the same regbank.
8045 for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands())) {
8046 if (!MO.isReg())
8047 continue;
8048 Register OpReg = MO.getReg();
8049 const RegisterBank *RB = MRI.getRegBankOrNull(Reg: OpReg);
8050 if (RB != DstRB) {
8051 // Insert a cross-bank copy.
8052 auto *OpDef = MRI.getVRegDef(Reg: OpReg);
8053 const LLT &Ty = MRI.getType(Reg: OpReg);
8054 MachineBasicBlock &OpDefBB = *OpDef->getParent();
8055
8056 // Any instruction we insert must appear after all PHIs in the block
8057 // for the block to be valid MIR.
8058 MachineBasicBlock::iterator InsertPt = std::next(x: OpDef->getIterator());
8059 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
8060 InsertPt = OpDefBB.getFirstNonPHI();
8061 MIB.setInsertPt(MBB&: *OpDef->getParent(), II: InsertPt);
8062 auto Copy = MIB.buildCopy(Res: Ty, Op: OpReg);
8063 MRI.setRegBank(Reg: Copy.getReg(Idx: 0), RegBank: *DstRB);
8064 MO.setReg(Copy.getReg(Idx: 0));
8065 }
8066 }
8067}
8068
8069void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
8070 // We're looking for PHIs; build a list so we don't invalidate iterators.
8071 MachineRegisterInfo &MRI = MF.getRegInfo();
8072 SmallVector<MachineInstr *, 32> Phis;
8073 for (auto &BB : MF) {
8074 for (auto &MI : BB) {
8075 if (MI.getOpcode() == TargetOpcode::G_PHI)
8076 Phis.emplace_back(Args: &MI);
8077 }
8078 }
8079
8080 for (auto *MI : Phis) {
8081 // We need to do some work here if the operand types are < 16 bit and they
8082 // are split across fpr/gpr banks. Since all types <32b on gpr
8083 // end up being assigned gpr32 regclasses, we can end up with PHIs here
8084 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
8085 // be selecting heterogeneous regbanks for operands if possible, but we
8086 // still need to be able to deal with it here.
8087 //
8088 // To fix this, if we have a gpr-bank operand < 32b in size and at least
8089 // one other operand is on the fpr bank, then we add cross-bank copies
8090 // to homogenize the operand banks. For simplicity the bank that we choose
8091 // to settle on is whatever bank the def operand has. For example:
8092 //
8093 // %endbb:
8094 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
8095 // =>
8096 // %bb2:
8097 // ...
8098 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
8099 // ...
8100 // %endbb:
8101 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
8102 bool HasGPROp = false, HasFPROp = false;
8103 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands())) {
8104 if (!MO.isReg())
8105 continue;
8106 const LLT &Ty = MRI.getType(Reg: MO.getReg());
8107 if (!Ty.isValid() || !Ty.isScalar())
8108 break;
8109 if (Ty.getSizeInBits() >= 32)
8110 break;
8111 const RegisterBank *RB = MRI.getRegBankOrNull(Reg: MO.getReg());
8112 // If for some reason we don't have a regbank yet, don't try anything.
8113 if (!RB)
8114 break;
8115
8116 if (RB->getID() == AArch64::GPRRegBankID)
8117 HasGPROp = true;
8118 else
8119 HasFPROp = true;
8120 }
8121 // We have heterogeneous regbanks; fix them up.
8122 if (HasGPROp && HasFPROp)
8123 fixupPHIOpBanks(MI&: *MI, MRI, RBI);
8124 }
8125}
8126
8127namespace llvm {
8128InstructionSelector *
8129createAArch64InstructionSelector(const AArch64TargetMachine &TM,
8130 const AArch64Subtarget &Subtarget,
8131 const AArch64RegisterBankInfo &RBI) {
8132 return new AArch64InstructionSelector(TM, Subtarget, RBI);
8133}
8134}
8135