1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
14#include "AArch64GlobalISelUtils.h"
15#include "AArch64InstrInfo.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64RegisterBankInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "AArch64TargetMachine.h"
21#include "MCTargetDesc/AArch64AddressingModes.h"
22#include "MCTargetDesc/AArch64MCTargetDesc.h"
23#include "llvm/BinaryFormat/Dwarf.h"
24#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
25#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
26#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
27#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
28#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
29#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
30#include "llvm/CodeGen/GlobalISel/Utils.h"
31#include "llvm/CodeGen/MachineBasicBlock.h"
32#include "llvm/CodeGen/MachineConstantPool.h"
33#include "llvm/CodeGen/MachineFrameInfo.h"
34#include "llvm/CodeGen/MachineFunction.h"
35#include "llvm/CodeGen/MachineInstr.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineMemOperand.h"
38#include "llvm/CodeGen/MachineOperand.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/TargetOpcodes.h"
41#include "llvm/CodeGen/TargetRegisterInfo.h"
42#include "llvm/IR/Constants.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/Instructions.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/raw_ostream.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
59class BlockFrequencyInfo;
60class ProfileSummaryInfo;
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelValueTracking *VT,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
    InstructionSelector::setupMF(MF, VT, CoverageInfo, PSI, BFI);
    MIB.setMF(MF);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    MFReturnAddr = Register();

    processPHIs(MF);
  }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
118 MachineRegisterInfo &MRI);
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
141 MachineRegisterInfo &MRI);
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
  /// Helper to narrow a vector that was widened by emitScalarToVector.
  /// Copies the lowest part of a 128-bit or 64-bit vector to a 64-bit or
  /// 32-bit vector, respectively.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
177 MachineRegisterInfo &MRI);
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
195 MachineRegisterInfo &MRI);
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
200 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
201 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
222 MachineRegisterInfo &MRI);
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
241 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
251 MachineOperand &Predicate,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred if given is the intended predicate to use.
256 MachineInstr *
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
260 MachineInstr *
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 4> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
300 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
301 MachineIRBuilder &MIRBuilder) const;
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
303 MachineOperand &RHS,
304 MachineIRBuilder &MIRBuilder) const;
305 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
306 MachineIRBuilder &MIRBuilder) const;
307 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
308 MachineIRBuilder &MIRBuilder) const;
309 MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
310 MachineIRBuilder &MIRBuilder) const;
311 MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
312 MachineIRBuilder &MIRBuilder) const;
313 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
314 MachineIRBuilder &MIRBuilder) const;
315 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
316 MachineIRBuilder &MIRBuilder) const;
317 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
318 AArch64CC::CondCode CC,
319 MachineIRBuilder &MIRBuilder) const;
320 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
321 const RegisterBank &DstRB, LLT ScalarTy,
322 Register VecReg, unsigned LaneIdx,
323 MachineIRBuilder &MIRBuilder) const;
324 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
325 AArch64CC::CondCode Pred,
326 MachineIRBuilder &MIRBuilder) const;
327 /// Emit a CSet for a FP compare.
328 ///
329 /// \p Dst is expected to be a 32-bit scalar register.
330 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
331 MachineIRBuilder &MIRBuilder) const;
332
333 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
334 /// Might elide the instruction if the previous instruction already sets NZCV
335 /// correctly.
336 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
337
338 /// Emit the overflow op for \p Opcode.
339 ///
340 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
341 /// G_USUBO, etc.
342 std::pair<MachineInstr *, AArch64CC::CondCode>
343 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
344 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
345
346 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
347
348 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
349 /// In some cases this is even possible with OR operations in the expression.
350 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
351 MachineIRBuilder &MIB) const;
352 MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
353 CmpInst::Predicate CC,
354 AArch64CC::CondCode Predicate,
355 AArch64CC::CondCode OutCC,
356 MachineIRBuilder &MIB) const;
357 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
358 bool Negate, Register CCOp,
359 AArch64CC::CondCode Predicate,
360 MachineIRBuilder &MIB) const;
361
362 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
363 /// \p IsNegative is true if the test should be "not zero".
364 /// This will also optimize the test bit instruction when possible.
365 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
366 MachineBasicBlock *DstMBB,
367 MachineIRBuilder &MIB) const;
368
369 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
370 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
371 MachineBasicBlock *DestMBB,
372 MachineIRBuilder &MIB) const;
373
374 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
375 // We use these manually instead of using the importer since it doesn't
376 // support SDNodeXForm.
377 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
378 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
379 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
381
382 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
383 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
384 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
385
386 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
387 unsigned Size) const;
388
  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }
404
405 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
406 /// from complex pattern matchers like selectAddrModeIndexed().
407 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
408 MachineRegisterInfo &MRI) const;
409
410 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
411 unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }
416
417 std::optional<bool>
418 isWorthFoldingIntoAddrMode(MachineInstr &MI,
419 const MachineRegisterInfo &MRI) const;
420
421 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
422 const MachineRegisterInfo &MRI,
423 bool IsAddrOperand) const;
424 ComplexRendererFns
425 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
426 unsigned SizeInBytes) const;
427
  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
  /// or not a shift + extend should be folded into an addressing mode. Returns
  /// std::nullopt when this is not profitable or possible.
431 ComplexRendererFns
432 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
433 MachineOperand &Offset, unsigned SizeInBytes,
434 bool WantsExt) const;
435 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
436 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
437 unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }
442
443 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
444 unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }
449
450 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
451 bool AllowROR = false) const;
452
453 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
454 return selectShiftedRegister(Root);
455 }
456
  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root, true);
  }
460
461 /// Given an extend instruction, determine the correct shift-extend type for
462 /// that instruction.
463 ///
464 /// If the instruction is going to be used in a load or store, pass
465 /// \p IsLoadStore = true.
466 AArch64_AM::ShiftExtendType
467 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
468 bool IsLoadStore = false) const;
469
470 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
471 ///
472 /// \returns Either \p Reg if no change was necessary, or the new register
473 /// created by moving \p Reg.
474 ///
475 /// Note: This uses emitCopy right now.
476 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
477 MachineIRBuilder &MIB) const;
478
479 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
480
481 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
482
483 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
484 int OpIdx = -1) const;
485 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
486 int OpIdx = -1) const;
487 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
488 int OpIdx = -1) const;
489 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
490 int OpIdx) const;
491 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx = -1) const;
493 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
494 int OpIdx = -1) const;
495 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
496 int OpIdx = -1) const;
497 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
498 const MachineInstr &MI,
499 int OpIdx = -1) const;
500
501 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
502 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
503
504 // Optimization methods.
505 bool tryOptSelect(GSelect &Sel);
506 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
507 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
508 MachineOperand &Predicate,
509 MachineIRBuilder &MIRBuilder) const;
510
511 /// Return true if \p MI is a load or store of \p NumBytes bytes.
512 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
513
514 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
515 /// register zeroed out. In other words, the result of MI has been explicitly
516 /// zero extended.
517 bool isDef32(const MachineInstr &MI) const;
518
519 const AArch64TargetMachine &TM;
520 const AArch64Subtarget &STI;
521 const AArch64InstrInfo &TII;
522 const AArch64RegisterInfo &TRI;
523 const AArch64RegisterBankInfo &RBI;
524
525 bool ProduceNonFlagSettingCondBr = false;
526
527 // Some cached values used during selection.
528 // We use LR as a live-in register, and we keep track of it here as it can be
529 // clobbered by calls.
530 Register MFReturnAddr;
531
532 MachineIRBuilder MIB;
533
534#define GET_GLOBALISEL_PREDICATES_DECL
535#include "AArch64GenGlobalISel.inc"
536#undef GET_GLOBALISEL_PREDICATES_DECL
537
538// We declare the temporaries used by selectImpl() in the class to minimize the
539// cost of constructing placeholder values.
540#define GET_GLOBALISEL_TEMPORARIES_DECL
541#include "AArch64GenGlobalISel.inc"
542#undef GET_GLOBALISEL_TEMPORARIES_DECL
543};
544
545} // end anonymous namespace
546
547#define GET_GLOBALISEL_IMPL
548#include "AArch64GenGlobalISel.inc"
549#undef GET_GLOBALISEL_IMPL
550
551AArch64InstructionSelector::AArch64InstructionSelector(
552 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
553 const AArch64RegisterBankInfo &RBI)
554 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
555 RBI(RBI),
556#define GET_GLOBALISEL_PREDICATES_INIT
557#include "AArch64GenGlobalISel.inc"
558#undef GET_GLOBALISEL_PREDICATES_INIT
559#define GET_GLOBALISEL_TEMPORARIES_INIT
560#include "AArch64GenGlobalISel.inc"
561#undef GET_GLOBALISEL_TEMPORARIES_INIT
562{
563}
564
565// FIXME: This should be target-independent, inferred from the types declared
566// for each class in the bank.
567//
568/// Given a register bank, and a type, return the smallest register class that
569/// can represent that combination.
570static const TargetRegisterClass *
571getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
572 bool GetAllRegSet = false) {
573 if (RB.getID() == AArch64::GPRRegBankID) {
574 if (Ty.getSizeInBits() <= 32)
575 return GetAllRegSet ? &AArch64::GPR32allRegClass
576 : &AArch64::GPR32RegClass;
577 if (Ty.getSizeInBits() == 64)
578 return GetAllRegSet ? &AArch64::GPR64allRegClass
579 : &AArch64::GPR64RegClass;
580 if (Ty.getSizeInBits() == 128)
581 return &AArch64::XSeqPairsClassRegClass;
582 return nullptr;
583 }
584
585 if (RB.getID() == AArch64::FPRRegBankID) {
586 switch (Ty.getSizeInBits()) {
587 case 8:
588 return &AArch64::FPR8RegClass;
589 case 16:
590 return &AArch64::FPR16RegClass;
591 case 32:
592 return &AArch64::FPR32RegClass;
593 case 64:
594 return &AArch64::FPR64RegClass;
595 case 128:
596 return &AArch64::FPR128RegClass;
597 }
598 return nullptr;
599 }
600
601 return nullptr;
602}
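
// Illustrative only (a sketch, not code used here): the mapping above means
// that, for example, a 64-bit scalar on the GPR bank resolves to GPR64 (or
// GPR64all when GetAllRegSet is true), while a 64-bit value on the FPR bank
// resolves to FPR64:
//   getRegClassForTypeOnBank(LLT::scalar(64), GPRBank); // &AArch64::GPR64RegClass
//   getRegClassForTypeOnBank(LLT::scalar(64), FPRBank); // &AArch64::FPR64RegClass
// where GPRBank/FPRBank stand for the corresponding RegisterBank objects.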
603
604/// Given a register bank, and size in bits, return the smallest register class
605/// that can represent that combination.
606static const TargetRegisterClass *
607getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
608 bool GetAllRegSet = false) {
609 if (SizeInBits.isScalable()) {
610 assert(RB.getID() == AArch64::FPRRegBankID &&
611 "Expected FPR regbank for scalable type size");
612 return &AArch64::ZPRRegClass;
613 }
614
615 unsigned RegBankID = RB.getID();
616
617 if (RegBankID == AArch64::GPRRegBankID) {
618 assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
619 if (SizeInBits <= 32)
620 return GetAllRegSet ? &AArch64::GPR32allRegClass
621 : &AArch64::GPR32RegClass;
622 if (SizeInBits == 64)
623 return GetAllRegSet ? &AArch64::GPR64allRegClass
624 : &AArch64::GPR64RegClass;
625 if (SizeInBits == 128)
626 return &AArch64::XSeqPairsClassRegClass;
627 }
628
629 if (RegBankID == AArch64::FPRRegBankID) {
630 if (SizeInBits.isScalable()) {
631 assert(SizeInBits == TypeSize::getScalable(128) &&
632 "Unexpected scalable register size");
633 return &AArch64::ZPRRegClass;
634 }
635
636 switch (SizeInBits) {
637 default:
638 return nullptr;
639 case 8:
640 return &AArch64::FPR8RegClass;
641 case 16:
642 return &AArch64::FPR16RegClass;
643 case 32:
644 return &AArch64::FPR32RegClass;
645 case 64:
646 return &AArch64::FPR64RegClass;
647 case 128:
648 return &AArch64::FPR128RegClass;
649 }
650 }
651
652 return nullptr;
653}
654
655/// Returns the correct subregister to use for a given register class.
656static bool getSubRegForClass(const TargetRegisterClass *RC,
657 const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
659 case 8:
660 SubReg = AArch64::bsub;
661 break;
662 case 16:
663 SubReg = AArch64::hsub;
664 break;
665 case 32:
666 if (RC != &AArch64::FPR32RegClass)
667 SubReg = AArch64::sub_32;
668 else
669 SubReg = AArch64::ssub;
670 break;
671 case 64:
672 SubReg = AArch64::dsub;
673 break;
674 default:
675 LLVM_DEBUG(
676 dbgs() << "Couldn't find appropriate subregister for register class.");
677 return false;
678 }
679
680 return true;
681}
682
683/// Returns the minimum size the given register bank can hold.
684static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
685 switch (RB.getID()) {
686 case AArch64::GPRRegBankID:
687 return 32;
688 case AArch64::FPRRegBankID:
689 return 8;
690 default:
691 llvm_unreachable("Tried to get minimum size for unknown register bank.");
692 }
693}
694
695/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
696/// Helper function for functions like createDTuple and createQTuple.
697///
698/// \p RegClassIDs - The list of register class IDs available for some tuple of
699/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
700/// expected to contain between 2 and 4 tuple classes.
701///
702/// \p SubRegs - The list of subregister classes associated with each register
703/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
704/// subregister class. The index of each subregister class is expected to
705/// correspond with the index of each register class.
706///
707/// \returns Either the destination register of REG_SEQUENCE instruction that
708/// was created, or the 0th element of \p Regs if \p Regs contains a single
709/// element.
710static Register createTuple(ArrayRef<Register> Regs,
711 const unsigned RegClassIDs[],
712 const unsigned SubRegs[], MachineIRBuilder &MIB) {
  unsigned NumRegs = Regs.size();
  if (NumRegs == 1)
    return Regs[0];
  assert(NumRegs >= 2 && NumRegs <= 4 &&
         "Only support between two and 4 registers in a tuple!");
  const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
  auto RegSequence =
      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
    RegSequence.addUse(Regs[I]);
    RegSequence.addImm(SubRegs[I]);
  }
  return RegSequence.getReg(0);
727}
728
729/// Create a tuple of D-registers using the registers in \p Regs.
730static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
731 static const unsigned RegClassIDs[] = {
732 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
733 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
734 AArch64::dsub2, AArch64::dsub3};
735 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
736}
737
738/// Create a tuple of Q-registers using the registers in \p Regs.
739static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
740 static const unsigned RegClassIDs[] = {
741 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
742 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
743 AArch64::qsub2, AArch64::qsub3};
744 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
745}
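
// A sketch of the generated MIR (virtual register names are hypothetical):
// for two Q-register inputs, createQTuple(Regs, MIB) emits roughly
//   %tuple:qq = REG_SEQUENCE %q0, %subreg.qsub0, %q1, %subreg.qsub1
// and returns %tuple, while a single-element Regs is returned unchanged.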
746
747static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
748 auto &MI = *Root.getParent();
749 auto &MBB = *MI.getParent();
750 auto &MF = *MBB.getParent();
751 auto &MRI = MF.getRegInfo();
752 uint64_t Immed;
753 if (Root.isImm())
754 Immed = Root.getImm();
755 else if (Root.isCImm())
756 Immed = Root.getCImm()->getZExtValue();
757 else if (Root.isReg()) {
    auto ValAndVReg =
        getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
760 if (!ValAndVReg)
761 return std::nullopt;
762 Immed = ValAndVReg->Value.getSExtValue();
763 } else
764 return std::nullopt;
765 return Immed;
766}
767
/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - its operands are not all in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
774static bool unsupportedBinOp(const MachineInstr &I,
775 const AArch64RegisterBankInfo &RBI,
776 const MachineRegisterInfo &MRI,
777 const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
779 if (!Ty.isValid()) {
780 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
781 return true;
782 }
783
784 const RegisterBank *PrevOpBank = nullptr;
785 for (auto &MO : I.operands()) {
786 // FIXME: Support non-register operands.
787 if (!MO.isReg()) {
788 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
789 return true;
790 }
791
792 // FIXME: Can generic operations have physical registers operands? If
793 // so, this will need to be taught about that, and we'll need to get the
794 // bank out of the minimal class for the register.
795 // Either way, this needs to be documented (and possibly verified).
796 if (!MO.getReg().isVirtual()) {
797 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
798 return true;
799 }
800
    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
802 if (!OpBank) {
803 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
804 return true;
805 }
806
807 if (PrevOpBank && OpBank != PrevOpBank) {
808 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
809 return true;
810 }
811 PrevOpBank = OpBank;
812 }
813 return false;
814}
815
816/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
817/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
818/// and of size \p OpSize.
819/// \returns \p GenericOpc if the combination is unsupported.
820static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
821 unsigned OpSize) {
822 switch (RegBankID) {
823 case AArch64::GPRRegBankID:
824 if (OpSize == 32) {
825 switch (GenericOpc) {
826 case TargetOpcode::G_SHL:
827 return AArch64::LSLVWr;
828 case TargetOpcode::G_LSHR:
829 return AArch64::LSRVWr;
830 case TargetOpcode::G_ASHR:
831 return AArch64::ASRVWr;
832 default:
833 return GenericOpc;
834 }
835 } else if (OpSize == 64) {
836 switch (GenericOpc) {
837 case TargetOpcode::G_PTR_ADD:
838 return AArch64::ADDXrr;
839 case TargetOpcode::G_SHL:
840 return AArch64::LSLVXr;
841 case TargetOpcode::G_LSHR:
842 return AArch64::LSRVXr;
843 case TargetOpcode::G_ASHR:
844 return AArch64::ASRVXr;
845 default:
846 return GenericOpc;
847 }
848 }
849 break;
850 case AArch64::FPRRegBankID:
851 switch (OpSize) {
852 case 32:
853 switch (GenericOpc) {
854 case TargetOpcode::G_FADD:
855 return AArch64::FADDSrr;
856 case TargetOpcode::G_FSUB:
857 return AArch64::FSUBSrr;
858 case TargetOpcode::G_FMUL:
859 return AArch64::FMULSrr;
860 case TargetOpcode::G_FDIV:
861 return AArch64::FDIVSrr;
862 default:
863 return GenericOpc;
864 }
865 case 64:
866 switch (GenericOpc) {
867 case TargetOpcode::G_FADD:
868 return AArch64::FADDDrr;
869 case TargetOpcode::G_FSUB:
870 return AArch64::FSUBDrr;
871 case TargetOpcode::G_FMUL:
872 return AArch64::FMULDrr;
873 case TargetOpcode::G_FDIV:
874 return AArch64::FDIVDrr;
875 case TargetOpcode::G_OR:
876 return AArch64::ORRv8i8;
877 default:
878 return GenericOpc;
879 }
880 }
881 break;
882 }
883 return GenericOpc;
884}
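
// For example, selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 64)
// yields AArch64::LSLVXr, and selectBinaryOp(TargetOpcode::G_FADD,
// AArch64::FPRRegBankID, 32) yields AArch64::FADDSrr; any combination not
// listed above simply returns the generic opcode unchanged.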
885
886/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
887/// appropriate for the (value) register bank \p RegBankID and of memory access
888/// size \p OpSize. This returns the variant with the base+unsigned-immediate
889/// addressing mode (e.g., LDRXui).
890/// \returns \p GenericOpc if the combination is unsupported.
891static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
892 unsigned OpSize) {
893 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
894 switch (RegBankID) {
895 case AArch64::GPRRegBankID:
896 switch (OpSize) {
897 case 8:
898 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
899 case 16:
900 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
901 case 32:
902 return isStore ? AArch64::STRWui : AArch64::LDRWui;
903 case 64:
904 return isStore ? AArch64::STRXui : AArch64::LDRXui;
905 }
906 break;
907 case AArch64::FPRRegBankID:
908 switch (OpSize) {
909 case 8:
910 return isStore ? AArch64::STRBui : AArch64::LDRBui;
911 case 16:
912 return isStore ? AArch64::STRHui : AArch64::LDRHui;
913 case 32:
914 return isStore ? AArch64::STRSui : AArch64::LDRSui;
915 case 64:
916 return isStore ? AArch64::STRDui : AArch64::LDRDui;
917 case 128:
918 return isStore ? AArch64::STRQui : AArch64::LDRQui;
919 }
920 break;
921 }
922 return GenericOpc;
923}
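
// For example, a 64-bit G_LOAD on the GPR bank maps to AArch64::LDRXui and a
// 128-bit G_STORE on the FPR bank maps to AArch64::STRQui; unsupported
// combinations fall through and return the generic opcode.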
924
925/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
926/// to \p *To.
927///
928/// E.g "To = COPY SrcReg:SubReg"
929static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
930 const RegisterBankInfo &RBI, Register SrcReg,
931 const TargetRegisterClass *To, unsigned SubReg) {
932 assert(SrcReg.isValid() && "Expected a valid source register?");
933 assert(To && "Destination register class cannot be null");
934 assert(SubReg && "Expected a valid subregister");
935
936 MachineIRBuilder MIB(I);
  auto SubRegCopy =
      MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!I.getOperand(0).getReg().isPhysical())
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
946
947 return true;
948}
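
// A before/after sketch (virtual register names hypothetical): for a copy
// where only the low 32 bits of a 64-bit source are needed,
//   %dst = COPY %src
// is rewritten so that I copies from a fresh subregister copy instead:
//   %lo = COPY %src.sub_32
//   %dst = COPY %lo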
949
950/// Helper function to get the source and destination register classes for a
951/// copy. Returns a std::pair containing the source register class for the
952/// copy, and the destination register class for the copy. If a register class
953/// cannot be determined, then it will be nullptr.
954static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
955getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
956 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
957 const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
965
966 // Special casing for cross-bank copies of s1s. We can technically represent
967 // a 1-bit value with any size of register. The minimum size for a GPR is 32
968 // bits. So, we need to put the FPR on 32 bits as well.
969 //
970 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
971 // then we can pull it into the helpers that get the appropriate class for a
972 // register bank. Or make a new helper that carries along some constraint
973 // information.
  if (SrcRegBank != DstRegBank &&
      (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
    SrcSize = DstSize = TypeSize::getFixed(32);

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
980}
981
982// FIXME: We need some sort of API in RBI/TRI to allow generic code to
983// constrain operands of simple instructions given a TargetRegisterClass
984// and LLT
985static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI,
986 const RegisterBankInfo &RBI) {
987 for (MachineOperand &MO : I.operands()) {
988 if (!MO.isReg())
989 continue;
990 Register Reg = MO.getReg();
991 if (!Reg)
992 continue;
993 if (Reg.isPhysical())
994 continue;
995 LLT Ty = MRI.getType(Reg);
    const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
    const TargetRegisterClass *RC =
        dyn_cast<const TargetRegisterClass *>(RegClassOrBank);
    if (!RC) {
      const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
      RC = getRegClassForTypeOnBank(Ty, RB);
1002 if (!RC) {
1003 LLVM_DEBUG(
1004 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
1005 break;
1006 }
1007 }
    RBI.constrainGenericRegister(Reg, *RC, MRI);
1009 }
1010
1011 return true;
1012}
1013
1014static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
1015 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
1016 const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination
  // registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1026
1027 if (!DstRC) {
1028 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1029 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1030 return false;
1031 }
1032
1033 // Is this a copy? If so, then we may need to insert a subregister copy.
1034 if (I.isCopy()) {
1035 // Yes. Check if there's anything to fix up.
1036 if (!SrcRC) {
1037 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1038 return false;
1039 }
1040
    const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
    const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
    unsigned SubReg;

    // If the source bank doesn't support a subregister copy small enough,
    // then we first need to copy to the destination bank.
    if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
      getSubRegForClass(DstRC, TRI, SubReg);

      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
      copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // If the source register is bigger than the destination we need to
      // perform a subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SubRegRC, TRI, SubReg);
      copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // If the destination register is bigger than the source we need to do
      // a promotion using SUBREG_TO_REG.
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SrcRC, TRI, SubReg);

      Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(SubReg);
      MachineOperand &RegOp = I.getOperand(1);
      RegOp.setReg(PromoteReg);
    }
1078
1079 // If the destination is a physical register, then there's nothing to
1080 // change, so we're done.
1081 if (DstReg.isPhysical())
1082 return true;
1083 }
1084
1085 // No need to constrain SrcReg. It will get constrained when we hit another
1086 // of its use or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1088 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1089 << " operand\n");
1090 return false;
1091 }
1092
  // If this is a GPR ZEXT, we just reduce it down into a copy.
  // The sizes will be mismatched with the source < 32b but that's ok.
  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
    return selectCopy(I, TII, MRI, TRI, RBI);
  }

  I.setDesc(TII.get(AArch64::COPY));
1102 return true;
1103}
1104
1105static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1106 if (!DstTy.isScalar() || !SrcTy.isScalar())
1107 return GenericOpc;
1108
1109 const unsigned DstSize = DstTy.getSizeInBits();
1110 const unsigned SrcSize = SrcTy.getSizeInBits();
1111
1112 switch (DstSize) {
1113 case 32:
1114 switch (SrcSize) {
1115 case 32:
1116 switch (GenericOpc) {
1117 case TargetOpcode::G_SITOFP:
1118 return AArch64::SCVTFUWSri;
1119 case TargetOpcode::G_UITOFP:
1120 return AArch64::UCVTFUWSri;
1121 case TargetOpcode::G_FPTOSI:
1122 return AArch64::FCVTZSUWSr;
1123 case TargetOpcode::G_FPTOUI:
1124 return AArch64::FCVTZUUWSr;
1125 default:
1126 return GenericOpc;
1127 }
1128 case 64:
1129 switch (GenericOpc) {
1130 case TargetOpcode::G_SITOFP:
1131 return AArch64::SCVTFUXSri;
1132 case TargetOpcode::G_UITOFP:
1133 return AArch64::UCVTFUXSri;
1134 case TargetOpcode::G_FPTOSI:
1135 return AArch64::FCVTZSUWDr;
1136 case TargetOpcode::G_FPTOUI:
1137 return AArch64::FCVTZUUWDr;
1138 default:
1139 return GenericOpc;
1140 }
1141 default:
1142 return GenericOpc;
1143 }
1144 case 64:
1145 switch (SrcSize) {
1146 case 32:
1147 switch (GenericOpc) {
1148 case TargetOpcode::G_SITOFP:
1149 return AArch64::SCVTFUWDri;
1150 case TargetOpcode::G_UITOFP:
1151 return AArch64::UCVTFUWDri;
1152 case TargetOpcode::G_FPTOSI:
1153 return AArch64::FCVTZSUXSr;
1154 case TargetOpcode::G_FPTOUI:
1155 return AArch64::FCVTZUUXSr;
1156 default:
1157 return GenericOpc;
1158 }
1159 case 64:
1160 switch (GenericOpc) {
1161 case TargetOpcode::G_SITOFP:
1162 return AArch64::SCVTFUXDri;
1163 case TargetOpcode::G_UITOFP:
1164 return AArch64::UCVTFUXDri;
1165 case TargetOpcode::G_FPTOSI:
1166 return AArch64::FCVTZSUXDr;
1167 case TargetOpcode::G_FPTOUI:
1168 return AArch64::FCVTZUUXDr;
1169 default:
1170 return GenericOpc;
1171 }
1172 default:
1173 return GenericOpc;
1174 }
1175 default:
1176 return GenericOpc;
1177 };
1178 return GenericOpc;
1179}
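
// For instance, selectFPConvOpc(TargetOpcode::G_SITOFP, LLT::scalar(64),
// LLT::scalar(32)) returns AArch64::SCVTFUWDri (i32 -> f64), and
// selectFPConvOpc(TargetOpcode::G_FPTOUI, LLT::scalar(32), LLT::scalar(64))
// returns AArch64::FCVTZUUWDr (f64 -> i32); vector or unlisted scalar types
// fall back to the generic opcode.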
1180
1181MachineInstr *
1182AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1183 Register False, AArch64CC::CondCode CC,
1184 MachineIRBuilder &MIB) const {
1185 MachineRegisterInfo &MRI = *MIB.getMRI();
1186 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1187 RBI.getRegBank(True, MRI, TRI)->getID() &&
1188 "Expected both select operands to have the same regbank?");
  LLT Ty = MRI.getType(True);
1190 if (Ty.isVector())
1191 return nullptr;
1192 const unsigned Size = Ty.getSizeInBits();
1193 assert((Size == 32 || Size == 64) &&
1194 "Expected 32 bit or 64 bit select only?");
1195 const bool Is32Bit = Size == 32;
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
    constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1200 return &*FCSel;
1201 }
1202
1203 // By default, we'll try and emit a CSEL.
1204 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1205 bool Optimized = false;
1206 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1207 &Optimized](Register &Reg, Register &OtherReg,
1208 bool Invert) {
1209 if (Optimized)
1210 return false;
1211
1212 // Attempt to fold:
1213 //
1214 // %sub = G_SUB 0, %x
1215 // %select = G_SELECT cc, %reg, %sub
1216 //
1217 // Into:
1218 // %select = CSNEG %reg, %x, cc
1219 Register MatchReg;
    if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
1227 return true;
1228 }
1229
1230 // Attempt to fold:
1231 //
1232 // %xor = G_XOR %x, -1
1233 // %select = G_SELECT cc, %reg, %xor
1234 //
1235 // Into:
1236 // %select = CSINV %reg, %x, cc
    if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
1244 return true;
1245 }
1246
1247 // Attempt to fold:
1248 //
1249 // %add = G_ADD %x, 1
1250 // %select = G_SELECT cc, %reg, %add
1251 //
1252 // Into:
1253 // %select = CSINC %reg, %x, cc
    if (mi_match(Reg, MRI,
                 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
                          m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
1263 return true;
1264 }
1265
1266 return false;
1267 };
1268
1269 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1270 // true/false values are constants.
1271 // FIXME: All of these patterns already exist in tablegen. We should be
1272 // able to import these.
1273 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1274 &Optimized]() {
1275 if (Optimized)
1276 return false;
    auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
    auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1279 if (!TrueCst && !FalseCst)
1280 return false;
1281
1282 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1283 if (TrueCst && FalseCst) {
1284 int64_t T = TrueCst->Value.getSExtValue();
1285 int64_t F = FalseCst->Value.getSExtValue();
1286
1287 if (T == 0 && F == 1) {
1288 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1289 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1290 True = ZReg;
1291 False = ZReg;
1292 return true;
1293 }
1294
1295 if (T == 0 && F == -1) {
1296 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1297 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1298 True = ZReg;
1299 False = ZReg;
1300 return true;
1301 }
1302 }
1303
1304 if (TrueCst) {
1305 int64_t T = TrueCst->Value.getSExtValue();
1306 if (T == 1) {
1307 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1308 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1309 True = False;
1310 False = ZReg;
        CC = AArch64CC::getInvertedCondCode(CC);
1312 return true;
1313 }
1314
1315 if (T == -1) {
1316 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1317 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1318 True = False;
1319 False = ZReg;
        CC = AArch64CC::getInvertedCondCode(CC);
1321 return true;
1322 }
1323 }
1324
1325 if (FalseCst) {
1326 int64_t F = FalseCst->Value.getSExtValue();
1327 if (F == 1) {
1328 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1329 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1330 False = ZReg;
1331 return true;
1332 }
1333
      if (F == -1) {
        // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1337 False = ZReg;
1338 return true;
1339 }
1340 }
1341 return false;
1342 };
1343
1344 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1345 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1346 Optimized |= TryOptSelectCst();
  auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1349 return &*SelectInst;
1350}
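
// A before/after sketch of one of the folds above, with hypothetical vregs
// and an already-computed condition code CC:
//   %neg = G_SUB 0, %x
//   %sel = G_SELECT %cond, %t, %neg
// can be emitted (on the 32-bit GPR path) as
//   %sel = CSNEGWr %t, %x, CC
// with the CSINV/CSINC folds following the same shape for G_XOR x, -1 and
// G_ADD x, 1 respectively.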
1351
1352static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1353 switch (P) {
1354 default:
1355 llvm_unreachable("Unknown condition code!");
1356 case CmpInst::ICMP_NE:
1357 return AArch64CC::NE;
1358 case CmpInst::ICMP_EQ:
1359 return AArch64CC::EQ;
1360 case CmpInst::ICMP_SGT:
1361 return AArch64CC::GT;
1362 case CmpInst::ICMP_SGE:
1363 return AArch64CC::GE;
1364 case CmpInst::ICMP_SLT:
1365 return AArch64CC::LT;
1366 case CmpInst::ICMP_SLE:
1367 return AArch64CC::LE;
1368 case CmpInst::ICMP_UGT:
1369 return AArch64CC::HI;
1370 case CmpInst::ICMP_UGE:
1371 return AArch64CC::HS;
1372 case CmpInst::ICMP_ULT:
1373 return AArch64CC::LO;
1374 case CmpInst::ICMP_ULE:
1375 return AArch64CC::LS;
1376 }
1377}
1378
1379/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1380static void changeFPCCToORAArch64CC(CmpInst::Predicate CC,
1381 AArch64CC::CondCode &CondCode,
1382 AArch64CC::CondCode &CondCode2) {
1383 CondCode2 = AArch64CC::AL;
1384 switch (CC) {
1385 default:
1386 llvm_unreachable("Unknown FP condition!");
1387 case CmpInst::FCMP_OEQ:
1388 CondCode = AArch64CC::EQ;
1389 break;
1390 case CmpInst::FCMP_OGT:
1391 CondCode = AArch64CC::GT;
1392 break;
1393 case CmpInst::FCMP_OGE:
1394 CondCode = AArch64CC::GE;
1395 break;
1396 case CmpInst::FCMP_OLT:
1397 CondCode = AArch64CC::MI;
1398 break;
1399 case CmpInst::FCMP_OLE:
1400 CondCode = AArch64CC::LS;
1401 break;
1402 case CmpInst::FCMP_ONE:
1403 CondCode = AArch64CC::MI;
1404 CondCode2 = AArch64CC::GT;
1405 break;
1406 case CmpInst::FCMP_ORD:
1407 CondCode = AArch64CC::VC;
1408 break;
1409 case CmpInst::FCMP_UNO:
1410 CondCode = AArch64CC::VS;
1411 break;
1412 case CmpInst::FCMP_UEQ:
1413 CondCode = AArch64CC::EQ;
1414 CondCode2 = AArch64CC::VS;
1415 break;
1416 case CmpInst::FCMP_UGT:
1417 CondCode = AArch64CC::HI;
1418 break;
1419 case CmpInst::FCMP_UGE:
1420 CondCode = AArch64CC::PL;
1421 break;
1422 case CmpInst::FCMP_ULT:
1423 CondCode = AArch64CC::LT;
1424 break;
1425 case CmpInst::FCMP_ULE:
1426 CondCode = AArch64CC::LE;
1427 break;
1428 case CmpInst::FCMP_UNE:
1429 CondCode = AArch64CC::NE;
1430 break;
1431 }
1432}
1433
1434/// Convert an IR fp condition code to an AArch64 CC.
1435/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1436/// should be AND'ed instead of OR'ed.
1437static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
1438 AArch64CC::CondCode &CondCode,
1439 AArch64CC::CondCode &CondCode2) {
1440 CondCode2 = AArch64CC::AL;
1441 switch (CC) {
1442 default:
1443 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1444 assert(CondCode2 == AArch64CC::AL);
1445 break;
1446 case CmpInst::FCMP_ONE:
1447 // (a one b)
1448 // == ((a olt b) || (a ogt b))
1449 // == ((a ord b) && (a une b))
1450 CondCode = AArch64CC::VC;
1451 CondCode2 = AArch64CC::NE;
1452 break;
1453 case CmpInst::FCMP_UEQ:
1454 // (a ueq b)
1455 // == ((a uno b) || (a oeq b))
1456 // == ((a ule b) && (a uge b))
1457 CondCode = AArch64CC::PL;
1458 CondCode2 = AArch64CC::LE;
1459 break;
1460 }
1461}
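
// As a rough illustration of the OR'ed form above (assembly is a sketch):
// an (a one b) compare-and-branch can be lowered with two conditional
// branches, one per condition code,
//   fcmp s0, s1
//   b.mi .LBB0_target
//   b.gt .LBB0_target
// whereas the AND'ed codes from changeFPCCToANDAArch64CC are the ones geared
// towards the CCMP-based conjunction emission.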
1462
1463/// Return a register which can be used as a bit to test in a TB(N)Z.
1464static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1465 MachineRegisterInfo &MRI) {
1466 assert(Reg.isValid() && "Expected valid register!");
1467 bool HasZext = false;
1468 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1469 unsigned Opc = MI->getOpcode();
1470
    if (!MI->getOperand(0).isReg() ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1473 break;
1474
1475 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1476 //
1477 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1478 // on the truncated x is the same as the bit number on x.
1479 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1480 Opc == TargetOpcode::G_TRUNC) {
1481 if (Opc == TargetOpcode::G_ZEXT)
1482 HasZext = true;
1483
      Register NextReg = MI->getOperand(1).getReg();
      // Did we find something worth folding?
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1487 break;
1488
1489 // NextReg is worth folding. Keep looking.
1490 Reg = NextReg;
1491 continue;
1492 }
1493
1494 // Attempt to find a suitable operation with a constant on one side.
1495 std::optional<uint64_t> C;
1496 Register TestReg;
1497 switch (Opc) {
1498 default:
1499 break;
1500 case TargetOpcode::G_AND:
1501 case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(1).getReg();
      Register ConstantReg = MI->getOperand(2).getReg();
      auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
      if (!VRegAndVal) {
        // AND commutes, check the other side for a constant.
        // FIXME: Can we canonicalize the constant so that it's always on the
        // same side at some point earlier?
        std::swap(ConstantReg, TestReg);
        VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1511 }
1512 if (VRegAndVal) {
1513 if (HasZext)
1514 C = VRegAndVal->Value.getZExtValue();
1515 else
1516 C = VRegAndVal->Value.getSExtValue();
1517 }
1518 break;
1519 }
1520 case TargetOpcode::G_ASHR:
1521 case TargetOpcode::G_LSHR:
1522 case TargetOpcode::G_SHL: {
      TestReg = MI->getOperand(1).getReg();
      auto VRegAndVal =
          getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1526 if (VRegAndVal)
1527 C = VRegAndVal->Value.getSExtValue();
1528 break;
1529 }
1530 }
1531
1532 // Didn't find a constant or viable register. Bail out of the loop.
1533 if (!C || !TestReg.isValid())
1534 break;
1535
1536 // We found a suitable instruction with a constant. Check to see if we can
1537 // walk through the instruction.
1538 Register NextReg;
1539 unsigned TestRegSize = MRI.getType(Reg: TestReg).getSizeInBits();
1540 switch (Opc) {
1541 default:
1542 break;
1543 case TargetOpcode::G_AND:
1544 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1545 if ((*C >> Bit) & 1)
1546 NextReg = TestReg;
1547 break;
1548 case TargetOpcode::G_SHL:
1549 // (tbz (shl x, c), b) -> (tbz x, b-c) when b >= c and b-c fits in the
1550 // width of the register.
1551 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1552 NextReg = TestReg;
1553 Bit = Bit - *C;
1554 }
1555 break;
1556 case TargetOpcode::G_ASHR:
1557 // (tbz (ashr x, c), b) -> (tbz x, b+c) or, if b+c is >= # bits in x,
1558 // (tbz x, msb).
1559 NextReg = TestReg;
1560 Bit = Bit + *C;
1561 if (Bit >= TestRegSize)
1562 Bit = TestRegSize - 1;
1563 break;
1564 case TargetOpcode::G_LSHR:
1565 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1566 if ((Bit + *C) < TestRegSize) {
1567 NextReg = TestReg;
1568 Bit = Bit + *C;
1569 }
1570 break;
1571 case TargetOpcode::G_XOR:
1572 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1573 // appropriate.
1574 //
1575 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1576 //
1577 // tbz x', b -> tbnz x, b
1578 //
1579 // Because x' only has the b-th bit set if x does not.
1580 if ((*C >> Bit) & 1)
1581 Invert = !Invert;
1582 NextReg = TestReg;
1583 break;
1584 }
1585
1586 // Check if we found anything worth folding.
1587 if (!NextReg.isValid())
1588 return Reg;
1589 Reg = NextReg;
1590 }
1591
1592 return Reg;
1593}
1594
1595MachineInstr *AArch64InstructionSelector::emitTestBit(
1596 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1597 MachineIRBuilder &MIB) const {
1598 assert(TestReg.isValid());
1599 assert(ProduceNonFlagSettingCondBr &&
1600 "Cannot emit TB(N)Z with speculation tracking!");
1601 MachineRegisterInfo &MRI = *MIB.getMRI();
1602
1603 // Attempt to optimize the test bit by walking over instructions.
1604 TestReg = getTestBitReg(Reg: TestReg, Bit, Invert&: IsNegative, MRI);
1605 LLT Ty = MRI.getType(Reg: TestReg);
1606 unsigned Size = Ty.getSizeInBits();
1607 assert(!Ty.isVector() && "Expected a scalar!");
1608 assert(Bit < 64 && "Bit is too large!");
1609
1610 // Bits below 32 are tested with the W form (TB(N)ZW) and higher bits with
1611 // the X form, so move the test register to the matching size if needed.
1612 bool UseWReg = Bit < 32;
1613 unsigned NecessarySize = UseWReg ? 32 : 64;
1614 if (Size != NecessarySize)
1615 TestReg = moveScalarRegClass(
1616 Reg: TestReg, RC: UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1617 MIB);
1618
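// OpcTable[UseWReg][IsNegative]: the first index picks the X vs. W form, the
// second picks TBZ vs. TBNZ.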
1619 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1620 {AArch64::TBZW, AArch64::TBNZW}};
1621 unsigned Opc = OpcTable[UseWReg][IsNegative];
1622 auto TestBitMI =
1623 MIB.buildInstr(Opcode: Opc).addReg(RegNo: TestReg).addImm(Val: Bit).addMBB(MBB: DstMBB);
1624 constrainSelectedInstRegOperands(I&: *TestBitMI, TII, TRI, RBI);
1625 return &*TestBitMI;
1626}
1627
1628bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1629 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1630 MachineIRBuilder &MIB) const {
1631 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1632 // Given something like this:
1633 //
1634 // %x = ...Something...
1635 // %one = G_CONSTANT i64 1
1636 // %zero = G_CONSTANT i64 0
1637 // %and = G_AND %x, %one
1638 // %cmp = G_ICMP intpred(ne), %and, %zero
1639 // %cmp_trunc = G_TRUNC %cmp
1640 // G_BRCOND %cmp_trunc, %bb.3
1641 //
1642 // We want to try and fold the AND into the G_BRCOND and produce either a
1643 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1644 //
1645 // In this case, we'd get
1646 //
1647 // TBNZ %x %bb.3
1648 //
1649
1650 // Check if the AND has a constant on its RHS which we can use as a mask.
1651 // If it's a power of 2, then it's the same as checking a specific bit.
1652 // (e.g., ANDing with 8 == ANDing with 0b1000 == testing if bit 3 is set)
1653 auto MaybeBit = getIConstantVRegValWithLookThrough(
1654 VReg: AndInst.getOperand(i: 2).getReg(), MRI: *MIB.getMRI());
1655 if (!MaybeBit)
1656 return false;
1657
1658 int32_t Bit = MaybeBit->Value.exactLogBase2();
1659 if (Bit < 0)
1660 return false;
1661
1662 Register TestReg = AndInst.getOperand(i: 1).getReg();
1663
1664 // Emit a TB(N)Z.
1665 emitTestBit(TestReg, Bit, IsNegative: Invert, DstMBB, MIB);
1666 return true;
1667}
1668
1669MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1670 bool IsNegative,
1671 MachineBasicBlock *DestMBB,
1672 MachineIRBuilder &MIB) const {
1673 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1674 MachineRegisterInfo &MRI = *MIB.getMRI();
1675 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1676 AArch64::GPRRegBankID &&
1677 "Expected GPRs only?");
1678 auto Ty = MRI.getType(Reg: CompareReg);
1679 unsigned Width = Ty.getSizeInBits();
1680 assert(!Ty.isVector() && "Expected scalar only?");
1681 assert(Width <= 64 && "Expected width to be at most 64?");
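// OpcTable[IsNegative][Width == 64]: CBZ vs. CBNZ, then the W vs. X form.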
1682 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1683 {AArch64::CBNZW, AArch64::CBNZX}};
1684 unsigned Opc = OpcTable[IsNegative][Width == 64];
1685 auto BranchMI = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {CompareReg}).addMBB(MBB: DestMBB);
1686 constrainSelectedInstRegOperands(I&: *BranchMI, TII, TRI, RBI);
1687 return &*BranchMI;
1688}
1689
1690bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1691 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1692 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1693 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1694 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1695 // totally clean. Some of them require two branches to implement.
1696 auto Pred = (CmpInst::Predicate)FCmp.getOperand(i: 1).getPredicate();
1697 emitFPCompare(LHS: FCmp.getOperand(i: 2).getReg(), RHS: FCmp.getOperand(i: 3).getReg(), MIRBuilder&: MIB,
1698 Pred);
1699 AArch64CC::CondCode CC1, CC2;
1700 changeFCMPPredToAArch64CC(P: static_cast<CmpInst::Predicate>(Pred), CondCode&: CC1, CondCode2&: CC2);
1701 MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
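// The returned condition codes are OR'ed: branch to DestMBB if either holds,
// so emit a second Bcc to the same block when CC2 is needed.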
1702 MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC1).addMBB(MBB: DestMBB);
1703 if (CC2 != AArch64CC::AL)
1704 MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC2).addMBB(MBB: DestMBB);
1705 I.eraseFromParent();
1706 return true;
1707}
1708
1709bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1710 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1711 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1712 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1713 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1714 //
1715 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1716 // instructions will not be produced, as they are conditional branch
1717 // instructions that do not set flags.
1718 if (!ProduceNonFlagSettingCondBr)
1719 return false;
1720
1721 MachineRegisterInfo &MRI = *MIB.getMRI();
1722 MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
1723 auto Pred =
1724 static_cast<CmpInst::Predicate>(ICmp.getOperand(i: 1).getPredicate());
1725 Register LHS = ICmp.getOperand(i: 2).getReg();
1726 Register RHS = ICmp.getOperand(i: 3).getReg();
1727
1728 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1729 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
1730 MachineInstr *AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI);
1731
1732 // When we can emit a TB(N)Z, prefer that.
1733 //
1734 // Handle non-commutative condition codes first.
1735 // Note that we don't want to do this when we have a G_AND because it can
1736 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1737 if (VRegAndVal && !AndInst) {
1738 int64_t C = VRegAndVal->Value.getSExtValue();
1739
1740 // When we have a signed greater-than comparison against -1, we can just
1741 // test if the msb is zero.
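// e.g. G_BRCOND (G_ICMP sgt %x:s64, -1) becomes TBZ %x, #63.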
1742 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1743 uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
1744 emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB);
1745 I.eraseFromParent();
1746 return true;
1747 }
1748
1749 // When we have a signed less-than comparison against zero, we can just
1750 // test if the msb is not zero.
1751 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1752 uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
1753 emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ true, DstMBB: DestMBB, MIB);
1754 I.eraseFromParent();
1755 return true;
1756 }
1757
1758 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1759 // we can test if the msb is zero.
1760 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1761 uint64_t Bit = MRI.getType(Reg: LHS).getSizeInBits() - 1;
1762 emitTestBit(TestReg: LHS, Bit, /*IsNegative = */ false, DstMBB: DestMBB, MIB);
1763 I.eraseFromParent();
1764 return true;
1765 }
1766 }
1767
1768 // Attempt to handle commutative condition codes. Right now, that's only
1769 // eq/ne.
1770 if (ICmpInst::isEquality(P: Pred)) {
1771 if (!VRegAndVal) {
1772 std::swap(a&: RHS, b&: LHS);
1773 VRegAndVal = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
1774 AndInst = getOpcodeDef(Opcode: TargetOpcode::G_AND, Reg: LHS, MRI);
1775 }
1776
1777 if (VRegAndVal && VRegAndVal->Value == 0) {
1778 // If there's a G_AND feeding into this branch, try to fold it away by
1779 // emitting a TB(N)Z instead.
1780 //
1781 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1782 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1783 // would be redundant.
1784 if (AndInst &&
1785 tryOptAndIntoCompareBranch(
1786 AndInst&: *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DstMBB: DestMBB, MIB)) {
1787 I.eraseFromParent();
1788 return true;
1789 }
1790
1791 // Otherwise, try to emit a CB(N)Z instead.
1792 auto LHSTy = MRI.getType(Reg: LHS);
1793 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1794 emitCBZ(CompareReg: LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1795 I.eraseFromParent();
1796 return true;
1797 }
1798 }
1799 }
1800
1801 return false;
1802}
1803
1804bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1805 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1806 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1807 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1808 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1809 return true;
1810
1811 // Couldn't optimize. Emit a compare + a Bcc.
1812 MachineBasicBlock *DestMBB = I.getOperand(i: 1).getMBB();
1813 auto PredOp = ICmp.getOperand(i: 1);
1814 emitIntegerCompare(LHS&: ICmp.getOperand(i: 2), RHS&: ICmp.getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB);
1815 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1816 P: static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1817 MIB.buildInstr(Opc: AArch64::Bcc, DstOps: {}, SrcOps: {}).addImm(Val: CC).addMBB(MBB: DestMBB);
1818 I.eraseFromParent();
1819 return true;
1820}
1821
1822bool AArch64InstructionSelector::selectCompareBranch(
1823 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1824 Register CondReg = I.getOperand(i: 0).getReg();
1825 MachineInstr *CCMI = MRI.getVRegDef(Reg: CondReg);
1826 // Try to select the G_BRCOND using whatever is feeding the condition if
1827 // possible.
1828 unsigned CCMIOpc = CCMI->getOpcode();
1829 if (CCMIOpc == TargetOpcode::G_FCMP)
1830 return selectCompareBranchFedByFCmp(I, FCmp&: *CCMI, MIB);
1831 if (CCMIOpc == TargetOpcode::G_ICMP)
1832 return selectCompareBranchFedByICmp(I, ICmp&: *CCMI, MIB);
1833
1834 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1835 // instructions will not be produced, as they are conditional branch
1836 // instructions that do not set flags.
1837 if (ProduceNonFlagSettingCondBr) {
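// Test bit 0 of the condition register; the branch is taken when the low
// bit is set, so emit a TBNZ.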
1838 emitTestBit(TestReg: CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1839 DstMBB: I.getOperand(i: 1).getMBB(), MIB);
1840 I.eraseFromParent();
1841 return true;
1842 }
1843
1844 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1845 auto TstMI =
1846 MIB.buildInstr(Opc: AArch64::ANDSWri, DstOps: {LLT::scalar(SizeInBits: 32)}, SrcOps: {CondReg}).addImm(Val: 1);
1847 constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI);
1848 auto Bcc = MIB.buildInstr(Opcode: AArch64::Bcc)
1849 .addImm(Val: AArch64CC::NE)
1850 .addMBB(MBB: I.getOperand(i: 1).getMBB());
1851 I.eraseFromParent();
1852 return constrainSelectedInstRegOperands(I&: *Bcc, TII, TRI, RBI);
1853}
1854
1855/// Returns the element immediate value of a vector shift operand if found.
1856/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1857static std::optional<int64_t> getVectorShiftImm(Register Reg,
1858 MachineRegisterInfo &MRI) {
1859 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1860 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1861 return getAArch64VectorSplatScalar(MI: *OpMI, MRI);
1862}
1863
1864/// Matches and returns the shift immediate value for a SHL instruction given
1865/// a shift operand.
1866static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1867 MachineRegisterInfo &MRI) {
1868 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1869 if (!ShiftImm)
1870 return std::nullopt;
1871 // Check the immediate is in range for a SHL.
1872 int64_t Imm = *ShiftImm;
1873 if (Imm < 0)
1874 return std::nullopt;
1875 switch (SrcTy.getElementType().getSizeInBits()) {
1876 default:
1877 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1878 return std::nullopt;
1879 case 8:
1880 if (Imm > 7)
1881 return std::nullopt;
1882 break;
1883 case 16:
1884 if (Imm > 15)
1885 return std::nullopt;
1886 break;
1887 case 32:
1888 if (Imm > 31)
1889 return std::nullopt;
1890 break;
1891 case 64:
1892 if (Imm > 63)
1893 return std::nullopt;
1894 break;
1895 }
1896 return Imm;
1897}
1898
1899bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1900 MachineRegisterInfo &MRI) {
1901 assert(I.getOpcode() == TargetOpcode::G_SHL);
1902 Register DstReg = I.getOperand(i: 0).getReg();
1903 const LLT Ty = MRI.getType(Reg: DstReg);
1904 Register Src1Reg = I.getOperand(i: 1).getReg();
1905 Register Src2Reg = I.getOperand(i: 2).getReg();
1906
1907 if (!Ty.isVector())
1908 return false;
1909
1910 // Check if we have a vector of constants on RHS that we can select as the
1911 // immediate form.
1912 std::optional<int64_t> ImmVal = getVectorSHLImm(SrcTy: Ty, Reg: Src2Reg, MRI);
1913
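// With a constant splat shift amount we can use the immediate form
// (SHLv*_shift); otherwise fall back to the register form (USHLv*).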
1914 unsigned Opc = 0;
1915 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
1916 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1917 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
1918 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1919 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
1920 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1921 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) {
1922 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1923 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) {
1924 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1925 } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) {
1926 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1927 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) {
1928 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1929 } else {
1930 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1931 return false;
1932 }
1933
1934 auto Shl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg});
1935 if (ImmVal)
1936 Shl.addImm(Val: *ImmVal);
1937 else
1938 Shl.addUse(RegNo: Src2Reg);
1939 constrainSelectedInstRegOperands(I&: *Shl, TII, TRI, RBI);
1940 I.eraseFromParent();
1941 return true;
1942}
1943
1944bool AArch64InstructionSelector::selectVectorAshrLshr(
1945 MachineInstr &I, MachineRegisterInfo &MRI) {
1946 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1947 I.getOpcode() == TargetOpcode::G_LSHR);
1948 Register DstReg = I.getOperand(i: 0).getReg();
1949 const LLT Ty = MRI.getType(Reg: DstReg);
1950 Register Src1Reg = I.getOperand(i: 1).getReg();
1951 Register Src2Reg = I.getOperand(i: 2).getReg();
1952
1953 if (!Ty.isVector())
1954 return false;
1955
1956 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1957
1958 // We expect the immediate case to be lowered in the PostLegalCombiner to
1959 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1960
1961 // There is no shift-right-by-register instruction, but the
1962 // shift-left-by-register instruction takes a signed shift amount, where
1963 // negative values specify a right shift.
1964
1965 unsigned Opc = 0;
1966 unsigned NegOpc = 0;
1967 const TargetRegisterClass *RC =
1968 getRegClassForTypeOnBank(Ty, RB: RBI.getRegBank(ID: AArch64::FPRRegBankID));
1969 if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64)) {
1970 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1971 NegOpc = AArch64::NEGv2i64;
1972 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
1973 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1974 NegOpc = AArch64::NEGv4i32;
1975 } else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 32)) {
1976 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1977 NegOpc = AArch64::NEGv2i32;
1978 } else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16)) {
1979 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1980 NegOpc = AArch64::NEGv4i16;
1981 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16)) {
1982 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1983 NegOpc = AArch64::NEGv8i16;
1984 } else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8)) {
1985 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1986 NegOpc = AArch64::NEGv16i8;
1987 } else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8)) {
1988 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1989 NegOpc = AArch64::NEGv8i8;
1990 } else {
1991 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1992 return false;
1993 }
1994
1995 auto Neg = MIB.buildInstr(Opc: NegOpc, DstOps: {RC}, SrcOps: {Src2Reg});
1996 constrainSelectedInstRegOperands(I&: *Neg, TII, TRI, RBI);
1997 auto SShl = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Src1Reg, Neg});
1998 constrainSelectedInstRegOperands(I&: *SShl, TII, TRI, RBI);
1999 I.eraseFromParent();
2000 return true;
2001}
2002
2003bool AArch64InstructionSelector::selectVaStartAAPCS(
2004 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2005
2006 if (STI.isCallingConvWin64(CC: MF.getFunction().getCallingConv(),
2007 IsVarArg: MF.getFunction().isVarArg()))
2008 return false;
2009
2010 // The layout of the va_list struct is specified in the AArch64 Procedure Call
2011 // Standard, section 10.1.5.
2012
2013 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2014 const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
2015 const auto *PtrRegClass =
2016 STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
2017
2018 const MCInstrDesc &MCIDAddAddr =
2019 TII.get(Opcode: STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
2020 const MCInstrDesc &MCIDStoreAddr =
2021 TII.get(Opcode: STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
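// On ILP32, pointers are 32 bits, so the address adds and stores use the
// W-register forms.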
2022
2023 /*
2024 * typedef struct va_list {
2025 * void * stack; // next stack param
2026 * void * gr_top; // end of GP arg reg save area
2027 * void * vr_top; // end of FP/SIMD arg reg save area
2028 * int gr_offs; // offset from gr_top to next GP register arg
2029 * int vr_offs; // offset from vr_top to next FP/SIMD register arg
2030 * } va_list;
2031 */
2032 const auto VAList = I.getOperand(i: 0).getReg();
2033
2034 // Our current offset in bytes from the va_list struct (VAList).
2035 unsigned OffsetBytes = 0;
2036
2037 // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
2038 // and increment OffsetBytes by PtrSize.
2039 const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
2040 const Register Top = MRI.createVirtualRegister(RegClass: PtrRegClass);
2041 auto MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: MCIDAddAddr)
2042 .addDef(RegNo: Top)
2043 .addFrameIndex(Idx: FrameIndex)
2044 .addImm(Val: Imm)
2045 .addImm(Val: 0);
2046 constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2047
2048 const auto *MMO = *I.memoperands_begin();
2049 MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: MCIDStoreAddr)
2050 .addUse(RegNo: Top)
2051 .addUse(RegNo: VAList)
2052 .addImm(Val: OffsetBytes / PtrSize)
2053 .addMemOperand(MMO: MF.getMachineMemOperand(
2054 PtrInfo: MMO->getPointerInfo().getWithOffset(O: OffsetBytes),
2055 F: MachineMemOperand::MOStore, Size: PtrSize, BaseAlignment: MMO->getBaseAlign()));
2056 constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2057
2058 OffsetBytes += PtrSize;
2059 };
2060
2061 // void* stack at offset 0
2062 PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
2063
2064 // void* gr_top at offset 8 (4 on ILP32)
2065 const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
2066 PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
2067
2068 // void* vr_top at offset 16 (8 on ILP32)
2069 const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
2070 PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
2071
2072 // Helper function to store a 4-byte integer constant to VAList at offset
2073 // OffsetBytes, and increment OffsetBytes by 4.
2074 const auto PushIntConstant = [&](const int32_t Value) {
2075 constexpr int IntSize = 4;
2076 const Register Temp = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
2077 auto MIB =
2078 BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::MOVi32imm))
2079 .addDef(RegNo: Temp)
2080 .addImm(Val: Value);
2081 constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2082
2083 const auto *MMO = *I.memoperands_begin();
2084 MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::STRWui))
2085 .addUse(RegNo: Temp)
2086 .addUse(RegNo: VAList)
2087 .addImm(Val: OffsetBytes / IntSize)
2088 .addMemOperand(MMO: MF.getMachineMemOperand(
2089 PtrInfo: MMO->getPointerInfo().getWithOffset(O: OffsetBytes),
2090 F: MachineMemOperand::MOStore, Size: IntSize, BaseAlignment: MMO->getBaseAlign()));
2091 constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2092 OffsetBytes += IntSize;
2093 };
2094
2095 // int gr_offs at offset 24 (12 on ILP32)
2096 PushIntConstant(-static_cast<int32_t>(GPRSize));
2097
2098 // int vr_offs at offset 28 (16 on ILP32)
2099 PushIntConstant(-static_cast<int32_t>(FPRSize));
2100
2101 assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
2102
2103 I.eraseFromParent();
2104 return true;
2105}
2106
2107bool AArch64InstructionSelector::selectVaStartDarwin(
2108 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2109 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2110 Register ListReg = I.getOperand(i: 0).getReg();
2111
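// va_list is a single pointer here: store the address of the first variadic
// argument slot into it (for Win64 varargs, the GPR save area if one exists).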
2112 Register ArgsAddrReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
2113
2114 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2115 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2116 CC: MF.getFunction().getCallingConv(), IsVarArg: MF.getFunction().isVarArg())) {
2117 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2118 ? FuncInfo->getVarArgsGPRIndex()
2119 : FuncInfo->getVarArgsStackIndex();
2120 }
2121
2122 auto MIB =
2123 BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::ADDXri))
2124 .addDef(RegNo: ArgsAddrReg)
2125 .addFrameIndex(Idx: FrameIdx)
2126 .addImm(Val: 0)
2127 .addImm(Val: 0);
2128
2129 constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2130
2131 MIB = BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::STRXui))
2132 .addUse(RegNo: ArgsAddrReg)
2133 .addUse(RegNo: ListReg)
2134 .addImm(Val: 0)
2135 .addMemOperand(MMO: *I.memoperands_begin());
2136
2137 constrainSelectedInstRegOperands(I&: *MIB, TII, TRI, RBI);
2138 I.eraseFromParent();
2139 return true;
2140}
2141
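/// Materialize a 64-bit constant address (a global or block address) with a
/// MOVZ plus three MOVKs, 16 bits at a time. This is used for the large code
/// model when not position independent.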
2142void AArch64InstructionSelector::materializeLargeCMVal(
2143 MachineInstr &I, const Value *V, unsigned OpFlags) {
2144 MachineBasicBlock &MBB = *I.getParent();
2145 MachineFunction &MF = *MBB.getParent();
2146 MachineRegisterInfo &MRI = MF.getRegInfo();
2147
2148 auto MovZ = MIB.buildInstr(Opc: AArch64::MOVZXi, DstOps: {&AArch64::GPR64RegClass}, SrcOps: {});
2149 MovZ->addOperand(MF, Op: I.getOperand(i: 1));
2150 MovZ->getOperand(i: 1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2151 AArch64II::MO_NC);
2152 MovZ->addOperand(MF, Op: MachineOperand::CreateImm(Val: 0));
2153 constrainSelectedInstRegOperands(I&: *MovZ, TII, TRI, RBI);
2154
2155 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2156 Register ForceDstReg) {
2157 Register DstReg = ForceDstReg
2158 ? ForceDstReg
2159 : MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
2160 auto MovI = MIB.buildInstr(Opcode: AArch64::MOVKXi).addDef(RegNo: DstReg).addUse(RegNo: SrcReg);
2161 if (auto *GV = dyn_cast<GlobalValue>(Val: V)) {
2162 MovI->addOperand(MF, Op: MachineOperand::CreateGA(
2163 GV, Offset: MovZ->getOperand(i: 1).getOffset(), TargetFlags: Flags));
2164 } else {
2165 MovI->addOperand(
2166 MF, Op: MachineOperand::CreateBA(BA: cast<BlockAddress>(Val: V),
2167 Offset: MovZ->getOperand(i: 1).getOffset(), TargetFlags: Flags));
2168 }
2169 MovI->addOperand(MF, Op: MachineOperand::CreateImm(Val: Offset));
2170 constrainSelectedInstRegOperands(I&: *MovI, TII, TRI, RBI);
2171 return DstReg;
2172 };
2173 Register DstReg = BuildMovK(MovZ.getReg(Idx: 0),
2174 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
2175 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2176 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(i: 0).getReg());
2177}
2178
2179bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2180 MachineBasicBlock &MBB = *I.getParent();
2181 MachineFunction &MF = *MBB.getParent();
2182 MachineRegisterInfo &MRI = MF.getRegInfo();
2183
2184 switch (I.getOpcode()) {
2185 case TargetOpcode::G_STORE: {
2186 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2187 MachineOperand &SrcOp = I.getOperand(i: 0);
2188 if (MRI.getType(Reg: SrcOp.getReg()).isPointer()) {
2189 // Allow matching with imported patterns for stores of pointers. Unlike
2190 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2191 // and constrain.
2192 auto Copy = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: SrcOp);
2193 Register NewSrc = Copy.getReg(Idx: 0);
2194 SrcOp.setReg(NewSrc);
2195 RBI.constrainGenericRegister(Reg: NewSrc, RC: AArch64::GPR64RegClass, MRI);
2196 Changed = true;
2197 }
2198 return Changed;
2199 }
2200 case TargetOpcode::G_PTR_ADD: {
2201 // If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer
2202 // arithmetic semantics instead of falling back to regular arithmetic.
2203 const auto &TL = STI.getTargetLowering();
2204 if (TL->shouldPreservePtrArith(F: MF.getFunction(), PtrVT: EVT()))
2205 return false;
2206 return convertPtrAddToAdd(I, MRI);
2207 }
2208 case TargetOpcode::G_LOAD: {
2209 // For scalar loads of pointers, we try to convert the dest type from p0
2210 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2211 // conversion, this should be ok because all users should have been
2212 // selected already, so the type doesn't matter for them.
2213 Register DstReg = I.getOperand(i: 0).getReg();
2214 const LLT DstTy = MRI.getType(Reg: DstReg);
2215 if (!DstTy.isPointer())
2216 return false;
2217 MRI.setType(VReg: DstReg, Ty: LLT::scalar(SizeInBits: 64));
2218 return true;
2219 }
2220 case AArch64::G_DUP: {
2221 // Convert the type from p0 to s64 to help selection.
2222 LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2223 if (!DstTy.isPointerVector())
2224 return false;
2225 auto NewSrc = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: I.getOperand(i: 1).getReg());
2226 MRI.setType(VReg: I.getOperand(i: 0).getReg(),
2227 Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64)));
2228 MRI.setRegClass(Reg: NewSrc.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
2229 I.getOperand(i: 1).setReg(NewSrc.getReg(Idx: 0));
2230 return true;
2231 }
2232 case AArch64::G_INSERT_VECTOR_ELT: {
2233 // Convert the type from p0 to s64 to help selection.
2234 LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2235 LLT SrcVecTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
2236 if (!SrcVecTy.isPointerVector())
2237 return false;
2238 auto NewSrc = MIB.buildCopy(Res: LLT::scalar(SizeInBits: 64), Op: I.getOperand(i: 2).getReg());
2239 MRI.setType(VReg: I.getOperand(i: 1).getReg(),
2240 Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64)));
2241 MRI.setType(VReg: I.getOperand(i: 0).getReg(),
2242 Ty: DstTy.changeElementType(NewEltTy: LLT::scalar(SizeInBits: 64)));
2243 MRI.setRegClass(Reg: NewSrc.getReg(Idx: 0), RC: &AArch64::GPR64RegClass);
2244 I.getOperand(i: 2).setReg(NewSrc.getReg(Idx: 0));
2245 return true;
2246 }
2247 case TargetOpcode::G_UITOFP:
2248 case TargetOpcode::G_SITOFP: {
2249 // If both source and destination regbanks are FPR, then convert the opcode
2250 // to G_SITOF so that the importer can select it to an fpr variant.
2251 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2252 // copy.
2253 Register SrcReg = I.getOperand(i: 1).getReg();
2254 LLT SrcTy = MRI.getType(Reg: SrcReg);
2255 LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2256 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2257 return false;
2258
2259 if (RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2260 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2261 I.setDesc(TII.get(Opcode: AArch64::G_SITOF));
2262 else
2263 I.setDesc(TII.get(Opcode: AArch64::G_UITOF));
2264 return true;
2265 }
2266 return false;
2267 }
2268 default:
2269 return false;
2270 }
2271}
2272
2273/// This lowering tries to look for G_PTR_ADD instructions and then converts
2274/// them to a standard G_ADD with a COPY on the source.
2275///
2276/// The motivation behind this is to expose the add semantics to the imported
2277/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2278/// because the selector works bottom up, uses before defs. By the time we
2279/// end up trying to select a G_PTR_ADD, we should have already attempted to
2280/// fold this into addressing modes and were therefore unsuccessful.
2281bool AArch64InstructionSelector::convertPtrAddToAdd(
2282 MachineInstr &I, MachineRegisterInfo &MRI) {
2283 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2284 Register DstReg = I.getOperand(i: 0).getReg();
2285 Register AddOp1Reg = I.getOperand(i: 1).getReg();
2286 const LLT PtrTy = MRI.getType(Reg: DstReg);
2287 if (PtrTy.getAddressSpace() != 0)
2288 return false;
2289
2290 const LLT CastPtrTy =
2291 PtrTy.isVector() ? LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) : LLT::scalar(SizeInBits: 64);
2292 auto PtrToInt = MIB.buildPtrToInt(Dst: CastPtrTy, Src: AddOp1Reg);
2293 // Set regbanks on the registers.
2294 if (PtrTy.isVector())
2295 MRI.setRegBank(Reg: PtrToInt.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::FPRRegBankID));
2296 else
2297 MRI.setRegBank(Reg: PtrToInt.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::GPRRegBankID));
2298
2299 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2300 // %dst(intty) = G_ADD %intbase, off
2301 I.setDesc(TII.get(Opcode: TargetOpcode::G_ADD));
2302 MRI.setType(VReg: DstReg, Ty: CastPtrTy);
2303 I.getOperand(i: 1).setReg(PtrToInt.getReg(Idx: 0));
2304 if (!select(I&: *PtrToInt)) {
2305 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2306 return false;
2307 }
2308
2309 // Also take the opportunity here to try to do some optimization.
2310 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
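// e.g. %neg = G_SUB 0, %x; %dst = G_PTR_ADD %base, %neg
//   --> %dst(s64) = G_SUB %intbase, %x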
2311 Register NegatedReg;
2312 if (!mi_match(R: I.getOperand(i: 2).getReg(), MRI, P: m_Neg(Src: m_Reg(R&: NegatedReg))))
2313 return true;
2314 I.getOperand(i: 2).setReg(NegatedReg);
2315 I.setDesc(TII.get(Opcode: TargetOpcode::G_SUB));
2316 return true;
2317}
2318
2319bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2320 MachineRegisterInfo &MRI) {
2321 // We try to match the immediate variant of LSL, which is actually an alias
2322 // for a special case of UBFM. Otherwise, we fall back to the imported
2323 // selector which will match the register variant.
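// e.g. for 64 bits, lsl x0, x1, #3 is ubfm x0, x1, #61, #60.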
2324 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2325 const auto &MO = I.getOperand(i: 2);
2326 auto VRegAndVal = getIConstantVRegVal(VReg: MO.getReg(), MRI);
2327 if (!VRegAndVal)
2328 return false;
2329
2330 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2331 if (DstTy.isVector())
2332 return false;
2333 bool Is64Bit = DstTy.getSizeInBits() == 64;
2334 auto Imm1Fn = Is64Bit ? selectShiftA_64(Root: MO) : selectShiftA_32(Root: MO);
2335 auto Imm2Fn = Is64Bit ? selectShiftB_64(Root: MO) : selectShiftB_32(Root: MO);
2336
2337 if (!Imm1Fn || !Imm2Fn)
2338 return false;
2339
2340 auto NewI =
2341 MIB.buildInstr(Opc: Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2342 DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {I.getOperand(i: 1).getReg()});
2343
2344 for (auto &RenderFn : *Imm1Fn)
2345 RenderFn(NewI);
2346 for (auto &RenderFn : *Imm2Fn)
2347 RenderFn(NewI);
2348
2349 I.eraseFromParent();
2350 return constrainSelectedInstRegOperands(I&: *NewI, TII, TRI, RBI);
2351}
2352
2353bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2354 MachineInstr &I, MachineRegisterInfo &MRI) {
2355 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2356 // If we're storing a scalar, it doesn't matter what register bank that
2357 // scalar is on. All that matters is the size.
2358 //
2359 // So, if we see something like this (with a 32-bit scalar as an example):
2360 //
2361 // %x:gpr(s32) = ... something ...
2362 // %y:fpr(s32) = COPY %x:gpr(s32)
2363 // G_STORE %y:fpr(s32)
2364 //
2365 // We can fix this up into something like this:
2366 //
2367 // G_STORE %x:gpr(s32)
2368 //
2369 // And then continue the selection process normally.
2370 Register DefDstReg = getSrcRegIgnoringCopies(Reg: I.getOperand(i: 0).getReg(), MRI);
2371 if (!DefDstReg.isValid())
2372 return false;
2373 LLT DefDstTy = MRI.getType(Reg: DefDstReg);
2374 Register StoreSrcReg = I.getOperand(i: 0).getReg();
2375 LLT StoreSrcTy = MRI.getType(Reg: StoreSrcReg);
2376
2377 // If we get something strange like a physical register, then we shouldn't
2378 // go any further.
2379 if (!DefDstTy.isValid())
2380 return false;
2381
2382 // Are the source and dst types the same size?
2383 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2384 return false;
2385
2386 if (RBI.getRegBank(Reg: StoreSrcReg, MRI, TRI) ==
2387 RBI.getRegBank(Reg: DefDstReg, MRI, TRI))
2388 return false;
2389
2390 // We have a cross-bank copy, which is entering a store. Let's fold it.
2391 I.getOperand(i: 0).setReg(DefDstReg);
2392 return true;
2393}
2394
2395bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2396 assert(I.getParent() && "Instruction should be in a basic block!");
2397 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2398
2399 MachineBasicBlock &MBB = *I.getParent();
2400 MachineFunction &MF = *MBB.getParent();
2401 MachineRegisterInfo &MRI = MF.getRegInfo();
2402
2403 switch (I.getOpcode()) {
2404 case AArch64::G_DUP: {
2405 // Before selecting a DUP instruction, check if it is better selected as a
2406 // MOV or load from a constant pool.
2407 Register Src = I.getOperand(i: 1).getReg();
2408 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(VReg: Src, MRI);
2409 if (!ValAndVReg)
2410 return false;
2411 LLVMContext &Ctx = MF.getFunction().getContext();
2412 Register Dst = I.getOperand(i: 0).getReg();
2413 auto *CV = ConstantDataVector::getSplat(
2414 NumElts: MRI.getType(Reg: Dst).getNumElements(),
2415 Elt: ConstantInt::get(
2416 Ty: Type::getIntNTy(C&: Ctx, N: MRI.getType(Reg: Dst).getScalarSizeInBits()),
2417 V: ValAndVReg->Value.trunc(width: MRI.getType(Reg: Dst).getScalarSizeInBits())));
2418 if (!emitConstantVector(Dst, CV, MIRBuilder&: MIB, MRI))
2419 return false;
2420 I.eraseFromParent();
2421 return true;
2422 }
2423 case TargetOpcode::G_SEXT:
2424 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2425 // over a normal extend.
2426 if (selectUSMovFromExtend(I, MRI))
2427 return true;
2428 return false;
2429 case TargetOpcode::G_BR:
2430 return false;
2431 case TargetOpcode::G_SHL:
2432 return earlySelectSHL(I, MRI);
2433 case TargetOpcode::G_CONSTANT: {
2434 bool IsZero = false;
2435 if (I.getOperand(i: 1).isCImm())
2436 IsZero = I.getOperand(i: 1).getCImm()->isZero();
2437 else if (I.getOperand(i: 1).isImm())
2438 IsZero = I.getOperand(i: 1).getImm() == 0;
2439
2440 if (!IsZero)
2441 return false;
2442
2443 Register DefReg = I.getOperand(i: 0).getReg();
2444 LLT Ty = MRI.getType(Reg: DefReg);
2445 if (Ty.getSizeInBits() == 64) {
2446 I.getOperand(i: 1).ChangeToRegister(Reg: AArch64::XZR, isDef: false);
2447 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
2448 } else if (Ty.getSizeInBits() == 32) {
2449 I.getOperand(i: 1).ChangeToRegister(Reg: AArch64::WZR, isDef: false);
2450 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR32RegClass, MRI);
2451 } else
2452 return false;
2453
2454 I.setDesc(TII.get(Opcode: TargetOpcode::COPY));
2455 return true;
2456 }
2457
2458 case TargetOpcode::G_ADD: {
2459 // Check if this is being fed by a G_ICMP on either side.
2460 //
2461 // (cmp pred, x, y) + z
2462 //
2463 // In the above case, when the cmp is true, we increment z by 1. So, we can
2464 // fold the add into the cset for the cmp by using cinc.
2465 //
2466 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2467 Register AddDst = I.getOperand(i: 0).getReg();
2468 Register AddLHS = I.getOperand(i: 1).getReg();
2469 Register AddRHS = I.getOperand(i: 2).getReg();
2470 // Only handle scalars.
2471 LLT Ty = MRI.getType(Reg: AddLHS);
2472 if (Ty.isVector())
2473 return false;
2474 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2475 // bits.
2476 unsigned Size = Ty.getSizeInBits();
2477 if (Size != 32 && Size != 64)
2478 return false;
2479 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2480 if (!MRI.hasOneNonDBGUse(RegNo: Reg))
2481 return nullptr;
2482 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2483 // compare.
2484 if (Size == 32)
2485 return getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg, MRI);
2486 // We model scalar compares using 32-bit destinations right now.
2487 // If it's a 64-bit compare, it'll have 64-bit sources.
2488 Register ZExt;
2489 if (!mi_match(R: Reg, MRI,
2490 P: m_OneNonDBGUse(SP: m_GZExt(Src: m_OneNonDBGUse(SP: m_Reg(R&: ZExt))))))
2491 return nullptr;
2492 auto *Cmp = getOpcodeDef(Opcode: TargetOpcode::G_ICMP, Reg: ZExt, MRI);
2493 if (!Cmp ||
2494 MRI.getType(Reg: Cmp->getOperand(i: 2).getReg()).getSizeInBits() != 64)
2495 return nullptr;
2496 return Cmp;
2497 };
2498 // Try to match
2499 // z + (cmp pred, x, y)
2500 MachineInstr *Cmp = MatchCmp(AddRHS);
2501 if (!Cmp) {
2502 // (cmp pred, x, y) + z
2503 std::swap(a&: AddLHS, b&: AddRHS);
2504 Cmp = MatchCmp(AddRHS);
2505 if (!Cmp)
2506 return false;
2507 }
2508 auto &PredOp = Cmp->getOperand(i: 1);
2509 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2510 const AArch64CC::CondCode InvCC =
2511 changeICMPPredToAArch64CC(P: CmpInst::getInversePredicate(pred: Pred));
2512 MIB.setInstrAndDebugLoc(I);
2513 emitIntegerCompare(/*LHS=*/Cmp->getOperand(i: 2),
2514 /*RHS=*/Cmp->getOperand(i: 3), Predicate&: PredOp, MIRBuilder&: MIB);
2515 emitCSINC(/*Dst=*/AddDst, /*Src =*/Src1: AddLHS, /*Src2=*/AddLHS, Pred: InvCC, MIRBuilder&: MIB);
2516 I.eraseFromParent();
2517 return true;
2518 }
2519 case TargetOpcode::G_OR: {
2520 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2521 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2522 // shifting and masking that we can replace with a BFI (encoded as a BFM).
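// e.g. for s32, (%y << 16) | (%x & 0xffff) becomes BFMWri %x, %y, 16, 15,
// i.e. BFI %x, %y, #16, #16.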
2523 Register Dst = I.getOperand(i: 0).getReg();
2524 LLT Ty = MRI.getType(Reg: Dst);
2525
2526 if (!Ty.isScalar())
2527 return false;
2528
2529 unsigned Size = Ty.getSizeInBits();
2530 if (Size != 32 && Size != 64)
2531 return false;
2532
2533 Register ShiftSrc;
2534 int64_t ShiftImm;
2535 Register MaskSrc;
2536 int64_t MaskImm;
2537 if (!mi_match(
2538 R: Dst, MRI,
2539 P: m_GOr(L: m_OneNonDBGUse(SP: m_GShl(L: m_Reg(R&: ShiftSrc), R: m_ICst(Cst&: ShiftImm))),
2540 R: m_OneNonDBGUse(SP: m_GAnd(L: m_Reg(R&: MaskSrc), R: m_ICst(Cst&: MaskImm))))))
2541 return false;
2542
2543 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2544 return false;
2545
2546 int64_t Immr = Size - ShiftImm;
2547 int64_t Imms = Size - ShiftImm - 1;
2548 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2549 emitInstr(Opcode: Opc, DstOps: {Dst}, SrcOps: {MaskSrc, ShiftSrc, Immr, Imms}, MIRBuilder&: MIB);
2550 I.eraseFromParent();
2551 return true;
2552 }
2553 case TargetOpcode::G_FENCE: {
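// Operand 1 is the sync scope: a single-thread fence (scope 0) only needs a
// compiler barrier. Otherwise operand 0 is the ordering: acquire (4) can use
// DMB ISHLD (0x9); anything stronger uses DMB ISH (0xb).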
2554 if (I.getOperand(i: 1).getImm() == 0)
2555 BuildMI(BB&: MBB, I, MIMD: MIMetadata(I), MCID: TII.get(Opcode: TargetOpcode::MEMBARRIER));
2556 else
2557 BuildMI(BB&: MBB, I, MIMD: MIMetadata(I), MCID: TII.get(Opcode: AArch64::DMB))
2558 .addImm(Val: I.getOperand(i: 0).getImm() == 4 ? 0x9 : 0xb);
2559 I.eraseFromParent();
2560 return true;
2561 }
2562 default:
2563 return false;
2564 }
2565}
2566
2567bool AArch64InstructionSelector::select(MachineInstr &I) {
2568 assert(I.getParent() && "Instruction should be in a basic block!");
2569 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2570
2571 MachineBasicBlock &MBB = *I.getParent();
2572 MachineFunction &MF = *MBB.getParent();
2573 MachineRegisterInfo &MRI = MF.getRegInfo();
2574
2575 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2576 if (Subtarget->requiresStrictAlign()) {
2577 // We don't support this feature yet.
2578 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2579 return false;
2580 }
2581
2582 MIB.setInstrAndDebugLoc(I);
2583
2584 unsigned Opcode = I.getOpcode();
2585 // G_PHI requires same handling as PHI
2586 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2587 // Certain non-generic instructions also need some special handling.
2588
2589 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2590 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2591
2592 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2593 const Register DefReg = I.getOperand(i: 0).getReg();
2594 const LLT DefTy = MRI.getType(Reg: DefReg);
2595
2596 const RegClassOrRegBank &RegClassOrBank =
2597 MRI.getRegClassOrRegBank(Reg: DefReg);
2598
2599 const TargetRegisterClass *DefRC =
2600 dyn_cast<const TargetRegisterClass *>(Val: RegClassOrBank);
2601 if (!DefRC) {
2602 if (!DefTy.isValid()) {
2603 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2604 return false;
2605 }
2606 const RegisterBank &RB = *cast<const RegisterBank *>(Val: RegClassOrBank);
2607 DefRC = getRegClassForTypeOnBank(Ty: DefTy, RB);
2608 if (!DefRC) {
2609 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2610 return false;
2611 }
2612 }
2613
2614 I.setDesc(TII.get(Opcode: TargetOpcode::PHI));
2615
2616 return RBI.constrainGenericRegister(Reg: DefReg, RC: *DefRC, MRI);
2617 }
2618
2619 if (I.isCopy())
2620 return selectCopy(I, TII, MRI, TRI, RBI);
2621
2622 if (I.isDebugInstr())
2623 return selectDebugInstr(I, MRI, RBI);
2624
2625 return true;
2626 }
2627
2628
2629 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2630 LLVM_DEBUG(
2631 dbgs() << "Generic instruction has unexpected implicit operands\n");
2632 return false;
2633 }
2634
2635 // Try to do some lowering before we start instruction selecting. These
2636 // lowerings are purely transformations on the input G_MIR and so selection
2637 // must continue after any modification of the instruction.
2638 if (preISelLower(I)) {
2639 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2640 }
2641
2642 // There may be patterns where the importer can't deal with them optimally,
2643 // but does select it to a suboptimal sequence so our custom C++ selection
2644 // code later never has a chance to work on it. Therefore, we have an early
2645 // selection attempt here to give priority to certain selection routines
2646 // over the imported ones.
2647 if (earlySelect(I))
2648 return true;
2649
2650 if (selectImpl(I, CoverageInfo&: *CoverageInfo))
2651 return true;
2652
2653 LLT Ty =
2654 I.getOperand(i: 0).isReg() ? MRI.getType(Reg: I.getOperand(i: 0).getReg()) : LLT{};
2655
2656 switch (Opcode) {
2657 case TargetOpcode::G_SBFX:
2658 case TargetOpcode::G_UBFX: {
2659 static const unsigned OpcTable[2][2] = {
2660 {AArch64::UBFMWri, AArch64::UBFMXri},
2661 {AArch64::SBFMWri, AArch64::SBFMXri}};
2662 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2663 unsigned Size = Ty.getSizeInBits();
2664 unsigned Opc = OpcTable[IsSigned][Size == 64];
2665 auto Cst1 =
2666 getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 2).getReg(), MRI);
2667 assert(Cst1 && "Should have gotten a constant for src 1?");
2668 auto Cst2 =
2669 getIConstantVRegValWithLookThrough(VReg: I.getOperand(i: 3).getReg(), MRI);
2670 assert(Cst2 && "Should have gotten a constant for src 2?");
2671 auto LSB = Cst1->Value.getZExtValue();
2672 auto Width = Cst2->Value.getZExtValue();
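// G_SBFX/G_UBFX take (lsb, width); SBFM/UBFM take (immr, imms) =
// (lsb, lsb + width - 1).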
2673 auto BitfieldInst =
2674 MIB.buildInstr(Opc, DstOps: {I.getOperand(i: 0)}, SrcOps: {I.getOperand(i: 1)})
2675 .addImm(Val: LSB)
2676 .addImm(Val: LSB + Width - 1);
2677 I.eraseFromParent();
2678 return constrainSelectedInstRegOperands(I&: *BitfieldInst, TII, TRI, RBI);
2679 }
2680 case TargetOpcode::G_BRCOND:
2681 return selectCompareBranch(I, MF, MRI);
2682
2683 case TargetOpcode::G_BRINDIRECT: {
2684 const Function &Fn = MF.getFunction();
2685 if (std::optional<uint16_t> BADisc =
2686 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(ParentFn: Fn)) {
2687 auto MI = MIB.buildInstr(Opc: AArch64::BRA, DstOps: {}, SrcOps: {I.getOperand(i: 0).getReg()});
2688 MI.addImm(Val: AArch64PACKey::IA);
2689 MI.addImm(Val: *BADisc);
2690 MI.addReg(/*AddrDisc=*/RegNo: AArch64::XZR);
2691 I.eraseFromParent();
2692 return constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI);
2693 }
2694 I.setDesc(TII.get(Opcode: AArch64::BR));
2695 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2696 }
2697
2698 case TargetOpcode::G_BRJT:
2699 return selectBrJT(I, MRI);
2700
2701 case AArch64::G_ADD_LOW: {
2702 // This op may have been separated from its ADRP companion by the localizer
2703 // or some other code motion pass. Given that many CPUs will try to
2704 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2705 // which will later be expanded into an ADRP+ADD pair after scheduling.
2706 MachineInstr *BaseMI = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg());
2707 if (BaseMI->getOpcode() != AArch64::ADRP) {
2708 I.setDesc(TII.get(Opcode: AArch64::ADDXri));
2709 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2710 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2711 }
2712 assert(TM.getCodeModel() == CodeModel::Small &&
2713 "Expected small code model");
2714 auto Op1 = BaseMI->getOperand(i: 1);
2715 auto Op2 = I.getOperand(i: 2);
2716 auto MovAddr = MIB.buildInstr(Opc: AArch64::MOVaddr, DstOps: {I.getOperand(i: 0)}, SrcOps: {})
2717 .addGlobalAddress(GV: Op1.getGlobal(), Offset: Op1.getOffset(),
2718 TargetFlags: Op1.getTargetFlags())
2719 .addGlobalAddress(GV: Op2.getGlobal(), Offset: Op2.getOffset(),
2720 TargetFlags: Op2.getTargetFlags());
2721 I.eraseFromParent();
2722 return constrainSelectedInstRegOperands(I&: *MovAddr, TII, TRI, RBI);
2723 }
2724
2725 case TargetOpcode::G_FCONSTANT:
2726 case TargetOpcode::G_CONSTANT: {
2727 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2728
2729 const LLT s8 = LLT::scalar(SizeInBits: 8);
2730 const LLT s16 = LLT::scalar(SizeInBits: 16);
2731 const LLT s32 = LLT::scalar(SizeInBits: 32);
2732 const LLT s64 = LLT::scalar(SizeInBits: 64);
2733 const LLT s128 = LLT::scalar(SizeInBits: 128);
2734 const LLT p0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
2735
2736 const Register DefReg = I.getOperand(i: 0).getReg();
2737 const LLT DefTy = MRI.getType(Reg: DefReg);
2738 const unsigned DefSize = DefTy.getSizeInBits();
2739 const RegisterBank &RB = *RBI.getRegBank(Reg: DefReg, MRI, TRI);
2740
2741 // FIXME: Redundant check, but even less readable when factored out.
2742 if (isFP) {
2743 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2744 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2745 << " constant, expected: " << s16 << " or " << s32
2746 << " or " << s64 << " or " << s128 << '\n');
2747 return false;
2748 }
2749
2750 if (RB.getID() != AArch64::FPRRegBankID) {
2751 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2752 << " constant on bank: " << RB
2753 << ", expected: FPR\n");
2754 return false;
2755 }
2756
2757 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2758 // can be sure tablegen works correctly and isn't rescued by this code.
2759 // 0.0 is not covered by tablegen for FP128, however, so that case is
2760 // handled here.
2761 if (DefSize != 128 && I.getOperand(i: 1).getFPImm()->isExactlyValue(V: 0.0))
2762 return false;
2763 } else {
2764 // s32 and s64 are covered by tablegen.
2765 if (Ty != p0 && Ty != s8 && Ty != s16) {
2766 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2767 << " constant, expected: " << s32 << ", " << s64
2768 << ", or " << p0 << '\n');
2769 return false;
2770 }
2771
2772 if (RB.getID() != AArch64::GPRRegBankID) {
2773 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2774 << " constant on bank: " << RB
2775 << ", expected: GPR\n");
2776 return false;
2777 }
2778 }
2779
2780 if (isFP) {
2781 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(Ty: DefTy, RB);
2782 // For 16 and 128-bit values, or when FMOV cannot encode the immediate, emit a constant pool load.
2783 switch (DefSize) {
2784 default:
2785 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2786 case 32:
2787 case 64: {
2788 bool OptForSize = shouldOptForSize(MF: &MF);
2789 const auto &TLI = MF.getSubtarget().getTargetLowering();
2790 // If TLI says that this fpimm is illegal, then we'll expand to a
2791 // constant pool load.
2792 if (TLI->isFPImmLegal(I.getOperand(i: 1).getFPImm()->getValueAPF(),
2793 EVT::getFloatingPointVT(BitWidth: DefSize), ForCodeSize: OptForSize))
2794 break;
2795 [[fallthrough]];
2796 }
2797 case 16:
2798 case 128: {
2799 auto *FPImm = I.getOperand(i: 1).getFPImm();
2800 auto *LoadMI = emitLoadFromConstantPool(CPVal: FPImm, MIRBuilder&: MIB);
2801 if (!LoadMI) {
2802 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2803 return false;
2804 }
2805 MIB.buildCopy(Res: {DefReg}, Op: {LoadMI->getOperand(i: 0).getReg()});
2806 I.eraseFromParent();
2807 return RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI);
2808 }
2809 }
2810
2811 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2812 // Materialize the constant with a normal mov into a GPR, then copy it to the FPR (the copy becomes an FMOV).
2813 const Register DefGPRReg = MRI.createVirtualRegister(
2814 RegClass: DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2815 MachineOperand &RegOp = I.getOperand(i: 0);
2816 RegOp.setReg(DefGPRReg);
2817 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator()));
2818 MIB.buildCopy(Res: {DefReg}, Op: {DefGPRReg});
2819
2820 if (!RBI.constrainGenericRegister(Reg: DefReg, RC: FPRRC, MRI)) {
2821 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2822 return false;
2823 }
2824
2825 MachineOperand &ImmOp = I.getOperand(i: 1);
2826 // FIXME: Is going through int64_t always correct?
2827 ImmOp.ChangeToImmediate(
2828 ImmVal: ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2829 } else if (I.getOperand(i: 1).isCImm()) {
2830 uint64_t Val = I.getOperand(i: 1).getCImm()->getZExtValue();
2831 I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val);
2832 } else if (I.getOperand(i: 1).isImm()) {
2833 uint64_t Val = I.getOperand(i: 1).getImm();
2834 I.getOperand(i: 1).ChangeToImmediate(ImmVal: Val);
2835 }
2836
2837 const unsigned MovOpc =
2838 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2839 I.setDesc(TII.get(Opcode: MovOpc));
2840 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2841 return true;
2842 }
2843 case TargetOpcode::G_EXTRACT: {
2844 Register DstReg = I.getOperand(i: 0).getReg();
2845 Register SrcReg = I.getOperand(i: 1).getReg();
2846 LLT SrcTy = MRI.getType(Reg: SrcReg);
2847 LLT DstTy = MRI.getType(Reg: DstReg);
2848 (void)DstTy;
2849 unsigned SrcSize = SrcTy.getSizeInBits();
2850
2851 if (SrcTy.getSizeInBits() > 64) {
2852 // This should be an extract of an s128, which is like a vector extract.
2853 if (SrcTy.getSizeInBits() != 128)
2854 return false;
2855 // Only support extracting 64 bits from an s128 at the moment.
2856 if (DstTy.getSizeInBits() != 64)
2857 return false;
2858
2859 unsigned Offset = I.getOperand(i: 2).getImm();
2860 if (Offset % 64 != 0)
2861 return false;
2862
2863 // Check we have the right regbank always.
2864 const RegisterBank &SrcRB = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
2865 const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
2866 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2867
2868 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2869 auto NewI =
2870 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {})
2871 .addUse(RegNo: SrcReg, Flags: 0,
2872 SubReg: Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2873 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt&: *NewI,
2874 RegClass: AArch64::GPR64RegClass, RegMO&: NewI->getOperand(i: 0));
2875 I.eraseFromParent();
2876 return true;
2877 }
2878
2879 // Emit the same code as a vector extract.
2880 // Offset must be a multiple of 64.
2881 unsigned LaneIdx = Offset / 64;
2882 MachineInstr *Extract = emitExtractVectorElt(
2883 DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: 64), VecReg: SrcReg, LaneIdx, MIRBuilder&: MIB);
2884 if (!Extract)
2885 return false;
2886 I.eraseFromParent();
2887 return true;
2888 }
2889
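// Smaller extracts become a UBFX, encoded as UBFM with immr = offset and
// imms = offset + width - 1.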
2890 I.setDesc(TII.get(Opcode: SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2891 MachineInstrBuilder(MF, I).addImm(Val: I.getOperand(i: 2).getImm() +
2892 Ty.getSizeInBits() - 1);
2893
2894 if (SrcSize < 64) {
2895 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2896 "unexpected G_EXTRACT types");
2897 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2898 }
2899
2900 DstReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64));
2901 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: I.getIterator()));
2902 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {})
2903 .addReg(RegNo: DstReg, flags: 0, SubReg: AArch64::sub_32);
2904 RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(),
2905 RC: AArch64::GPR32RegClass, MRI);
2906 I.getOperand(i: 0).setReg(DstReg);
2907
2908 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2909 }
2910
2911 case TargetOpcode::G_INSERT: {
2912 LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 2).getReg());
2913 LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
2914 unsigned DstSize = DstTy.getSizeInBits();
2915 // Larger inserts are vectors, same-size ones should be something else by
2916 // now (split up or turned into COPYs).
2917 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2918 return false;
2919
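// A scalar G_INSERT at bit lsb becomes a BFM (i.e. BFI) with
// immr = (size - lsb) % size and imms = width - 1.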
2920 I.setDesc(TII.get(Opcode: DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2921 unsigned LSB = I.getOperand(i: 3).getImm();
2922 unsigned Width = MRI.getType(Reg: I.getOperand(i: 2).getReg()).getSizeInBits();
2923 I.getOperand(i: 3).setImm((DstSize - LSB) % DstSize);
2924 MachineInstrBuilder(MF, I).addImm(Val: Width - 1);
2925
2926 if (DstSize < 64) {
2927 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2928 "unexpected G_INSERT types");
2929 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2930 }
2931
2932 Register SrcReg = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64));
2933 BuildMI(BB&: MBB, I: I.getIterator(), MIMD: I.getDebugLoc(),
2934 MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG))
2935 .addDef(RegNo: SrcReg)
2936 .addImm(Val: 0)
2937 .addUse(RegNo: I.getOperand(i: 2).getReg())
2938 .addImm(Val: AArch64::sub_32);
2939 RBI.constrainGenericRegister(Reg: I.getOperand(i: 2).getReg(),
2940 RC: AArch64::GPR32RegClass, MRI);
2941 I.getOperand(i: 2).setReg(SrcReg);
2942
2943 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2944 }
2945 case TargetOpcode::G_FRAME_INDEX: {
2946 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2947 if (Ty != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) {
2948 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2949 << ", expected: " << LLT::pointer(0, 64) << '\n');
2950 return false;
2951 }
2952 I.setDesc(TII.get(Opcode: AArch64::ADDXri));
2953
2954 // MOs for a #0 shifted immediate.
2955 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2956 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
2957
2958 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2959 }
2960
2961 case TargetOpcode::G_GLOBAL_VALUE: {
2962 const GlobalValue *GV = nullptr;
2963 unsigned OpFlags;
2964 if (I.getOperand(i: 1).isSymbol()) {
2965 OpFlags = I.getOperand(i: 1).getTargetFlags();
2966 // Currently only used by "RtLibUseGOT".
2967 assert(OpFlags == AArch64II::MO_GOT);
2968 } else {
2969 GV = I.getOperand(i: 1).getGlobal();
2970 if (GV->isThreadLocal()) {
2971 // We don't support instructions with emulated TLS variables yet
2972 if (TM.useEmulatedTLS())
2973 return false;
2974 return selectTLSGlobalValue(I, MRI);
2975 }
2976 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2977 }
2978
2979 if (OpFlags & AArch64II::MO_GOT) {
2980 I.setDesc(TII.get(Opcode: MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT()
2981 ? AArch64::LOADgotAUTH
2982 : AArch64::LOADgot));
2983 I.getOperand(i: 1).setTargetFlags(OpFlags);
2984 } else if (TM.getCodeModel() == CodeModel::Large &&
2985 !TM.isPositionIndependent()) {
2986 // Materialize the global using movz/movk instructions.
2987 materializeLargeCMVal(I, V: GV, OpFlags);
2988 I.eraseFromParent();
2989 return true;
2990 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2991 I.setDesc(TII.get(Opcode: AArch64::ADR));
2992 I.getOperand(i: 1).setTargetFlags(OpFlags);
2993 } else {
2994 I.setDesc(TII.get(Opcode: AArch64::MOVaddr));
2995 I.getOperand(i: 1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2996 MachineInstrBuilder MIB(MF, I);
2997 MIB.addGlobalAddress(GV, Offset: I.getOperand(i: 1).getOffset(),
2998 TargetFlags: OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2999 }
3000 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3001 }
3002
3003 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
3004 return selectPtrAuthGlobalValue(I, MRI);
3005
3006 case TargetOpcode::G_ZEXTLOAD:
3007 case TargetOpcode::G_LOAD:
3008 case TargetOpcode::G_STORE: {
3009 GLoadStore &LdSt = cast<GLoadStore>(Val&: I);
3010 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
3011 LLT PtrTy = MRI.getType(Reg: LdSt.getPointerReg());
3012
3013 // Can only handle AddressSpace 0, 64-bit pointers.
3014 if (PtrTy != LLT::pointer(AddressSpace: 0, SizeInBits: 64)) {
3015 return false;
3016 }
3017
3018 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
3019 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
3020 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
3021
3022 // Need special instructions for atomics that affect ordering.
3023 if (Order != AtomicOrdering::NotAtomic &&
3024 Order != AtomicOrdering::Unordered &&
3025 Order != AtomicOrdering::Monotonic) {
3026 assert(!isa<GZExtLoad>(LdSt));
3027 assert(MemSizeInBytes <= 8 &&
3028 "128-bit atomics should already be custom-legalized");
3029
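// The opcode tables below are indexed by log2 of the access size, e.g. a
// 4-byte acquire load picks index 2: LDAPRW when +rcpc is available and the
// ordering is weaker than seq_cst, otherwise LDARW; a 4-byte release store
// picks STLRW.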
3030 if (isa<GLoad>(Val: LdSt)) {
3031 static constexpr unsigned LDAPROpcodes[] = {
3032 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
3033 static constexpr unsigned LDAROpcodes[] = {
3034 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
3035 ArrayRef<unsigned> Opcodes =
3036 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
3037 ? LDAPROpcodes
3038 : LDAROpcodes;
3039 I.setDesc(TII.get(Opcode: Opcodes[Log2_32(Value: MemSizeInBytes)]));
3040 } else {
3041 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
3042 AArch64::STLRW, AArch64::STLRX};
3043 Register ValReg = LdSt.getReg(Idx: 0);
3044 if (MRI.getType(Reg: ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
3045 // Emit a subreg copy of 32 bits.
3046 Register NewVal = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3047 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {NewVal}, SrcOps: {})
3048 .addReg(RegNo: I.getOperand(i: 0).getReg(), flags: 0, SubReg: AArch64::sub_32);
3049 I.getOperand(i: 0).setReg(NewVal);
3050 }
3051 I.setDesc(TII.get(Opcode: Opcodes[Log2_32(Value: MemSizeInBytes)]));
3052 }
3053 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3054 return true;
3055 }
3056
3057#ifndef NDEBUG
3058 const Register PtrReg = LdSt.getPointerReg();
3059 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
3060 // Check that the pointer register is valid.
3061 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
3062 "Load/Store pointer operand isn't a GPR");
3063 assert(MRI.getType(PtrReg).isPointer() &&
3064 "Load/Store pointer operand isn't a pointer");
3065#endif
3066
3067 const Register ValReg = LdSt.getReg(Idx: 0);
3068 const RegisterBank &RB = *RBI.getRegBank(Reg: ValReg, MRI, TRI);
3069 LLT ValTy = MRI.getType(Reg: ValReg);
3070
3071 // The code below doesn't support truncating stores, so we need to split it
3072 // again.
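// E.g., roughly: storing the low 32 bits of a 64-bit value becomes a
// subregister COPY (sub_32 on GPR, ssub on FPR) followed by a plain 32-bit
// store of the copy.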
3073 if (isa<GStore>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3074 unsigned SubReg;
3075 LLT MemTy = LdSt.getMMO().getMemoryType();
3076 auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB);
3077 if (!getSubRegForClass(RC, TRI, SubReg))
3078 return false;
3079
3080 // Generate a subreg copy.
3081 auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {MemTy}, SrcOps: {})
3082 .addReg(RegNo: ValReg, flags: 0, SubReg)
3083 .getReg(Idx: 0);
3084 RBI.constrainGenericRegister(Reg: Copy, RC: *RC, MRI);
3085 LdSt.getOperand(i: 0).setReg(Copy);
3086 } else if (isa<GLoad>(Val: LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3087 // If this is an any-extending load from the FPR bank, split it into a regular
3088 // load + extend.
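// E.g., roughly (placeholder vregs):
//   %v:fpr(s32) = G_LOAD %p :: (load (s16))
// becomes a 16-bit load into a new fpr(s16) vreg followed by
//   %v = SUBREG_TO_REG 0, %new, hsub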
3089 if (RB.getID() == AArch64::FPRRegBankID) {
3090 unsigned SubReg;
3091 LLT MemTy = LdSt.getMMO().getMemoryType();
3092 auto *RC = getRegClassForTypeOnBank(Ty: MemTy, RB);
3093 if (!getSubRegForClass(RC, TRI, SubReg))
3094 return false;
3095 Register OldDst = LdSt.getReg(Idx: 0);
3096 Register NewDst =
3097 MRI.createGenericVirtualRegister(Ty: LdSt.getMMO().getMemoryType());
3098 LdSt.getOperand(i: 0).setReg(NewDst);
3099 MRI.setRegBank(Reg: NewDst, RegBank: RB);
3100 // Generate a SUBREG_TO_REG to extend it.
3101 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LdSt.getIterator()));
3102 MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {OldDst}, SrcOps: {})
3103 .addImm(Val: 0)
3104 .addUse(RegNo: NewDst)
3105 .addImm(Val: SubReg);
3106 auto SubRegRC = getRegClassForTypeOnBank(Ty: MRI.getType(Reg: OldDst), RB);
3107 RBI.constrainGenericRegister(Reg: OldDst, RC: *SubRegRC, MRI);
3108 MIB.setInstr(LdSt);
3109 ValTy = MemTy; // This is no longer an extending load.
3110 }
3111 }
3112
3113 // Helper lambda for partially selecting I. Either returns the original
3114 // instruction with an updated opcode, or a new instruction.
3115 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
3116 bool IsStore = isa<GStore>(Val: I);
3117 const unsigned NewOpc =
3118 selectLoadStoreUIOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize: MemSizeInBits);
3119 if (NewOpc == I.getOpcode())
3120 return nullptr;
3121 // Check if we can fold anything into the addressing mode.
3122 auto AddrModeFns =
3123 selectAddrModeIndexed(Root&: I.getOperand(i: 1), Size: MemSizeInBytes);
3124 if (!AddrModeFns) {
3125 // Can't fold anything. Use the original instruction.
3126 I.setDesc(TII.get(Opcode: NewOpc));
3127 I.addOperand(Op: MachineOperand::CreateImm(Val: 0));
3128 return &I;
3129 }
3130
3131 // Folded something. Create a new instruction and return it.
3132 auto NewInst = MIB.buildInstr(Opc: NewOpc, DstOps: {}, SrcOps: {}, Flags: I.getFlags());
3133 Register CurValReg = I.getOperand(i: 0).getReg();
3134 IsStore ? NewInst.addUse(RegNo: CurValReg) : NewInst.addDef(RegNo: CurValReg);
3135 NewInst.cloneMemRefs(OtherMI: I);
3136 for (auto &Fn : *AddrModeFns)
3137 Fn(NewInst);
3138 I.eraseFromParent();
3139 return &*NewInst;
3140 };
3141
3142 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3143 if (!LoadStore)
3144 return false;
3145
3146 // If we're storing a 0, use WZR/XZR.
3147 if (Opcode == TargetOpcode::G_STORE) {
3148 auto CVal = getIConstantVRegValWithLookThrough(
3149 VReg: LoadStore->getOperand(i: 0).getReg(), MRI);
3150 if (CVal && CVal->Value == 0) {
3151 switch (LoadStore->getOpcode()) {
3152 case AArch64::STRWui:
3153 case AArch64::STRHHui:
3154 case AArch64::STRBBui:
3155 LoadStore->getOperand(i: 0).setReg(AArch64::WZR);
3156 break;
3157 case AArch64::STRXui:
3158 LoadStore->getOperand(i: 0).setReg(AArch64::XZR);
3159 break;
3160 }
3161 }
3162 }
3163
3164 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3165 ValTy == LLT::scalar(SizeInBits: 64) && MemSizeInBits == 32)) {
3166 // The any/zextload from a smaller type to i32 should be handled by the
3167 // importer.
3168 if (MRI.getType(Reg: LoadStore->getOperand(i: 0).getReg()).getSizeInBits() != 64)
3169 return false;
3170 // If we have an extending load then change the load's type to be a
3171 // narrower reg and zero_extend with SUBREG_TO_REG.
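// E.g., roughly (placeholder vregs): a zextload of 32 bits into an s64 becomes
// a 32-bit load into a GPR32 vreg (which already zeroes the upper 32 bits)
// followed by
//   %dst = SUBREG_TO_REG 0, %ld, sub_32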
3172 Register LdReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3173 Register DstReg = LoadStore->getOperand(i: 0).getReg();
3174 LoadStore->getOperand(i: 0).setReg(LdReg);
3175
3176 MIB.setInsertPt(MBB&: MIB.getMBB(), II: std::next(x: LoadStore->getIterator()));
3177 MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DstReg}, SrcOps: {})
3178 .addImm(Val: 0)
3179 .addUse(RegNo: LdReg)
3180 .addImm(Val: AArch64::sub_32);
3181 constrainSelectedInstRegOperands(I&: *LoadStore, TII, TRI, RBI);
3182 return RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64allRegClass,
3183 MRI);
3184 }
3185 return constrainSelectedInstRegOperands(I&: *LoadStore, TII, TRI, RBI);
3186 }
3187
3188 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3189 case TargetOpcode::G_INDEXED_SEXTLOAD:
3190 return selectIndexedExtLoad(I, MRI);
3191 case TargetOpcode::G_INDEXED_LOAD:
3192 return selectIndexedLoad(I, MRI);
3193 case TargetOpcode::G_INDEXED_STORE:
3194 return selectIndexedStore(I&: cast<GIndexedStore>(Val&: I), MRI);
3195
3196 case TargetOpcode::G_LSHR:
3197 case TargetOpcode::G_ASHR:
3198 if (MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector())
3199 return selectVectorAshrLshr(I, MRI);
3200 [[fallthrough]];
3201 case TargetOpcode::G_SHL:
3202 if (Opcode == TargetOpcode::G_SHL &&
3203 MRI.getType(Reg: I.getOperand(i: 0).getReg()).isVector())
3204 return selectVectorSHL(I, MRI);
3205
3206 // These shifts were legalized to have 64 bit shift amounts because we
3207 // want to take advantage of the selection patterns that assume the
3208 // immediates are s64s. However, selectBinaryOp will assume both operands
3209 // have the same bit size.
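// E.g., roughly (placeholder vregs): a 32-bit shift whose amount is an s64
// gets the amount truncated with a subregister copy,
//   %amt32:gpr(s32) = COPY %amt64.sub_32
// and the shift then uses %amt32.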
3210 {
3211 Register SrcReg = I.getOperand(i: 1).getReg();
3212 Register ShiftReg = I.getOperand(i: 2).getReg();
3213 const LLT ShiftTy = MRI.getType(Reg: ShiftReg);
3214 const LLT SrcTy = MRI.getType(Reg: SrcReg);
3215 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3216 ShiftTy.getSizeInBits() == 64) {
3217 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3218 // Insert a subregister copy to implement a 64->32 trunc
3219 auto Trunc = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {SrcTy}, SrcOps: {})
3220 .addReg(RegNo: ShiftReg, flags: 0, SubReg: AArch64::sub_32);
3221 MRI.setRegBank(Reg: Trunc.getReg(Idx: 0), RegBank: RBI.getRegBank(ID: AArch64::GPRRegBankID));
3222 I.getOperand(i: 2).setReg(Trunc.getReg(Idx: 0));
3223 }
3224 }
3225 [[fallthrough]];
3226 case TargetOpcode::G_OR: {
3227 // Reject the various things we don't support yet.
3228 if (unsupportedBinOp(I, RBI, MRI, TRI))
3229 return false;
3230
3231 const unsigned OpSize = Ty.getSizeInBits();
3232
3233 const Register DefReg = I.getOperand(i: 0).getReg();
3234 const RegisterBank &RB = *RBI.getRegBank(Reg: DefReg, MRI, TRI);
3235
3236 const unsigned NewOpc = selectBinaryOp(GenericOpc: I.getOpcode(), RegBankID: RB.getID(), OpSize);
3237 if (NewOpc == I.getOpcode())
3238 return false;
3239
3240 I.setDesc(TII.get(Opcode: NewOpc));
3241 // FIXME: Should the type always be reset in setDesc?
3242
3243 // Now that we selected an opcode, we need to constrain the register
3244 // operands to use appropriate classes.
3245 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3246 }
3247
3248 case TargetOpcode::G_PTR_ADD: {
3249 emitADD(DefReg: I.getOperand(i: 0).getReg(), LHS&: I.getOperand(i: 1), RHS&: I.getOperand(i: 2), MIRBuilder&: MIB);
3250 I.eraseFromParent();
3251 return true;
3252 }
3253
3254 case TargetOpcode::G_SADDE:
3255 case TargetOpcode::G_UADDE:
3256 case TargetOpcode::G_SSUBE:
3257 case TargetOpcode::G_USUBE:
3258 case TargetOpcode::G_SADDO:
3259 case TargetOpcode::G_UADDO:
3260 case TargetOpcode::G_SSUBO:
3261 case TargetOpcode::G_USUBO:
3262 return selectOverflowOp(I, MRI);
3263
3264 case TargetOpcode::G_PTRMASK: {
3265 Register MaskReg = I.getOperand(i: 2).getReg();
3266 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(VReg: MaskReg, MRI);
3267 // TODO: Implement arbitrary cases
3268 if (!MaskVal || !isShiftedMask_64(Value: *MaskVal))
3269 return false;
3270
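// E.g., roughly: clearing the low four bits of a pointer
// (mask 0xfffffffffffffff0, a shifted run of ones) becomes
//   ANDXri %ptr, <logical-immediate encoding of the mask>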
3271 uint64_t Mask = *MaskVal;
3272 I.setDesc(TII.get(Opcode: AArch64::ANDXri));
3273 I.getOperand(i: 2).ChangeToImmediate(
3274 ImmVal: AArch64_AM::encodeLogicalImmediate(imm: Mask, regSize: 64));
3275
3276 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3277 }
3278 case TargetOpcode::G_PTRTOINT:
3279 case TargetOpcode::G_TRUNC: {
3280 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3281 const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
3282
3283 const Register DstReg = I.getOperand(i: 0).getReg();
3284 const Register SrcReg = I.getOperand(i: 1).getReg();
3285
3286 const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
3287 const RegisterBank &SrcRB = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
3288
3289 if (DstRB.getID() != SrcRB.getID()) {
3290 LLVM_DEBUG(
3291 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3292 return false;
3293 }
3294
3295 if (DstRB.getID() == AArch64::GPRRegBankID) {
3296 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB);
3297 if (!DstRC)
3298 return false;
3299
3300 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(Ty: SrcTy, RB: SrcRB);
3301 if (!SrcRC)
3302 return false;
3303
3304 if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: *SrcRC, MRI) ||
3305 !RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI)) {
3306 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3307 return false;
3308 }
3309
3310 if (DstRC == SrcRC) {
3311 // Nothing to be done
3312 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(SizeInBits: 32) &&
3313 SrcTy == LLT::scalar(SizeInBits: 64)) {
3314 llvm_unreachable("TableGen can import this case");
3315 return false;
3316 } else if (DstRC == &AArch64::GPR32RegClass &&
3317 SrcRC == &AArch64::GPR64RegClass) {
3318 I.getOperand(i: 1).setSubReg(AArch64::sub_32);
3319 } else {
3320 LLVM_DEBUG(
3321 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3322 return false;
3323 }
3324
3325 I.setDesc(TII.get(Opcode: TargetOpcode::COPY));
3326 return true;
3327 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3328 if (DstTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16) &&
3329 SrcTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 32)) {
3330 I.setDesc(TII.get(Opcode: AArch64::XTNv4i16));
3331 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3332 return true;
3333 }
3334
3335 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3336 MachineInstr *Extract = emitExtractVectorElt(
3337 DstReg, DstRB, ScalarTy: LLT::scalar(SizeInBits: DstTy.getSizeInBits()), VecReg: SrcReg, LaneIdx: 0, MIRBuilder&: MIB);
3338 if (!Extract)
3339 return false;
3340 I.eraseFromParent();
3341 return true;
3342 }
3343
3344 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3345 if (Opcode == TargetOpcode::G_PTRTOINT) {
3346 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3347 I.setDesc(TII.get(Opcode: TargetOpcode::COPY));
3348 return selectCopy(I, TII, MRI, TRI, RBI);
3349 }
3350 }
3351
3352 return false;
3353 }
3354
3355 case TargetOpcode::G_ANYEXT: {
3356 if (selectUSMovFromExtend(I, MRI))
3357 return true;
3358
3359 const Register DstReg = I.getOperand(i: 0).getReg();
3360 const Register SrcReg = I.getOperand(i: 1).getReg();
3361
3362 const RegisterBank &RBDst = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
3363 if (RBDst.getID() != AArch64::GPRRegBankID) {
3364 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3365 << ", expected: GPR\n");
3366 return false;
3367 }
3368
3369 const RegisterBank &RBSrc = *RBI.getRegBank(Reg: SrcReg, MRI, TRI);
3370 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3371 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3372 << ", expected: GPR\n");
3373 return false;
3374 }
3375
3376 const unsigned DstSize = MRI.getType(Reg: DstReg).getSizeInBits();
3377
3378 if (DstSize == 0) {
3379 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3380 return false;
3381 }
3382
3383 if (DstSize != 64 && DstSize > 32) {
3384 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3385 << ", expected: 32 or 64\n");
3386 return false;
3387 }
3388 // At this point G_ANYEXT is just like a plain COPY, but we need
3389 // to explicitly form the 64-bit value if any.
3390 if (DstSize > 32) {
3391 Register ExtSrc = MRI.createVirtualRegister(RegClass: &AArch64::GPR64allRegClass);
3392 BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::SUBREG_TO_REG))
3393 .addDef(RegNo: ExtSrc)
3394 .addImm(Val: 0)
3395 .addUse(RegNo: SrcReg)
3396 .addImm(Val: AArch64::sub_32);
3397 I.getOperand(i: 1).setReg(ExtSrc);
3398 }
3399 return selectCopy(I, TII, MRI, TRI, RBI);
3400 }
3401
3402 case TargetOpcode::G_ZEXT:
3403 case TargetOpcode::G_SEXT_INREG:
3404 case TargetOpcode::G_SEXT: {
3405 if (selectUSMovFromExtend(I, MRI))
3406 return true;
3407
3408 unsigned Opcode = I.getOpcode();
3409 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3410 const Register DefReg = I.getOperand(i: 0).getReg();
3411 Register SrcReg = I.getOperand(i: 1).getReg();
3412 const LLT DstTy = MRI.getType(Reg: DefReg);
3413 const LLT SrcTy = MRI.getType(Reg: SrcReg);
3414 unsigned DstSize = DstTy.getSizeInBits();
3415 unsigned SrcSize = SrcTy.getSizeInBits();
3416
3417 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3418 // extended is encoded in the imm.
3419 if (Opcode == TargetOpcode::G_SEXT_INREG)
3420 SrcSize = I.getOperand(i: 2).getImm();
3421
3422 if (DstTy.isVector())
3423 return false; // Should be handled by imported patterns.
3424
3425 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3426 AArch64::GPRRegBankID &&
3427 "Unexpected ext regbank");
3428
3429 MachineInstr *ExtI;
3430
3431 // First, check whether we're extending the result of a load whose dest type
3432 // is smaller than 32 bits; in that case this zext is redundant. GPR32 is the
3433 // smallest GPR register on AArch64, and all loads which are smaller
3434 // automatically zero-extend the upper bits. E.g.
3435 // %v(s8) = G_LOAD %p, :: (load 1)
3436 // %v2(s32) = G_ZEXT %v(s8)
3437 if (!IsSigned) {
3438 auto *LoadMI = getOpcodeDef(Opcode: TargetOpcode::G_LOAD, Reg: SrcReg, MRI);
3439 bool IsGPR =
3440 RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3441 if (LoadMI && IsGPR) {
3442 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3443 unsigned BytesLoaded = MemOp->getSize().getValue();
3444 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3445 return selectCopy(I, TII, MRI, TRI, RBI);
3446 }
3447
3448 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3449 // + SUBREG_TO_REG.
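// E.g., roughly (placeholder vregs):
//   %d:gpr(s64) = G_ZEXT %s:gpr(s32)
// becomes
//   %t = ORRWrs $wzr, %s, 0      ; a 32-bit mov, which clears the upper bits
//   %d = SUBREG_TO_REG 0, %t, sub_32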
3450 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3451 Register SubregToRegSrc =
3452 MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3453 const Register ZReg = AArch64::WZR;
3454 MIB.buildInstr(Opc: AArch64::ORRWrs, DstOps: {SubregToRegSrc}, SrcOps: {ZReg, SrcReg})
3455 .addImm(Val: 0);
3456
3457 MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DefReg}, SrcOps: {})
3458 .addImm(Val: 0)
3459 .addUse(RegNo: SubregToRegSrc)
3460 .addImm(Val: AArch64::sub_32);
3461
3462 if (!RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass,
3463 MRI)) {
3464 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3465 return false;
3466 }
3467
3468 if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: AArch64::GPR32RegClass,
3469 MRI)) {
3470 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3471 return false;
3472 }
3473
3474 I.eraseFromParent();
3475 return true;
3476 }
3477 }
3478
3479 if (DstSize == 64) {
3480 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3481 // FIXME: Can we avoid manually doing this?
3482 if (!RBI.constrainGenericRegister(Reg: SrcReg, RC: AArch64::GPR32RegClass,
3483 MRI)) {
3484 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3485 << " operand\n");
3486 return false;
3487 }
3488 SrcReg = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG,
3489 DstOps: {&AArch64::GPR64RegClass}, SrcOps: {})
3490 .addImm(Val: 0)
3491 .addUse(RegNo: SrcReg)
3492 .addImm(Val: AArch64::sub_32)
3493 .getReg(Idx: 0);
3494 }
3495
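// The extension is a bitfield move with immr = 0 and imms = SrcSize - 1,
// e.g. a G_SEXT from s8 to s64 becomes SBFMXri %x, 0, 7 (the sxtb alias);
// the 32-bit (Wri) form below is analogous.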
3496 ExtI = MIB.buildInstr(Opc: IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3497 DstOps: {DefReg}, SrcOps: {SrcReg})
3498 .addImm(Val: 0)
3499 .addImm(Val: SrcSize - 1);
3500 } else if (DstSize <= 32) {
3501 ExtI = MIB.buildInstr(Opc: IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3502 DstOps: {DefReg}, SrcOps: {SrcReg})
3503 .addImm(Val: 0)
3504 .addImm(Val: SrcSize - 1);
3505 } else {
3506 return false;
3507 }
3508
3509 constrainSelectedInstRegOperands(I&: *ExtI, TII, TRI, RBI);
3510 I.eraseFromParent();
3511 return true;
3512 }
3513
3514 case TargetOpcode::G_SITOFP:
3515 case TargetOpcode::G_UITOFP:
3516 case TargetOpcode::G_FPTOSI:
3517 case TargetOpcode::G_FPTOUI: {
3518 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg()),
3519 SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
3520 const unsigned NewOpc = selectFPConvOpc(GenericOpc: Opcode, DstTy, SrcTy);
3521 if (NewOpc == Opcode)
3522 return false;
3523
3524 I.setDesc(TII.get(Opcode: NewOpc));
3525 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3526 I.setFlags(MachineInstr::NoFPExcept);
3527
3528 return true;
3529 }
3530
3531 case TargetOpcode::G_FREEZE:
3532 return selectCopy(I, TII, MRI, TRI, RBI);
3533
3534 case TargetOpcode::G_INTTOPTR:
3535 // The importer is currently unable to import pointer types since they
3536 // didn't exist in SelectionDAG.
3537 return selectCopy(I, TII, MRI, TRI, RBI);
3538
3539 case TargetOpcode::G_BITCAST:
3540 // Imported SelectionDAG rules can handle every bitcast except those that
3541 // bitcast from a type to the same type. Ideally, these shouldn't occur
3542 // but we might not run an optimizer that deletes them. The other exception
3543 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3544 // of them.
3545 return selectCopy(I, TII, MRI, TRI, RBI);
3546
3547 case TargetOpcode::G_SELECT: {
3548 auto &Sel = cast<GSelect>(Val&: I);
3549 const Register CondReg = Sel.getCondReg();
3550 const Register TReg = Sel.getTrueReg();
3551 const Register FReg = Sel.getFalseReg();
3552
3553 if (tryOptSelect(Sel))
3554 return true;
3555
3556 // Make sure to use an unused vreg instead of wzr, so that the peephole
3557 // optimizations will be able to optimize these.
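// Roughly (placeholder vregs): the condition's bit 0 is tested with
//   %dead = ANDSWri %cond, <logical imm 1>   (sets NZCV)
// and emitSelect then produces a conditional select on NE, typically a
// CSEL (or a folded CSINC/CSINV/CSNEG form).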
3558 Register DeadVReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
3559 auto TstMI = MIB.buildInstr(Opc: AArch64::ANDSWri, DstOps: {DeadVReg}, SrcOps: {CondReg})
3560 .addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: 1, regSize: 32));
3561 constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI);
3562 if (!emitSelect(Dst: Sel.getReg(Idx: 0), True: TReg, False: FReg, CC: AArch64CC::NE, MIB))
3563 return false;
3564 Sel.eraseFromParent();
3565 return true;
3566 }
3567 case TargetOpcode::G_ICMP: {
3568 if (Ty.isVector())
3569 return false;
3570
3571 if (Ty != LLT::scalar(SizeInBits: 32)) {
3572 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3573 << ", expected: " << LLT::scalar(32) << '\n');
3574 return false;
3575 }
3576
3577 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate());
3578 const AArch64CC::CondCode InvCC =
3579 changeICMPPredToAArch64CC(P: CmpInst::getInversePredicate(pred: Pred));
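// Roughly: emitIntegerCompare sets NZCV (typically via SUBS, possibly with
// folded operands), and the CSINC below with WZR/WZR and the *inverted*
// condition materializes the boolean, i.e. the "cset" idiom:
//   %dst = CSINCWr $wzr, $wzr, <inverted cc>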
3580 emitIntegerCompare(LHS&: I.getOperand(i: 2), RHS&: I.getOperand(i: 3), Predicate&: I.getOperand(i: 1), MIRBuilder&: MIB);
3581 emitCSINC(/*Dst=*/I.getOperand(i: 0).getReg(), /*Src1=*/AArch64::WZR,
3582 /*Src2=*/AArch64::WZR, Pred: InvCC, MIRBuilder&: MIB);
3583 I.eraseFromParent();
3584 return true;
3585 }
3586
3587 case TargetOpcode::G_FCMP: {
3588 CmpInst::Predicate Pred =
3589 static_cast<CmpInst::Predicate>(I.getOperand(i: 1).getPredicate());
3590 if (!emitFPCompare(LHS: I.getOperand(i: 2).getReg(), RHS: I.getOperand(i: 3).getReg(), MIRBuilder&: MIB,
3591 Pred) ||
3592 !emitCSetForFCmp(Dst: I.getOperand(i: 0).getReg(), Pred, MIRBuilder&: MIB))
3593 return false;
3594 I.eraseFromParent();
3595 return true;
3596 }
3597 case TargetOpcode::G_VASTART:
3598 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3599 : selectVaStartAAPCS(I, MF, MRI);
3600 case TargetOpcode::G_INTRINSIC:
3601 return selectIntrinsic(I, MRI);
3602 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3603 return selectIntrinsicWithSideEffects(I, MRI);
3604 case TargetOpcode::G_IMPLICIT_DEF: {
3605 I.setDesc(TII.get(Opcode: TargetOpcode::IMPLICIT_DEF));
3606 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3607 const Register DstReg = I.getOperand(i: 0).getReg();
3608 const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
3609 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(Ty: DstTy, RB: DstRB);
3610 RBI.constrainGenericRegister(Reg: DstReg, RC: *DstRC, MRI);
3611 return true;
3612 }
3613 case TargetOpcode::G_BLOCK_ADDR: {
3614 Function *BAFn = I.getOperand(i: 1).getBlockAddress()->getFunction();
3615 if (std::optional<uint16_t> BADisc =
3616 STI.getPtrAuthBlockAddressDiscriminatorIfEnabled(ParentFn: *BAFn)) {
3617 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X16}, SrcOps: {});
3618 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
3619 MIB.buildInstr(Opcode: AArch64::MOVaddrPAC)
3620 .addBlockAddress(BA: I.getOperand(i: 1).getBlockAddress())
3621 .addImm(Val: AArch64PACKey::IA)
3622 .addReg(/*AddrDisc=*/RegNo: AArch64::XZR)
3623 .addImm(Val: *BADisc)
3624 .constrainAllUses(TII, TRI, RBI);
3625 MIB.buildCopy(Res: I.getOperand(i: 0).getReg(), Op: Register(AArch64::X16));
3626 RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(),
3627 RC: AArch64::GPR64RegClass, MRI);
3628 I.eraseFromParent();
3629 return true;
3630 }
3631 if (TM.getCodeModel() == CodeModel::Large && !TM.isPositionIndependent()) {
3632 materializeLargeCMVal(I, V: I.getOperand(i: 1).getBlockAddress(), OpFlags: 0);
3633 I.eraseFromParent();
3634 return true;
3635 } else {
3636 I.setDesc(TII.get(Opcode: AArch64::MOVaddrBA));
3637 auto MovMI = BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::MOVaddrBA),
3638 DestReg: I.getOperand(i: 0).getReg())
3639 .addBlockAddress(BA: I.getOperand(i: 1).getBlockAddress(),
3640 /* Offset */ 0, TargetFlags: AArch64II::MO_PAGE)
3641 .addBlockAddress(
3642 BA: I.getOperand(i: 1).getBlockAddress(), /* Offset */ 0,
3643 TargetFlags: AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3644 I.eraseFromParent();
3645 return constrainSelectedInstRegOperands(I&: *MovMI, TII, TRI, RBI);
3646 }
3647 }
3648 case AArch64::G_DUP: {
3649 // When the scalar of G_DUP is an s8/s16 gpr, it can't be selected by the
3650 // imported patterns, so do it manually here. Avoiding generating an s16 gpr
3651 // is difficult because at RegBankSelect we may end up pessimizing the fpr
3652 // case if we decided to add an anyextend to fix this. Manual selection is
3653 // the most robust solution for now.
3654 if (RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI)->getID() !=
3655 AArch64::GPRRegBankID)
3656 return false; // We expect the fpr regbank case to be imported.
3657 LLT VecTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3658 if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8))
3659 I.setDesc(TII.get(Opcode: AArch64::DUPv8i8gpr));
3660 else if (VecTy == LLT::fixed_vector(NumElements: 16, ScalarSizeInBits: 8))
3661 I.setDesc(TII.get(Opcode: AArch64::DUPv16i8gpr));
3662 else if (VecTy == LLT::fixed_vector(NumElements: 4, ScalarSizeInBits: 16))
3663 I.setDesc(TII.get(Opcode: AArch64::DUPv4i16gpr));
3664 else if (VecTy == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 16))
3665 I.setDesc(TII.get(Opcode: AArch64::DUPv8i16gpr));
3666 else
3667 return false;
3668 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3669 }
3670 case TargetOpcode::G_BUILD_VECTOR:
3671 return selectBuildVector(I, MRI);
3672 case TargetOpcode::G_MERGE_VALUES:
3673 return selectMergeValues(I, MRI);
3674 case TargetOpcode::G_UNMERGE_VALUES:
3675 return selectUnmergeValues(I, MRI);
3676 case TargetOpcode::G_SHUFFLE_VECTOR:
3677 return selectShuffleVector(I, MRI);
3678 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3679 return selectExtractElt(I, MRI);
3680 case TargetOpcode::G_CONCAT_VECTORS:
3681 return selectConcatVectors(I, MRI);
3682 case TargetOpcode::G_JUMP_TABLE:
3683 return selectJumpTable(I, MRI);
3684 case TargetOpcode::G_MEMCPY:
3685 case TargetOpcode::G_MEMCPY_INLINE:
3686 case TargetOpcode::G_MEMMOVE:
3687 case TargetOpcode::G_MEMSET:
3688 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3689 return selectMOPS(I, MRI);
3690 }
3691
3692 return false;
3693}
3694
3695bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3696 MachineIRBuilderState OldMIBState = MIB.getState();
3697 bool Success = select(I);
3698 MIB.setState(OldMIBState);
3699 return Success;
3700}
3701
3702bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3703 MachineRegisterInfo &MRI) {
3704 unsigned Mopcode;
3705 switch (GI.getOpcode()) {
3706 case TargetOpcode::G_MEMCPY:
3707 case TargetOpcode::G_MEMCPY_INLINE:
3708 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3709 break;
3710 case TargetOpcode::G_MEMMOVE:
3711 Mopcode = AArch64::MOPSMemoryMovePseudo;
3712 break;
3713 case TargetOpcode::G_MEMSET:
3714 // For tagged memset see llvm.aarch64.mops.memset.tag
3715 Mopcode = AArch64::MOPSMemorySetPseudo;
3716 break;
3717 }
3718
3719 auto &DstPtr = GI.getOperand(i: 0);
3720 auto &SrcOrVal = GI.getOperand(i: 1);
3721 auto &Size = GI.getOperand(i: 2);
3722
3723 // Create copies of the registers that can be clobbered.
3724 const Register DstPtrCopy = MRI.cloneVirtualRegister(VReg: DstPtr.getReg());
3725 const Register SrcValCopy = MRI.cloneVirtualRegister(VReg: SrcOrVal.getReg());
3726 const Register SizeCopy = MRI.cloneVirtualRegister(VReg: Size.getReg());
3727
3728 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3729 const auto &SrcValRegClass =
3730 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3731
3732 // Constrain to specific registers
3733 RBI.constrainGenericRegister(Reg: DstPtrCopy, RC: AArch64::GPR64commonRegClass, MRI);
3734 RBI.constrainGenericRegister(Reg: SrcValCopy, RC: SrcValRegClass, MRI);
3735 RBI.constrainGenericRegister(Reg: SizeCopy, RC: AArch64::GPR64RegClass, MRI);
3736
3737 MIB.buildCopy(Res: DstPtrCopy, Op: DstPtr);
3738 MIB.buildCopy(Res: SrcValCopy, Op: SrcOrVal);
3739 MIB.buildCopy(Res: SizeCopy, Op: Size);
3740
3741 // New instruction uses the copied registers because it must update them.
3742 // The defs are not used since they don't exist in G_MEM*. They are still
3743 // tied.
3744 // Note: the operand order differs from that of G_MEMSET, G_MEMCPY and G_MEMMOVE.
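// Sketch (assuming the usual MOPS lowering, e.g. for G_MEMCPY): the pseudo
// defines updated dst/src/size registers tied to the copied inputs and is
// expanded later into the FEAT_MOPS prologue/main/epilogue instruction
// triple (CPYFP*/CPYFM*/CPYFE* for memcpy, SETP*/SETM*/SETE* for memset).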
3745 Register DefDstPtr = MRI.createVirtualRegister(RegClass: &AArch64::GPR64commonRegClass);
3746 Register DefSize = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
3747 if (IsSet) {
3748 MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSize},
3749 SrcOps: {DstPtrCopy, SizeCopy, SrcValCopy});
3750 } else {
3751 Register DefSrcPtr = MRI.createVirtualRegister(RegClass: &SrcValRegClass);
3752 MIB.buildInstr(Opc: Mopcode, DstOps: {DefDstPtr, DefSrcPtr, DefSize},
3753 SrcOps: {DstPtrCopy, SrcValCopy, SizeCopy});
3754 }
3755
3756 GI.eraseFromParent();
3757 return true;
3758}
3759
3760bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3761 MachineRegisterInfo &MRI) {
3762 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3763 Register JTAddr = I.getOperand(i: 0).getReg();
3764 unsigned JTI = I.getOperand(i: 1).getIndex();
3765 Register Index = I.getOperand(i: 2).getReg();
3766
3767 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(Idx: JTI, Size: 4, PCRelSym: nullptr);
3768
3769 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3770 // sequence later, to guarantee the integrity of the intermediate values.
3771 if (MF->getFunction().hasFnAttribute(Kind: "aarch64-jump-table-hardening")) {
3772 CodeModel::Model CM = TM.getCodeModel();
3773 if (STI.isTargetMachO()) {
3774 if (CM != CodeModel::Small && CM != CodeModel::Large)
3775 report_fatal_error(reason: "Unsupported code-model for hardened jump-table");
3776 } else {
3777 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3778 assert(STI.isTargetELF() &&
3779 "jump table hardening only supported on MachO/ELF");
3780 if (CM != CodeModel::Small)
3781 report_fatal_error(reason: "Unsupported code-model for hardened jump-table");
3782 }
3783
3784 MIB.buildCopy(Res: {AArch64::X16}, Op: I.getOperand(i: 2).getReg());
3785 MIB.buildInstr(Opcode: AArch64::BR_JumpTable)
3786 .addJumpTableIndex(Idx: I.getOperand(i: 1).getIndex());
3787 I.eraseFromParent();
3788 return true;
3789 }
3790
3791 Register TargetReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
3792 Register ScratchReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64spRegClass);
3793
3794 auto JumpTableInst = MIB.buildInstr(Opc: AArch64::JumpTableDest32,
3795 DstOps: {TargetReg, ScratchReg}, SrcOps: {JTAddr, Index})
3796 .addJumpTableIndex(Idx: JTI);
3797 // Save the jump table info.
3798 MIB.buildInstr(Opc: TargetOpcode::JUMP_TABLE_DEBUG_INFO, DstOps: {},
3799 SrcOps: {static_cast<int64_t>(JTI)});
3800 // Build the indirect branch.
3801 MIB.buildInstr(Opc: AArch64::BR, DstOps: {}, SrcOps: {TargetReg});
3802 I.eraseFromParent();
3803 return constrainSelectedInstRegOperands(I&: *JumpTableInst, TII, TRI, RBI);
3804}
3805
3806bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3807 MachineRegisterInfo &MRI) {
3808 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3809 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3810
3811 Register DstReg = I.getOperand(i: 0).getReg();
3812 unsigned JTI = I.getOperand(i: 1).getIndex();
3813 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
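// Sketch of the eventual expansion (label name is illustrative):
//   adrp xN, .LJTI0_0
//   add  xN, xN, :lo12:.LJTI0_0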
3814 auto MovMI =
3815 MIB.buildInstr(Opc: AArch64::MOVaddrJT, DstOps: {DstReg}, SrcOps: {})
3816 .addJumpTableIndex(Idx: JTI, TargetFlags: AArch64II::MO_PAGE)
3817 .addJumpTableIndex(Idx: JTI, TargetFlags: AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3818 I.eraseFromParent();
3819 return constrainSelectedInstRegOperands(I&: *MovMI, TII, TRI, RBI);
3820}
3821
3822bool AArch64InstructionSelector::selectTLSGlobalValue(
3823 MachineInstr &I, MachineRegisterInfo &MRI) {
3824 if (!STI.isTargetMachO())
3825 return false;
3826 MachineFunction &MF = *I.getParent()->getParent();
3827 MF.getFrameInfo().setAdjustsStack(true);
3828
3829 const auto &GlobalOp = I.getOperand(i: 1);
3830 assert(GlobalOp.getOffset() == 0 &&
3831 "Shouldn't have an offset on TLS globals!");
3832 const GlobalValue &GV = *GlobalOp.getGlobal();
3833
3834 auto LoadGOT =
3835 MIB.buildInstr(Opc: AArch64::LOADgot, DstOps: {&AArch64::GPR64commonRegClass}, SrcOps: {})
3836 .addGlobalAddress(GV: &GV, Offset: 0, TargetFlags: AArch64II::MO_TLS);
3837
3838 auto Load = MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {&AArch64::GPR64commonRegClass},
3839 SrcOps: {LoadGOT.getReg(Idx: 0)})
3840 .addImm(Val: 0);
3841
3842 MIB.buildCopy(Res: Register(AArch64::X0), Op: LoadGOT.getReg(Idx: 0));
3843 // TLS calls preserve all registers except those that absolutely must be
3844 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3845 // silly).
3846 unsigned Opcode = getBLRCallOpcode(MF);
3847
3848 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3849 if (MF.getFunction().hasFnAttribute(Kind: "ptrauth-calls")) {
3850 assert(Opcode == AArch64::BLR);
3851 Opcode = AArch64::BLRAAZ;
3852 }
3853
3854 MIB.buildInstr(Opc: Opcode, DstOps: {}, SrcOps: {Load})
3855 .addUse(RegNo: AArch64::X0, Flags: RegState::Implicit)
3856 .addDef(RegNo: AArch64::X0, Flags: RegState::Implicit)
3857 .addRegMask(Mask: TRI.getTLSCallPreservedMask());
3858
3859 MIB.buildCopy(Res: I.getOperand(i: 0).getReg(), Op: Register(AArch64::X0));
3860 RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(), RC: AArch64::GPR64RegClass,
3861 MRI);
3862 I.eraseFromParent();
3863 return true;
3864}
3865
3866MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3867 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3868 MachineIRBuilder &MIRBuilder) const {
3869 auto Undef = MIRBuilder.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstRC}, SrcOps: {});
3870
3871 auto BuildFn = [&](unsigned SubregIndex) {
3872 auto Ins =
3873 MIRBuilder
3874 .buildInstr(Opc: TargetOpcode::INSERT_SUBREG, DstOps: {DstRC}, SrcOps: {Undef, Scalar})
3875 .addImm(Val: SubregIndex);
3876 constrainSelectedInstRegOperands(I&: *Undef, TII, TRI, RBI);
3877 constrainSelectedInstRegOperands(I&: *Ins, TII, TRI, RBI);
3878 return &*Ins;
3879 };
3880
3881 switch (EltSize) {
3882 case 8:
3883 return BuildFn(AArch64::bsub);
3884 case 16:
3885 return BuildFn(AArch64::hsub);
3886 case 32:
3887 return BuildFn(AArch64::ssub);
3888 case 64:
3889 return BuildFn(AArch64::dsub);
3890 default:
3891 return nullptr;
3892 }
3893}
3894
3895MachineInstr *
3896AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3897 MachineIRBuilder &MIB,
3898 MachineRegisterInfo &MRI) const {
3899 LLT DstTy = MRI.getType(Reg: DstReg);
3900 const TargetRegisterClass *RC =
3901 getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: SrcReg, MRI, TRI));
3902 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3903 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3904 return nullptr;
3905 }
3906 unsigned SubReg = 0;
3907 if (!getSubRegForClass(RC, TRI, SubReg))
3908 return nullptr;
3909 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3910 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3911 << DstTy.getSizeInBits() << "\n");
3912 return nullptr;
3913 }
3914 auto Copy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {})
3915 .addReg(RegNo: SrcReg, flags: 0, SubReg);
3916 RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
3917 return Copy;
3918}
3919
3920bool AArch64InstructionSelector::selectMergeValues(
3921 MachineInstr &I, MachineRegisterInfo &MRI) {
3922 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3923 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
3924 const LLT SrcTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
3925 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3926 const RegisterBank &RB = *RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI);
3927
3928 if (I.getNumOperands() != 3)
3929 return false;
3930
3931 // Merging 2 s64s into an s128.
3932 if (DstTy == LLT::scalar(SizeInBits: 128)) {
3933 if (SrcTy.getSizeInBits() != 64)
3934 return false;
3935 Register DstReg = I.getOperand(i: 0).getReg();
3936 Register Src1Reg = I.getOperand(i: 1).getReg();
3937 Register Src2Reg = I.getOperand(i: 2).getReg();
3938 auto Tmp = MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {DstTy}, SrcOps: {});
3939 MachineInstr *InsMI = emitLaneInsert(DstReg: std::nullopt, SrcReg: Tmp.getReg(Idx: 0), EltReg: Src1Reg,
3940 /* LaneIdx */ 0, RB, MIRBuilder&: MIB);
3941 if (!InsMI)
3942 return false;
3943 MachineInstr *Ins2MI = emitLaneInsert(DstReg, SrcReg: InsMI->getOperand(i: 0).getReg(),
3944 EltReg: Src2Reg, /* LaneIdx */ 1, RB, MIRBuilder&: MIB);
3945 if (!Ins2MI)
3946 return false;
3947 constrainSelectedInstRegOperands(I&: *InsMI, TII, TRI, RBI);
3948 constrainSelectedInstRegOperands(I&: *Ins2MI, TII, TRI, RBI);
3949 I.eraseFromParent();
3950 return true;
3951 }
3952
3953 if (RB.getID() != AArch64::GPRRegBankID)
3954 return false;
3955
3956 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3957 return false;
3958
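// Merge two s32 GPRs into an s64: any-extend both operands to 64 bits with
// SUBREG_TO_REG, then insert the second one into bits [32, 64) with
//   BFMXri %lo64, %hi64, 32, 31
// so that, roughly, the result is lo | (hi << 32).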
3959 auto *DstRC = &AArch64::GPR64RegClass;
3960 Register SubToRegDef = MRI.createVirtualRegister(RegClass: DstRC);
3961 MachineInstr &SubRegMI = *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(),
3962 MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG))
3963 .addDef(RegNo: SubToRegDef)
3964 .addImm(Val: 0)
3965 .addUse(RegNo: I.getOperand(i: 1).getReg())
3966 .addImm(Val: AArch64::sub_32);
3967 Register SubToRegDef2 = MRI.createVirtualRegister(RegClass: DstRC);
3968 // Need to anyext the second scalar before we can use bfm
3969 MachineInstr &SubRegMI2 = *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(),
3970 MCID: TII.get(Opcode: TargetOpcode::SUBREG_TO_REG))
3971 .addDef(RegNo: SubToRegDef2)
3972 .addImm(Val: 0)
3973 .addUse(RegNo: I.getOperand(i: 2).getReg())
3974 .addImm(Val: AArch64::sub_32);
3975 MachineInstr &BFM =
3976 *BuildMI(BB&: *I.getParent(), I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: AArch64::BFMXri))
3977 .addDef(RegNo: I.getOperand(i: 0).getReg())
3978 .addUse(RegNo: SubToRegDef)
3979 .addUse(RegNo: SubToRegDef2)
3980 .addImm(Val: 32)
3981 .addImm(Val: 31);
3982 constrainSelectedInstRegOperands(I&: SubRegMI, TII, TRI, RBI);
3983 constrainSelectedInstRegOperands(I&: SubRegMI2, TII, TRI, RBI);
3984 constrainSelectedInstRegOperands(I&: BFM, TII, TRI, RBI);
3985 I.eraseFromParent();
3986 return true;
3987}
3988
3989static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3990 const unsigned EltSize) {
3991 // Choose a lane copy opcode and subregister based off of the size of the
3992 // vector's elements.
3993 switch (EltSize) {
3994 case 8:
3995 CopyOpc = AArch64::DUPi8;
3996 ExtractSubReg = AArch64::bsub;
3997 break;
3998 case 16:
3999 CopyOpc = AArch64::DUPi16;
4000 ExtractSubReg = AArch64::hsub;
4001 break;
4002 case 32:
4003 CopyOpc = AArch64::DUPi32;
4004 ExtractSubReg = AArch64::ssub;
4005 break;
4006 case 64:
4007 CopyOpc = AArch64::DUPi64;
4008 ExtractSubReg = AArch64::dsub;
4009 break;
4010 default:
4011 // Unknown size, bail out.
4012 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
4013 return false;
4014 }
4015 return true;
4016}
4017
4018MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
4019 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
4020 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
4021 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4022 unsigned CopyOpc = 0;
4023 unsigned ExtractSubReg = 0;
4024 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: ScalarTy.getSizeInBits())) {
4025 LLVM_DEBUG(
4026 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
4027 return nullptr;
4028 }
4029
4030 const TargetRegisterClass *DstRC =
4031 getRegClassForTypeOnBank(Ty: ScalarTy, RB: DstRB, GetAllRegSet: true);
4032 if (!DstRC) {
4033 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
4034 return nullptr;
4035 }
4036
4037 const RegisterBank &VecRB = *RBI.getRegBank(Reg: VecReg, MRI, TRI);
4038 const LLT &VecTy = MRI.getType(Reg: VecReg);
4039 const TargetRegisterClass *VecRC =
4040 getRegClassForTypeOnBank(Ty: VecTy, RB: VecRB, GetAllRegSet: true);
4041 if (!VecRC) {
4042 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
4043 return nullptr;
4044 }
4045
4046 // The register that we're going to copy into.
4047 Register InsertReg = VecReg;
4048 if (!DstReg)
4049 DstReg = MRI.createVirtualRegister(RegClass: DstRC);
4050 // If the lane index is 0, we just use a subregister COPY.
4051 if (LaneIdx == 0) {
4052 auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {*DstReg}, SrcOps: {})
4053 .addReg(RegNo: VecReg, flags: 0, SubReg: ExtractSubReg);
4054 RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI);
4055 return &*Copy;
4056 }
4057
4058 // Lane copies require 128-bit wide registers. If we're dealing with an
4059 // unpacked vector, then we need to move up to that width. Insert an implicit
4060 // def and a subregister insert to get us there.
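// E.g. (placeholder vregs), extracting lane 1 of a 64-bit <2 x s32>: the
// vector is first widened with
//   %wide:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %vec, dsub
// and the lane copy is then DUPi32 %dst, %wide, 1.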
4061 if (VecTy.getSizeInBits() != 128) {
4062 MachineInstr *ScalarToVector = emitScalarToVector(
4063 EltSize: VecTy.getSizeInBits(), DstRC: &AArch64::FPR128RegClass, Scalar: VecReg, MIRBuilder);
4064 if (!ScalarToVector)
4065 return nullptr;
4066 InsertReg = ScalarToVector->getOperand(i: 0).getReg();
4067 }
4068
4069 MachineInstr *LaneCopyMI =
4070 MIRBuilder.buildInstr(Opc: CopyOpc, DstOps: {*DstReg}, SrcOps: {InsertReg}).addImm(Val: LaneIdx);
4071 constrainSelectedInstRegOperands(I&: *LaneCopyMI, TII, TRI, RBI);
4072
4073 // Make sure that we actually constrain the initial copy.
4074 RBI.constrainGenericRegister(Reg: *DstReg, RC: *DstRC, MRI);
4075 return LaneCopyMI;
4076}
4077
4078bool AArch64InstructionSelector::selectExtractElt(
4079 MachineInstr &I, MachineRegisterInfo &MRI) {
4080 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4081 "unexpected opcode!");
4082 Register DstReg = I.getOperand(i: 0).getReg();
4083 const LLT NarrowTy = MRI.getType(Reg: DstReg);
4084 const Register SrcReg = I.getOperand(i: 1).getReg();
4085 const LLT WideTy = MRI.getType(Reg: SrcReg);
4086 (void)WideTy;
4087 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4088 "source register size too small!");
4089 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4090
4091 // Need the lane index to determine the correct copy opcode.
4092 MachineOperand &LaneIdxOp = I.getOperand(i: 2);
4093 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4094
4095 if (RBI.getRegBank(Reg: DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4096 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4097 return false;
4098 }
4099
4100 // Find the index to extract from.
4101 auto VRegAndVal = getIConstantVRegValWithLookThrough(VReg: LaneIdxOp.getReg(), MRI);
4102 if (!VRegAndVal)
4103 return false;
4104 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4105
4106
4107 const RegisterBank &DstRB = *RBI.getRegBank(Reg: DstReg, MRI, TRI);
4108 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg,
4109 LaneIdx, MIRBuilder&: MIB);
4110 if (!Extract)
4111 return false;
4112
4113 I.eraseFromParent();
4114 return true;
4115}
4116
4117bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4118 MachineInstr &I, MachineRegisterInfo &MRI) {
4119 unsigned NumElts = I.getNumOperands() - 1;
4120 Register SrcReg = I.getOperand(i: NumElts).getReg();
4121 const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
4122 const LLT SrcTy = MRI.getType(Reg: SrcReg);
4123
4124 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4125 if (SrcTy.getSizeInBits() > 128) {
4126 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge.\n");
4127 return false;
4128 }
4129
4130 // We implement a split vector operation by treating the sub-vectors as
4131 // scalars and extracting them.
4132 const RegisterBank &DstRB =
4133 *RBI.getRegBank(Reg: I.getOperand(i: 0).getReg(), MRI, TRI);
4134 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4135 Register Dst = I.getOperand(i: OpIdx).getReg();
4136 MachineInstr *Extract =
4137 emitExtractVectorElt(DstReg: Dst, DstRB, ScalarTy: NarrowTy, VecReg: SrcReg, LaneIdx: OpIdx, MIRBuilder&: MIB);
4138 if (!Extract)
4139 return false;
4140 }
4141 I.eraseFromParent();
4142 return true;
4143}
4144
4145bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4146 MachineRegisterInfo &MRI) {
4147 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4148 "unexpected opcode");
4149
4150 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4151 if (RBI.getRegBank(Reg: I.getOperand(i: 0).getReg(), MRI, TRI)->getID() !=
4152 AArch64::FPRRegBankID ||
4153 RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI)->getID() !=
4154 AArch64::FPRRegBankID) {
4155 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4156 "currently unsupported.\n");
4157 return false;
4158 }
4159
4160 // The last operand is the vector source register, and every other operand is
4161 // a register to unpack into.
4162 unsigned NumElts = I.getNumOperands() - 1;
4163 Register SrcReg = I.getOperand(i: NumElts).getReg();
4164 const LLT NarrowTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
4165 const LLT WideTy = MRI.getType(Reg: SrcReg);
4166 (void)WideTy;
4167 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4168 "can only unmerge from vector or s128 types!");
4169 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4170 "source register size too small!");
4171
4172 if (!NarrowTy.isScalar())
4173 return selectSplitVectorUnmerge(I, MRI);
4174
4175 // Choose a lane copy opcode and subregister based off of the size of the
4176 // vector's elements.
4177 unsigned CopyOpc = 0;
4178 unsigned ExtractSubReg = 0;
4179 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, EltSize: NarrowTy.getSizeInBits()))
4180 return false;
4181
4182 // Set up for the lane copies.
4183 MachineBasicBlock &MBB = *I.getParent();
4184
4185 // Stores the registers we'll be copying from.
4186 SmallVector<Register, 4> InsertRegs;
4187
4188 // We'll use the first register twice, so we only need NumElts-1 registers.
4189 unsigned NumInsertRegs = NumElts - 1;
4190
4191 // If our elements fit into exactly 128 bits, then we can copy from the source
4192 // directly. Otherwise, we need to do a bit of setup with some subregister
4193 // inserts.
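// E.g. unmerging a <4 x s32> (128 bits) into four s32s: result 0 is a plain
// ssub subregister COPY from the source, and results 1-3 are DUPi32 lane
// copies with lane indices 1, 2 and 3.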
4194 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4195 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4196 } else {
4197 // No. We have to perform subregister inserts. For each insert, create an
4198 // implicit def and a subregister insert, and save the register we create.
4199 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4200 Ty: LLT::fixed_vector(NumElements: NumElts, ScalarSizeInBits: WideTy.getScalarSizeInBits()),
4201 RB: *RBI.getRegBank(Reg: SrcReg, MRI, TRI));
4202 unsigned SubReg = 0;
4203 bool Found = getSubRegForClass(RC, TRI, SubReg);
4204 (void)Found;
4205 assert(Found && "expected to find last operand's subreg idx");
4206 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4207 Register ImpDefReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass);
4208 MachineInstr &ImpDefMI =
4209 *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: TargetOpcode::IMPLICIT_DEF),
4210 DestReg: ImpDefReg);
4211
4212 // Now, create the subregister insert from SrcReg.
4213 Register InsertReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass);
4214 MachineInstr &InsMI =
4215 *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(),
4216 MCID: TII.get(Opcode: TargetOpcode::INSERT_SUBREG), DestReg: InsertReg)
4217 .addUse(RegNo: ImpDefReg)
4218 .addUse(RegNo: SrcReg)
4219 .addImm(Val: SubReg);
4220
4221 constrainSelectedInstRegOperands(I&: ImpDefMI, TII, TRI, RBI);
4222 constrainSelectedInstRegOperands(I&: InsMI, TII, TRI, RBI);
4223
4224 // Save the register so that we can copy from it after.
4225 InsertRegs.push_back(Elt: InsertReg);
4226 }
4227 }
4228
4229 // Now that we've created any necessary subregister inserts, we can
4230 // create the copies.
4231 //
4232 // Perform the first copy separately as a subregister copy.
4233 Register CopyTo = I.getOperand(i: 0).getReg();
4234 auto FirstCopy = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {CopyTo}, SrcOps: {})
4235 .addReg(RegNo: InsertRegs[0], flags: 0, SubReg: ExtractSubReg);
4236 constrainSelectedInstRegOperands(I&: *FirstCopy, TII, TRI, RBI);
4237
4238 // Now, perform the remaining copies as vector lane copies.
4239 unsigned LaneIdx = 1;
4240 for (Register InsReg : InsertRegs) {
4241 Register CopyTo = I.getOperand(i: LaneIdx).getReg();
4242 MachineInstr &CopyInst =
4243 *BuildMI(BB&: MBB, I, MIMD: I.getDebugLoc(), MCID: TII.get(Opcode: CopyOpc), DestReg: CopyTo)
4244 .addUse(RegNo: InsReg)
4245 .addImm(Val: LaneIdx);
4246 constrainSelectedInstRegOperands(I&: CopyInst, TII, TRI, RBI);
4247 ++LaneIdx;
4248 }
4249
4250 // Separately constrain the first copy's destination. Because of the
4251 // limitation in constrainOperandRegClass, we can't guarantee that this will
4252 // actually be constrained. So, do it ourselves using the second operand.
4253 const TargetRegisterClass *RC =
4254 MRI.getRegClassOrNull(Reg: I.getOperand(i: 1).getReg());
4255 if (!RC) {
4256 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4257 return false;
4258 }
4259
4260 RBI.constrainGenericRegister(Reg: CopyTo, RC: *RC, MRI);
4261 I.eraseFromParent();
4262 return true;
4263}
4264
4265bool AArch64InstructionSelector::selectConcatVectors(
4266 MachineInstr &I, MachineRegisterInfo &MRI) {
4267 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4268 "Unexpected opcode");
4269 Register Dst = I.getOperand(i: 0).getReg();
4270 Register Op1 = I.getOperand(i: 1).getReg();
4271 Register Op2 = I.getOperand(i: 2).getReg();
4272 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder&: MIB);
4273 if (!ConcatMI)
4274 return false;
4275 I.eraseFromParent();
4276 return true;
4277}
4278
4279unsigned
4280AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4281 MachineFunction &MF) const {
4282 Type *CPTy = CPVal->getType();
4283 Align Alignment = MF.getDataLayout().getPrefTypeAlign(Ty: CPTy);
4284
4285 MachineConstantPool *MCP = MF.getConstantPool();
4286 return MCP->getConstantPoolIndex(C: CPVal, Alignment);
4287}
4288
4289MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4290 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4291 const TargetRegisterClass *RC;
4292 unsigned Opc;
4293 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4294 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(Ty: CPVal->getType());
4295 switch (Size) {
4296 case 16:
4297 RC = &AArch64::FPR128RegClass;
4298 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4299 break;
4300 case 8:
4301 RC = &AArch64::FPR64RegClass;
4302 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4303 break;
4304 case 4:
4305 RC = &AArch64::FPR32RegClass;
4306 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4307 break;
4308 case 2:
4309 RC = &AArch64::FPR16RegClass;
4310 Opc = AArch64::LDRHui;
4311 break;
4312 default:
4313 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4314 << *CPVal->getType());
4315 return nullptr;
4316 }
4317
4318 MachineInstr *LoadMI = nullptr;
4319 auto &MF = MIRBuilder.getMF();
4320 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4321 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4322 // Use load(literal) for tiny code model.
4323 LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {}).addConstantPoolIndex(Idx: CPIdx);
4324 } else {
4325 auto Adrp =
4326 MIRBuilder.buildInstr(Opc: AArch64::ADRP, DstOps: {&AArch64::GPR64RegClass}, SrcOps: {})
4327 .addConstantPoolIndex(Idx: CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGE);
4328
4329 LoadMI = &*MIRBuilder.buildInstr(Opc, DstOps: {RC}, SrcOps: {Adrp})
4330 .addConstantPoolIndex(
4331 Idx: CPIdx, Offset: 0, TargetFlags: AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4332
4333 constrainSelectedInstRegOperands(I&: *Adrp, TII, TRI, RBI);
4334 }
4335
4336 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4337 LoadMI->addMemOperand(MF, MO: MF.getMachineMemOperand(PtrInfo,
4338 F: MachineMemOperand::MOLoad,
4339 Size, BaseAlignment: Align(Size)));
4340 constrainSelectedInstRegOperands(I&: *LoadMI, TII, TRI, RBI);
4341 return LoadMI;
4342}
4343
4344 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
4345/// size and RB.
4346static std::pair<unsigned, unsigned>
4347getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4348 unsigned Opc, SubregIdx;
4349 if (RB.getID() == AArch64::GPRRegBankID) {
4350 if (EltSize == 8) {
4351 Opc = AArch64::INSvi8gpr;
4352 SubregIdx = AArch64::bsub;
4353 } else if (EltSize == 16) {
4354 Opc = AArch64::INSvi16gpr;
4355 SubregIdx = AArch64::ssub;
4356 } else if (EltSize == 32) {
4357 Opc = AArch64::INSvi32gpr;
4358 SubregIdx = AArch64::ssub;
4359 } else if (EltSize == 64) {
4360 Opc = AArch64::INSvi64gpr;
4361 SubregIdx = AArch64::dsub;
4362 } else {
4363 llvm_unreachable("invalid elt size!");
4364 }
4365 } else {
4366 if (EltSize == 8) {
4367 Opc = AArch64::INSvi8lane;
4368 SubregIdx = AArch64::bsub;
4369 } else if (EltSize == 16) {
4370 Opc = AArch64::INSvi16lane;
4371 SubregIdx = AArch64::hsub;
4372 } else if (EltSize == 32) {
4373 Opc = AArch64::INSvi32lane;
4374 SubregIdx = AArch64::ssub;
4375 } else if (EltSize == 64) {
4376 Opc = AArch64::INSvi64lane;
4377 SubregIdx = AArch64::dsub;
4378 } else {
4379 llvm_unreachable("invalid elt size!");
4380 }
4381 }
4382 return std::make_pair(x&: Opc, y&: SubregIdx);
4383}
4384
4385MachineInstr *AArch64InstructionSelector::emitInstr(
4386 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4387 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4388 const ComplexRendererFns &RenderFns) const {
4389 assert(Opcode && "Expected an opcode?");
4390 assert(!isPreISelGenericOpcode(Opcode) &&
4391 "Function should only be used to produce selected instructions!");
4392 auto MI = MIRBuilder.buildInstr(Opc: Opcode, DstOps, SrcOps);
4393 if (RenderFns)
4394 for (auto &Fn : *RenderFns)
4395 Fn(MI);
4396 constrainSelectedInstRegOperands(I&: *MI, TII, TRI, RBI);
4397 return &*MI;
4398}
4399
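// The AddrModeAndSizeToOpcode table passed to emitAddSub below is indexed as
// follows; each row holds the {64-bit, 32-bit} opcodes:
//   [0] register + positive arithmetic immediate (INSTRri)
//   [1] register + shifted register              (INSTRrs)
//   [2] register + register                      (INSTRrr)
//   [3] inverse op + negated immediate           (INSTRri of the inverse op)
//   [4] register + extended register             (INSTRrx)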
4400MachineInstr *AArch64InstructionSelector::emitAddSub(
4401 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4402 Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4403 MachineIRBuilder &MIRBuilder) const {
4404 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4405 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4406 auto Ty = MRI.getType(Reg: LHS.getReg());
4407 assert(!Ty.isVector() && "Expected a scalar or pointer?");
4408 unsigned Size = Ty.getSizeInBits();
4409 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4410 bool Is32Bit = Size == 32;
4411
4412 // INSTRri form with positive arithmetic immediate.
4413 if (auto Fns = selectArithImmed(Root&: RHS))
4414 return emitInstr(Opcode: AddrModeAndSizeToOpcode[0][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4415 MIRBuilder, RenderFns: Fns);
4416
4417 // INSTRri form with negative arithmetic immediate.
4418 if (auto Fns = selectNegArithImmed(Root&: RHS))
4419 return emitInstr(Opcode: AddrModeAndSizeToOpcode[3][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4420 MIRBuilder, RenderFns: Fns);
4421
4422 // INSTRrx form.
4423 if (auto Fns = selectArithExtendedRegister(Root&: RHS))
4424 return emitInstr(Opcode: AddrModeAndSizeToOpcode[4][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4425 MIRBuilder, RenderFns: Fns);
4426
4427 // INSTRrs form.
4428 if (auto Fns = selectShiftedRegister(Root&: RHS))
4429 return emitInstr(Opcode: AddrModeAndSizeToOpcode[1][Is32Bit], DstOps: {Dst}, SrcOps: {LHS},
4430 MIRBuilder, RenderFns: Fns);
4431 return emitInstr(Opcode: AddrModeAndSizeToOpcode[2][Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS},
4432 MIRBuilder);
4433}
4434
4435MachineInstr *
4436AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4437 MachineOperand &RHS,
4438 MachineIRBuilder &MIRBuilder) const {
4439 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4440 ._M_elems: {{AArch64::ADDXri, AArch64::ADDWri},
4441 {AArch64::ADDXrs, AArch64::ADDWrs},
4442 {AArch64::ADDXrr, AArch64::ADDWrr},
4443 {AArch64::SUBXri, AArch64::SUBWri},
4444 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4445 return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst: DefReg, LHS, RHS, MIRBuilder);
4446}
4447
4448MachineInstr *
4449AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4450 MachineOperand &RHS,
4451 MachineIRBuilder &MIRBuilder) const {
4452 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4453 ._M_elems: {{AArch64::ADDSXri, AArch64::ADDSWri},
4454 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4455 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4456 {AArch64::SUBSXri, AArch64::SUBSWri},
4457 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4458 return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder);
4459}
4460
4461MachineInstr *
4462AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4463 MachineOperand &RHS,
4464 MachineIRBuilder &MIRBuilder) const {
4465 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4466 ._M_elems: {{AArch64::SUBSXri, AArch64::SUBSWri},
4467 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4468 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4469 {AArch64::ADDSXri, AArch64::ADDSWri},
4470 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4471 return emitAddSub(AddrModeAndSizeToOpcode: OpcTable, Dst, LHS, RHS, MIRBuilder);
4472}
4473
4474MachineInstr *
4475AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4476 MachineOperand &RHS,
4477 MachineIRBuilder &MIRBuilder) const {
4478 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4479 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4480 bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32);
4481 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4482 return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder);
4483}
4484
4485MachineInstr *
4486AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4487 MachineOperand &RHS,
4488 MachineIRBuilder &MIRBuilder) const {
4489 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4490 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4491 bool Is32Bit = (MRI->getType(Reg: LHS.getReg()).getSizeInBits() == 32);
4492 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4493 return emitInstr(Opcode: OpcTable[Is32Bit], DstOps: {Dst}, SrcOps: {LHS, RHS}, MIRBuilder);
4494}
4495
4496MachineInstr *
4497AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4498 MachineIRBuilder &MIRBuilder) const {
4499 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4500 bool Is32Bit = (MRI.getType(Reg: LHS.getReg()).getSizeInBits() == 32);
4501 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4502 return emitADDS(Dst: MRI.createVirtualRegister(RegClass: RC), LHS, RHS, MIRBuilder);
4503}
4504
4505MachineInstr *
4506AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4507 MachineIRBuilder &MIRBuilder) const {
4508 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4509 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4510 LLT Ty = MRI.getType(Reg: LHS.getReg());
4511 unsigned RegSize = Ty.getSizeInBits();
4512 bool Is32Bit = (RegSize == 32);
4513 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4514 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4515 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4516 // ANDS needs a logical immediate for its immediate form. Check if we can
4517 // fold one in.
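// E.g. when folding a compare of (x & 0xff) against zero, the constant fits
// the logical-immediate form and the result is equivalent to 'tst w0, #0xff'
// (register chosen for illustration).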
4518 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI)) {
4519 int64_t Imm = ValAndVReg->Value.getSExtValue();
4520
4521 if (AArch64_AM::isLogicalImmediate(imm: Imm, regSize: RegSize)) {
4522 auto TstMI = MIRBuilder.buildInstr(Opc: OpcTable[0][Is32Bit], DstOps: {Ty}, SrcOps: {LHS});
4523 TstMI.addImm(Val: AArch64_AM::encodeLogicalImmediate(imm: Imm, regSize: RegSize));
4524 constrainSelectedInstRegOperands(I&: *TstMI, TII, TRI, RBI);
4525 return &*TstMI;
4526 }
4527 }
4528
4529 if (auto Fns = selectLogicalShiftedRegister(Root&: RHS))
4530 return emitInstr(Opcode: OpcTable[1][Is32Bit], DstOps: {Ty}, SrcOps: {LHS}, MIRBuilder, RenderFns: Fns);
4531 return emitInstr(Opcode: OpcTable[2][Is32Bit], DstOps: {Ty}, SrcOps: {LHS, RHS}, MIRBuilder);
4532}
4533
4534MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4535 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4536 MachineIRBuilder &MIRBuilder) const {
4537 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4538 assert(Predicate.isPredicate() && "Expected predicate?");
4539 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4540 LLT CmpTy = MRI.getType(Reg: LHS.getReg());
4541 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4542 unsigned Size = CmpTy.getSizeInBits();
4543 (void)Size;
4544 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4545 // Fold the compare into a cmn or tst if possible.
4546 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4547 return FoldCmp;
4548 auto Dst = MRI.cloneVirtualRegister(VReg: LHS.getReg());
4549 return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4550}
4551
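// Some FP predicates need two AArch64 condition codes (e.g. FCMP_ONE tests
// 'mi || gt'). In that case, materialize each bit with a CSINC (a CSET of the
// un-inverted code) and OR them together; otherwise a single CSINC suffices.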
4552MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4553 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4554 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4555#ifndef NDEBUG
4556 LLT Ty = MRI.getType(Dst);
4557 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4558 "Expected a 32-bit scalar register?");
4559#endif
4560 const Register ZReg = AArch64::WZR;
4561 AArch64CC::CondCode CC1, CC2;
4562 changeFCMPPredToAArch64CC(P: Pred, CondCode&: CC1, CondCode2&: CC2);
4563 auto InvCC1 = AArch64CC::getInvertedCondCode(Code: CC1);
4564 if (CC2 == AArch64CC::AL)
4565 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1,
4566 MIRBuilder);
4567 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4568 Register Def1Reg = MRI.createVirtualRegister(RegClass: RC);
4569 Register Def2Reg = MRI.createVirtualRegister(RegClass: RC);
4570 auto InvCC2 = AArch64CC::getInvertedCondCode(Code: CC2);
4571 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC1, MIRBuilder);
4572 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, Pred: InvCC2, MIRBuilder);
4573 auto OrMI = MIRBuilder.buildInstr(Opc: AArch64::ORRWrr, DstOps: {Dst}, SrcOps: {Def1Reg, Def2Reg});
4574 constrainSelectedInstRegOperands(I&: *OrMI, TII, TRI, RBI);
4575 return &*OrMI;
4576}
4577
4578MachineInstr *AArch64InstructionSelector::emitFPCompare(
4579 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4580 std::optional<CmpInst::Predicate> Pred) const {
4581 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4582 LLT Ty = MRI.getType(Reg: LHS);
4583 if (Ty.isVector())
4584 return nullptr;
4585 unsigned OpSize = Ty.getSizeInBits();
4586 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4587
4588 // If this is a compare against +0.0, then we don't have
4589 // to explicitly materialize a constant.
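// E.g. emit 'fcmp s0, #0.0' rather than loading +0.0 into a register first.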
4590 const ConstantFP *FPImm = getConstantFPVRegVal(VReg: RHS, MRI);
4591 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4592
4593 auto IsEqualityPred = [](CmpInst::Predicate P) {
4594 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4595 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4596 };
4597 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4598 // Try commuting the operands.
4599 const ConstantFP *LHSImm = getConstantFPVRegVal(VReg: LHS, MRI);
4600 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4601 ShouldUseImm = true;
4602 std::swap(a&: LHS, b&: RHS);
4603 }
4604 }
4605 unsigned CmpOpcTbl[2][3] = {
4606 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4607 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4608 unsigned CmpOpc =
4609 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4610
4611 // Partially build the compare. Decide if we need to add a use for the
4612 // third operand based on whether or not we're comparing against 0.0.
4613 auto CmpMI = MIRBuilder.buildInstr(Opcode: CmpOpc).addUse(RegNo: LHS);
4614 CmpMI.setMIFlags(MachineInstr::NoFPExcept);
4615 if (!ShouldUseImm)
4616 CmpMI.addUse(RegNo: RHS);
4617 constrainSelectedInstRegOperands(I&: *CmpMI, TII, TRI, RBI);
4618 return &*CmpMI;
4619}
4620
4621MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4622 std::optional<Register> Dst, Register Op1, Register Op2,
4623 MachineIRBuilder &MIRBuilder) const {
4624 // We implement a vector concat by:
4625 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4626 // 2. Insert the upper vector into the destination's upper element
4627 // TODO: some of this code is common with G_BUILD_VECTOR handling.
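// For example, when concatenating two <2 x s32> values into a <4 x s32>, each
// 64-bit operand is first placed in the low d-subregister of a 128-bit
// register, and the second is then inserted into lane 1, which corresponds
// roughly to 'mov v0.d[1], v1.d[0]' (registers for illustration only).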
4628 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4629
4630 const LLT Op1Ty = MRI.getType(Reg: Op1);
4631 const LLT Op2Ty = MRI.getType(Reg: Op2);
4632
4633 if (Op1Ty != Op2Ty) {
4634 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4635 return nullptr;
4636 }
4637 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4638
4639 if (Op1Ty.getSizeInBits() >= 128) {
4640 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4641 return nullptr;
4642 }
4643
4644 // At the moment we just support 64 bit vector concats.
4645 if (Op1Ty.getSizeInBits() != 64) {
4646 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4647 return nullptr;
4648 }
4649
4650 const LLT ScalarTy = LLT::scalar(SizeInBits: Op1Ty.getSizeInBits());
4651 const RegisterBank &FPRBank = *RBI.getRegBank(Reg: Op1, MRI, TRI);
4652 const TargetRegisterClass *DstRC =
4653 getRegClassForTypeOnBank(Ty: Op1Ty.multiplyElements(Factor: 2), RB: FPRBank);
4654
4655 MachineInstr *WidenedOp1 =
4656 emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op1, MIRBuilder);
4657 MachineInstr *WidenedOp2 =
4658 emitScalarToVector(EltSize: ScalarTy.getSizeInBits(), DstRC, Scalar: Op2, MIRBuilder);
4659 if (!WidenedOp1 || !WidenedOp2) {
4660 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4661 return nullptr;
4662 }
4663
4664 // Now do the insert of the upper element.
4665 unsigned InsertOpc, InsSubRegIdx;
4666 std::tie(args&: InsertOpc, args&: InsSubRegIdx) =
4667 getInsertVecEltOpInfo(RB: FPRBank, EltSize: ScalarTy.getSizeInBits());
4668
4669 if (!Dst)
4670 Dst = MRI.createVirtualRegister(RegClass: DstRC);
4671 auto InsElt =
4672 MIRBuilder
4673 .buildInstr(Opc: InsertOpc, DstOps: {*Dst}, SrcOps: {WidenedOp1->getOperand(i: 0).getReg()})
4674 .addImm(Val: 1) /* Lane index */
4675 .addUse(RegNo: WidenedOp2->getOperand(i: 0).getReg())
4676 .addImm(Val: 0);
4677 constrainSelectedInstRegOperands(I&: *InsElt, TII, TRI, RBI);
4678 return &*InsElt;
4679}
4680
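// Note that CSINC Dst, WZR/XZR, WZR/XZR, Pred produces 1 when Pred is *false*
// and 0 when it is true (it is the CSET alias of the inverted condition), so
// callers that want a plain CSET pass the inverted condition code.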
4681MachineInstr *
4682AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4683 Register Src2, AArch64CC::CondCode Pred,
4684 MachineIRBuilder &MIRBuilder) const {
4685 auto &MRI = *MIRBuilder.getMRI();
4686 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg: Dst);
4687 // If we used a register class, then this won't necessarily have an LLT.
4688 // Compute the size based on whether we have a register class or a bank.
4689 unsigned Size;
4690 if (const auto *RC = dyn_cast<const TargetRegisterClass *>(Val: RegClassOrBank))
4691 Size = TRI.getRegSizeInBits(RC: *RC);
4692 else
4693 Size = MRI.getType(Reg: Dst).getSizeInBits();
4694 // Some opcodes use s1.
4695 assert(Size <= 64 && "Expected 64 bits or less only!");
4696 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4697 unsigned Opc = OpcTable[Size == 64];
4698 auto CSINC = MIRBuilder.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Src1, Src2}).addImm(Val: Pred);
4699 constrainSelectedInstRegOperands(I&: *CSINC, TII, TRI, RBI);
4700 return &*CSINC;
4701}
4702
4703MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4704 Register CarryReg) {
4705 MachineRegisterInfo *MRI = MIB.getMRI();
4706 unsigned Opcode = I.getOpcode();
4707
4708 // If the instruction is a SUB, we need to negate the carry,
4709 // because borrowing is indicated by carry-flag == 0.
4710 bool NeedsNegatedCarry =
4711 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4712
4713 // If the previous instruction will already produce the correct carry, do not
4714 // emit a carry-generating instruction. E.g. for G_UADDE/G_USUBE sequences
4715 // generated during legalization of wide add/sub. This optimization depends on
4716 // these sequences not being interrupted by other instructions.
4717 // We have to select the previous instruction before the carry-using
4718 // instruction is deleted by the calling function, otherwise the previous
4719 // instruction might become dead and would get deleted.
4720 MachineInstr *SrcMI = MRI->getVRegDef(Reg: CarryReg);
4721 if (SrcMI == I.getPrevNode()) {
4722 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(Val: SrcMI)) {
4723 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4724 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4725 CarrySrcMI->isUnsigned() &&
4726 CarrySrcMI->getCarryOutReg() == CarryReg &&
4727 selectAndRestoreState(I&: *SrcMI))
4728 return nullptr;
4729 }
4730 }
4731
4732 Register DeadReg = MRI->createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
4733
4734 if (NeedsNegatedCarry) {
4735 // (0 - Carry) sets !C in NZCV when Carry == 1
4736 Register ZReg = AArch64::WZR;
4737 return emitInstr(Opcode: AArch64::SUBSWrr, DstOps: {DeadReg}, SrcOps: {ZReg, CarryReg}, MIRBuilder&: MIB);
4738 }
4739
4740 // (Carry - 1) sets !C in NZCV when Carry == 0
4741 auto Fns = select12BitValueWithLeftShift(Immed: 1);
4742 return emitInstr(Opcode: AArch64::SUBSWri, DstOps: {DeadReg}, SrcOps: {CarryReg}, MIRBuilder&: MIB, RenderFns: Fns);
4743}
4744
4745bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4746 MachineRegisterInfo &MRI) {
4747 auto &CarryMI = cast<GAddSubCarryOut>(Val&: I);
4748
4749 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(Val: &I)) {
4750 // Set NZCV carry according to carry-in VReg
4751 emitCarryIn(I, CarryReg: CarryInMI->getCarryInReg());
4752 }
4753
4754 // Emit the operation and get the correct condition code.
4755 auto OpAndCC = emitOverflowOp(Opcode: I.getOpcode(), Dst: CarryMI.getDstReg(),
4756 LHS&: CarryMI.getLHS(), RHS&: CarryMI.getRHS(), MIRBuilder&: MIB);
4757
4758 Register CarryOutReg = CarryMI.getCarryOutReg();
4759
4760 // Don't convert carry-out to VReg if it is never used
4761 if (!MRI.use_nodbg_empty(RegNo: CarryOutReg)) {
4762 // Now, put the overflow result in the register given by the first operand
4763 // to the overflow op. CSINC increments the result when the predicate is
4764 // false, so to get the increment when it's true, we need to use the
4765 // inverse. In this case, we want to increment when carry is set.
4766 Register ZReg = AArch64::WZR;
4767 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4768 Pred: getInvertedCondCode(Code: OpAndCC.second), MIRBuilder&: MIB);
4769 }
4770
4771 I.eraseFromParent();
4772 return true;
4773}
4774
4775std::pair<MachineInstr *, AArch64CC::CondCode>
4776AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4777 MachineOperand &LHS,
4778 MachineOperand &RHS,
4779 MachineIRBuilder &MIRBuilder) const {
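// The returned condition code tests the overflow/carry-out of the emitted
// flag-setting instruction:
//   VS - signed overflow (V set)
//   HS - unsigned add carry-out (C set)
//   LO - unsigned subtract borrow (C clear)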
4780 switch (Opcode) {
4781 default:
4782 llvm_unreachable("Unexpected opcode!");
4783 case TargetOpcode::G_SADDO:
4784 return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4785 case TargetOpcode::G_UADDO:
4786 return std::make_pair(x: emitADDS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS);
4787 case TargetOpcode::G_SSUBO:
4788 return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4789 case TargetOpcode::G_USUBO:
4790 return std::make_pair(x: emitSUBS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO);
4791 case TargetOpcode::G_SADDE:
4792 return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4793 case TargetOpcode::G_UADDE:
4794 return std::make_pair(x: emitADCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::HS);
4795 case TargetOpcode::G_SSUBE:
4796 return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::VS);
4797 case TargetOpcode::G_USUBE:
4798 return std::make_pair(x: emitSBCS(Dst, LHS, RHS, MIRBuilder), y: AArch64CC::LO);
4799 }
4800}
4801
4802/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4803/// expressed as a conjunction.
4804/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4805/// changing the conditions on the CMP tests.
4806/// (this means we can call emitConjunctionRec() with
4807/// Negate==true on this sub-tree)
4808/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4809/// cannot do the negation naturally. We are required to
4810/// emit the subtree first in this case.
4811 /// \param WillNegate Set to true if we are called when the result of this
4812/// subexpression must be negated. This happens when the
4813/// outer expression is an OR. We can use this fact to know
4814/// that we have a double negation (or (or ...) ...) that
4815/// can be implemented for free.
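///
/// As a sketch (the exact NZCV immediate is chosen by
/// getNZCVToSatisfyCondCode), the conjunction
///   (icmp slt a, b) & (icmp eq c, d)
/// can be emitted as a CCMP chain:
///   cmp  c, d
///   ccmp a, b, #nzcv, eq  ; only compare a, b if the first test passed;
///                         ; otherwise set flags so that 'lt' fails
///   cset w0, lt           ; if the result feeds a boolean use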
4816static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4817 bool WillNegate, MachineRegisterInfo &MRI,
4818 unsigned Depth = 0) {
4819 if (!MRI.hasOneNonDBGUse(RegNo: Val))
4820 return false;
4821 MachineInstr *ValDef = MRI.getVRegDef(Reg: Val);
4822 unsigned Opcode = ValDef->getOpcode();
4823 if (isa<GAnyCmp>(Val: ValDef)) {
4824 CanNegate = true;
4825 MustBeFirst = false;
4826 return true;
4827 }
4828 // Protect against exponential runtime and stack overflow.
4829 if (Depth > 6)
4830 return false;
4831 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4832 bool IsOR = Opcode == TargetOpcode::G_OR;
4833 Register O0 = ValDef->getOperand(i: 1).getReg();
4834 Register O1 = ValDef->getOperand(i: 2).getReg();
4835 bool CanNegateL;
4836 bool MustBeFirstL;
4837 if (!canEmitConjunction(Val: O0, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI, Depth: Depth + 1))
4838 return false;
4839 bool CanNegateR;
4840 bool MustBeFirstR;
4841 if (!canEmitConjunction(Val: O1, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI, Depth: Depth + 1))
4842 return false;
4843
4844 if (MustBeFirstL && MustBeFirstR)
4845 return false;
4846
4847 if (IsOR) {
4848 // For an OR expression we need to be able to naturally negate at least
4849 // one side or we cannot do the transformation at all.
4850 if (!CanNegateL && !CanNegateR)
4851 return false;
4852 // If the result of the OR will be negated and we can naturally negate
4853 // the leaves, then this sub-tree as a whole negates naturally.
4854 CanNegate = WillNegate && CanNegateL && CanNegateR;
4855 // If we cannot naturally negate the whole sub-tree, then this must be
4856 // emitted first.
4857 MustBeFirst = !CanNegate;
4858 } else {
4859 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4860 // We cannot naturally negate an AND operation.
4861 CanNegate = false;
4862 MustBeFirst = MustBeFirstL || MustBeFirstR;
4863 }
4864 return true;
4865 }
4866 return false;
4867}
4868
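// Emit a conditional compare (CCMP/CCMN/FCCMP): if 'Predicate' holds, the
// operands are compared as usual; otherwise NZCV is set to an immediate chosen
// so that 'OutCC' evaluates to false. This is what chains the conjunction
// together.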
4869MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4870 Register LHS, Register RHS, CmpInst::Predicate CC,
4871 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4872 MachineIRBuilder &MIB) const {
4873 auto &MRI = *MIB.getMRI();
4874 LLT OpTy = MRI.getType(Reg: LHS);
4875 unsigned CCmpOpc;
4876 std::optional<ValueAndVReg> C;
4877 if (CmpInst::isIntPredicate(P: CC)) {
4878 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4879 C = getIConstantVRegValWithLookThrough(VReg: RHS, MRI);
4880 if (!C || C->Value.sgt(RHS: 31) || C->Value.slt(RHS: -31))
4881 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4882 else if (C->Value.ule(RHS: 31))
4883 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4884 else
4885 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4886 } else {
4887 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4888 OpTy.getSizeInBits() == 64);
4889 switch (OpTy.getSizeInBits()) {
4890 case 16:
4891 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4892 CCmpOpc = AArch64::FCCMPHrr;
4893 break;
4894 case 32:
4895 CCmpOpc = AArch64::FCCMPSrr;
4896 break;
4897 case 64:
4898 CCmpOpc = AArch64::FCCMPDrr;
4899 break;
4900 default:
4901 return nullptr;
4902 }
4903 }
4904 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(Code: OutCC);
4905 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(Code: InvOutCC);
4906 auto CCmp =
4907 MIB.buildInstr(Opc: CCmpOpc, DstOps: {}, SrcOps: {LHS});
4908 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4909 CCmp.addImm(Val: C->Value.getZExtValue());
4910 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4911 CCmp.addImm(Val: C->Value.abs().getZExtValue());
4912 else
4913 CCmp.addReg(RegNo: RHS);
4914 CCmp.addImm(Val: NZCV).addImm(Val: Predicate);
4915 constrainSelectedInstRegOperands(I&: *CCmp, TII, TRI, RBI);
4916 return &*CCmp;
4917}
4918
4919MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4920 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4921 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4922 // We're at a tree leaf, produce a conditional comparison operation.
4923 auto &MRI = *MIB.getMRI();
4924 MachineInstr *ValDef = MRI.getVRegDef(Reg: Val);
4925 unsigned Opcode = ValDef->getOpcode();
4926 if (auto *Cmp = dyn_cast<GAnyCmp>(Val: ValDef)) {
4927 Register LHS = Cmp->getLHSReg();
4928 Register RHS = Cmp->getRHSReg();
4929 CmpInst::Predicate CC = Cmp->getCond();
4930 if (Negate)
4931 CC = CmpInst::getInversePredicate(pred: CC);
4932 if (isa<GICmp>(Val: Cmp)) {
4933 OutCC = changeICMPPredToAArch64CC(P: CC);
4934 } else {
4935 // Handle special FP cases.
4936 AArch64CC::CondCode ExtraCC;
4937 changeFPCCToANDAArch64CC(CC, CondCode&: OutCC, CondCode2&: ExtraCC);
4938 // Some floating point conditions can't be tested with a single condition
4939 // code. Construct an additional comparison in this case.
4940 if (ExtraCC != AArch64CC::AL) {
4941 MachineInstr *ExtraCmp;
4942 if (!CCOp)
4943 ExtraCmp = emitFPCompare(LHS, RHS, MIRBuilder&: MIB, Pred: CC);
4944 else
4945 ExtraCmp =
4946 emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC: ExtraCC, MIB);
4947 CCOp = ExtraCmp->getOperand(i: 0).getReg();
4948 Predicate = ExtraCC;
4949 }
4950 }
4951
4952 // Produce a normal comparison if we are first in the chain
4953 if (!CCOp) {
4954 auto Dst = MRI.cloneVirtualRegister(VReg: LHS);
4955 if (isa<GICmp>(Val: Cmp))
4956 return emitSUBS(Dst, LHS&: Cmp->getOperand(i: 2), RHS&: Cmp->getOperand(i: 3), MIRBuilder&: MIB);
4957 return emitFPCompare(LHS: Cmp->getOperand(i: 2).getReg(),
4958 RHS: Cmp->getOperand(i: 3).getReg(), MIRBuilder&: MIB);
4959 }
4960 // Otherwise produce a ccmp.
4961 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4962 }
4963 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4964
4965 bool IsOR = Opcode == TargetOpcode::G_OR;
4966
4967 Register LHS = ValDef->getOperand(i: 1).getReg();
4968 bool CanNegateL;
4969 bool MustBeFirstL;
4970 bool ValidL = canEmitConjunction(Val: LHS, CanNegate&: CanNegateL, MustBeFirst&: MustBeFirstL, WillNegate: IsOR, MRI);
4971 assert(ValidL && "Valid conjunction/disjunction tree");
4972 (void)ValidL;
4973
4974 Register RHS = ValDef->getOperand(i: 2).getReg();
4975 bool CanNegateR;
4976 bool MustBeFirstR;
4977 bool ValidR = canEmitConjunction(Val: RHS, CanNegate&: CanNegateR, MustBeFirst&: MustBeFirstR, WillNegate: IsOR, MRI);
4978 assert(ValidR && "Valid conjunction/disjunction tree");
4979 (void)ValidR;
4980
4981 // Swap sub-tree that must come first to the right side.
4982 if (MustBeFirstL) {
4983 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4984 std::swap(a&: LHS, b&: RHS);
4985 std::swap(a&: CanNegateL, b&: CanNegateR);
4986 std::swap(a&: MustBeFirstL, b&: MustBeFirstR);
4987 }
4988
4989 bool NegateR;
4990 bool NegateAfterR;
4991 bool NegateL;
4992 bool NegateAfterAll;
4993 if (Opcode == TargetOpcode::G_OR) {
4994 // Swap the sub-tree that we can negate naturally to the left.
4995 if (!CanNegateL) {
4996 assert(CanNegateR && "at least one side must be negatable");
4997 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4998 assert(!Negate);
4999 std::swap(a&: LHS, b&: RHS);
5000 NegateR = false;
5001 NegateAfterR = true;
5002 } else {
5003 // Negate the left sub-tree if possible, otherwise negate the result.
5004 NegateR = CanNegateR;
5005 NegateAfterR = !CanNegateR;
5006 }
5007 NegateL = true;
5008 NegateAfterAll = !Negate;
5009 } else {
5010 assert(Opcode == TargetOpcode::G_AND &&
5011 "Valid conjunction/disjunction tree");
5012 assert(!Negate && "Valid conjunction/disjunction tree");
5013
5014 NegateL = false;
5015 NegateR = false;
5016 NegateAfterR = false;
5017 NegateAfterAll = false;
5018 }
5019
5020 // Emit sub-trees.
5021 AArch64CC::CondCode RHSCC;
5022 MachineInstr *CmpR =
5023 emitConjunctionRec(Val: RHS, OutCC&: RHSCC, Negate: NegateR, CCOp, Predicate, MIB);
5024 if (NegateAfterR)
5025 RHSCC = AArch64CC::getInvertedCondCode(Code: RHSCC);
5026 MachineInstr *CmpL = emitConjunctionRec(
5027 Val: LHS, OutCC, Negate: NegateL, CCOp: CmpR->getOperand(i: 0).getReg(), Predicate: RHSCC, MIB);
5028 if (NegateAfterAll)
5029 OutCC = AArch64CC::getInvertedCondCode(Code: OutCC);
5030 return CmpL;
5031}
5032
5033MachineInstr *AArch64InstructionSelector::emitConjunction(
5034 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
5035 bool DummyCanNegate;
5036 bool DummyMustBeFirst;
5037 if (!canEmitConjunction(Val, CanNegate&: DummyCanNegate, MustBeFirst&: DummyMustBeFirst, WillNegate: false,
5038 MRI&: *MIB.getMRI()))
5039 return nullptr;
5040 return emitConjunctionRec(Val, OutCC, Negate: false, CCOp: Register(), Predicate: AArch64CC::AL, MIB);
5041}
5042
5043bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
5044 MachineInstr &CondMI) {
5045 AArch64CC::CondCode AArch64CC;
5046 MachineInstr *ConjMI = emitConjunction(Val: SelI.getCondReg(), OutCC&: AArch64CC, MIB);
5047 if (!ConjMI)
5048 return false;
5049
5050 emitSelect(Dst: SelI.getReg(Idx: 0), True: SelI.getTrueReg(), False: SelI.getFalseReg(), CC: AArch64CC, MIB);
5051 SelI.eraseFromParent();
5052 return true;
5053}
5054
5055bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
5056 MachineRegisterInfo &MRI = *MIB.getMRI();
5057 // We want to recognize this pattern:
5058 //
5059 // $z = G_FCMP pred, $x, $y
5060 // ...
5061 // $w = G_SELECT $z, $a, $b
5062 //
5063 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
5064 // some copies/truncs in between.)
5065 //
5066 // If we see this, then we can emit something like this:
5067 //
5068 // fcmp $x, $y
5069 // fcsel $w, $a, $b, pred
5070 //
5071 // Rather than emitting both of the rather long sequences in the standard
5072 // G_FCMP/G_SELECT select methods.
5073
5074 // First, check if the condition is defined by a compare.
5075 MachineInstr *CondDef = MRI.getVRegDef(Reg: I.getOperand(i: 1).getReg());
5076
5077 // We can only fold if all of the defs have one use.
5078 Register CondDefReg = CondDef->getOperand(i: 0).getReg();
5079 if (!MRI.hasOneNonDBGUse(RegNo: CondDefReg)) {
5080 // Unless it's another select.
5081 for (const MachineInstr &UI : MRI.use_nodbg_instructions(Reg: CondDefReg)) {
5082 if (CondDef == &UI)
5083 continue;
5084 if (UI.getOpcode() != TargetOpcode::G_SELECT)
5085 return false;
5086 }
5087 }
5088
5089 // Is the condition defined by a compare?
5090 unsigned CondOpc = CondDef->getOpcode();
5091 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
5092 if (tryOptSelectConjunction(SelI&: I, CondMI&: *CondDef))
5093 return true;
5094 return false;
5095 }
5096
5097 AArch64CC::CondCode CondCode;
5098 if (CondOpc == TargetOpcode::G_ICMP) {
5099 auto Pred =
5100 static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate());
5101 CondCode = changeICMPPredToAArch64CC(P: Pred);
5102 emitIntegerCompare(LHS&: CondDef->getOperand(i: 2), RHS&: CondDef->getOperand(i: 3),
5103 Predicate&: CondDef->getOperand(i: 1), MIRBuilder&: MIB);
5104 } else {
5105 // Get the condition code for the select.
5106 auto Pred =
5107 static_cast<CmpInst::Predicate>(CondDef->getOperand(i: 1).getPredicate());
5108 AArch64CC::CondCode CondCode2;
5109 changeFCMPPredToAArch64CC(P: Pred, CondCode, CondCode2);
5110
5111 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5112 // instructions to emit the comparison.
5113 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5114 // unnecessary.
5115 if (CondCode2 != AArch64CC::AL)
5116 return false;
5117
5118 if (!emitFPCompare(LHS: CondDef->getOperand(i: 2).getReg(),
5119 RHS: CondDef->getOperand(i: 3).getReg(), MIRBuilder&: MIB)) {
5120 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5121 return false;
5122 }
5123 }
5124
5125 // Emit the select.
5126 emitSelect(Dst: I.getOperand(i: 0).getReg(), True: I.getOperand(i: 2).getReg(),
5127 False: I.getOperand(i: 3).getReg(), CC: CondCode, MIB);
5128 I.eraseFromParent();
5129 return true;
5130}
5131
5132MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5133 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5134 MachineIRBuilder &MIRBuilder) const {
5135 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5136 "Unexpected MachineOperand");
5137 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5138 // We want to find this sort of thing:
5139 // x = G_SUB 0, y
5140 // G_ICMP z, x
5141 //
5142 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5143 // e.g:
5144 //
5145 // cmn z, y
5146
5147 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5148 MachineInstr *LHSDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI);
5149 MachineInstr *RHSDef = getDefIgnoringCopies(Reg: RHS.getReg(), MRI);
5150 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5151 // Given this:
5152 //
5153 // x = G_SUB 0, y
5154 // G_ICMP x, z
5155 //
5156 // Produce this:
5157 //
5158 // cmn y, z
5159 if (isCMN(MaybeSub: LHSDef, Pred: P, MRI))
5160 return emitCMN(LHS&: LHSDef->getOperand(i: 2), RHS, MIRBuilder);
5161
5162 // Same idea here, but with the RHS of the compare instead:
5163 //
5164 // Given this:
5165 //
5166 // x = G_SUB 0, y
5167 // G_ICMP z, x
5168 //
5169 // Produce this:
5170 //
5171 // cmn z, y
5172 if (isCMN(MaybeSub: RHSDef, Pred: P, MRI))
5173 return emitCMN(LHS, RHS&: RHSDef->getOperand(i: 2), MIRBuilder);
5174
5175 // Given this:
5176 //
5177 // z = G_AND x, y
5178 // G_ICMP z, 0
5179 //
5180 // Produce this if the compare is not unsigned (i.e. signed or equality):
5181 //
5182 // tst x, y
5183 if (!CmpInst::isUnsigned(predicate: P) && LHSDef &&
5184 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5185 // Make sure that the RHS is 0.
5186 auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: RHS.getReg(), MRI);
5187 if (!ValAndVReg || ValAndVReg->Value != 0)
5188 return nullptr;
5189
5190 return emitTST(LHS&: LHSDef->getOperand(i: 1),
5191 RHS&: LHSDef->getOperand(i: 2), MIRBuilder);
5192 }
5193
5194 return nullptr;
5195}
5196
5197bool AArch64InstructionSelector::selectShuffleVector(
5198 MachineInstr &I, MachineRegisterInfo &MRI) {
5199 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
5200 Register Src1Reg = I.getOperand(i: 1).getReg();
5201 const LLT Src1Ty = MRI.getType(Reg: Src1Reg);
5202 Register Src2Reg = I.getOperand(i: 2).getReg();
5203 const LLT Src2Ty = MRI.getType(Reg: Src2Reg);
5204 ArrayRef<int> Mask = I.getOperand(i: 3).getShuffleMask();
5205
5206 MachineBasicBlock &MBB = *I.getParent();
5207 MachineFunction &MF = *MBB.getParent();
5208 LLVMContext &Ctx = MF.getFunction().getContext();
5209
5210 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
5211 // it originated from a <1 x T> type. Those should have been lowered into
5212 // G_BUILD_VECTOR earlier.
5213 if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
5214 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
5215 return false;
5216 }
5217
5218 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5219
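// Expand each mask element into BytesPerElt consecutive byte indices for TBL.
// E.g. with 32-bit elements (BytesPerElt == 4), a mask entry of 2 expands to
// the byte indices 8, 9, 10, 11.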
5220 SmallVector<Constant *, 64> CstIdxs;
5221 for (int Val : Mask) {
5222 // For now, we just assume any undef index to be 0. This should be
5223 // optimized in the future, e.g. to select DUP etc.
5224 Val = Val < 0 ? 0 : Val;
5225 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5226 unsigned Offset = Byte + Val * BytesPerElt;
5227 CstIdxs.emplace_back(Args: ConstantInt::get(Ty: Type::getInt8Ty(C&: Ctx), V: Offset));
5228 }
5229 }
5230
5231 // Use a constant pool to load the index vector for TBL.
5232 Constant *CPVal = ConstantVector::get(V: CstIdxs);
5233 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder&: MIB);
5234 if (!IndexLoad) {
5235 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5236 return false;
5237 }
5238
5239 if (DstTy.getSizeInBits() != 128) {
5240 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5241 // This case can be done with TBL1.
5242 MachineInstr *Concat =
5243 emitVectorConcat(Dst: std::nullopt, Op1: Src1Reg, Op2: Src2Reg, MIRBuilder&: MIB);
5244 if (!Concat) {
5245 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5246 return false;
5247 }
5248
5249 // The constant pool load will be 64 bits, so we need to convert to an FPR128 reg.
5250 IndexLoad = emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass,
5251 Scalar: IndexLoad->getOperand(i: 0).getReg(), MIRBuilder&: MIB);
5252
5253 auto TBL1 = MIB.buildInstr(
5254 Opc: AArch64::TBLv16i8One, DstOps: {&AArch64::FPR128RegClass},
5255 SrcOps: {Concat->getOperand(i: 0).getReg(), IndexLoad->getOperand(i: 0).getReg()});
5256 constrainSelectedInstRegOperands(I&: *TBL1, TII, TRI, RBI);
5257
5258 auto Copy =
5259 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: 0).getReg()}, SrcOps: {})
5260 .addReg(RegNo: TBL1.getReg(Idx: 0), flags: 0, SubReg: AArch64::dsub);
5261 RBI.constrainGenericRegister(Reg: Copy.getReg(Idx: 0), RC: AArch64::FPR64RegClass, MRI);
5262 I.eraseFromParent();
5263 return true;
5264 }
5265
5266 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5267 // Q registers for regalloc.
5268 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5269 auto RegSeq = createQTuple(Regs, MIB);
5270 auto TBL2 = MIB.buildInstr(Opc: AArch64::TBLv16i8Two, DstOps: {I.getOperand(i: 0)},
5271 SrcOps: {RegSeq, IndexLoad->getOperand(i: 0)});
5272 constrainSelectedInstRegOperands(I&: *TBL2, TII, TRI, RBI);
5273 I.eraseFromParent();
5274 return true;
5275}
5276
5277MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5278 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5279 unsigned LaneIdx, const RegisterBank &RB,
5280 MachineIRBuilder &MIRBuilder) const {
5281 MachineInstr *InsElt = nullptr;
5282 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5283 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5284
5285 // Create a register to define with the insert if one wasn't passed in.
5286 if (!DstReg)
5287 DstReg = MRI.createVirtualRegister(RegClass: DstRC);
5288
5289 unsigned EltSize = MRI.getType(Reg: EltReg).getSizeInBits();
5290 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5291
5292 if (RB.getID() == AArch64::FPRRegBankID) {
5293 auto InsSub = emitScalarToVector(EltSize, DstRC, Scalar: EltReg, MIRBuilder);
5294 InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg})
5295 .addImm(Val: LaneIdx)
5296 .addUse(RegNo: InsSub->getOperand(i: 0).getReg())
5297 .addImm(Val: 0);
5298 } else {
5299 InsElt = MIRBuilder.buildInstr(Opc, DstOps: {*DstReg}, SrcOps: {SrcReg})
5300 .addImm(Val: LaneIdx)
5301 .addUse(RegNo: EltReg);
5302 }
5303
5304 constrainSelectedInstRegOperands(I&: *InsElt, TII, TRI, RBI);
5305 return InsElt;
5306}
5307
5308bool AArch64InstructionSelector::selectUSMovFromExtend(
5309 MachineInstr &MI, MachineRegisterInfo &MRI) {
5310 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5311 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5312 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5313 return false;
5314 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5315 const Register DefReg = MI.getOperand(i: 0).getReg();
5316 const LLT DstTy = MRI.getType(Reg: DefReg);
5317 unsigned DstSize = DstTy.getSizeInBits();
5318
5319 if (DstSize != 32 && DstSize != 64)
5320 return false;
5321
5322 MachineInstr *Extract = getOpcodeDef(Opcode: TargetOpcode::G_EXTRACT_VECTOR_ELT,
5323 Reg: MI.getOperand(i: 1).getReg(), MRI);
5324 int64_t Lane;
5325 if (!Extract || !mi_match(R: Extract->getOperand(i: 2).getReg(), MRI, P: m_ICst(Cst&: Lane)))
5326 return false;
5327 Register Src0 = Extract->getOperand(i: 1).getReg();
5328
5329 const LLT VecTy = MRI.getType(Reg: Src0);
5330 if (VecTy.isScalableVector())
5331 return false;
5332
5333 if (VecTy.getSizeInBits() != 128) {
5334 const MachineInstr *ScalarToVector = emitScalarToVector(
5335 EltSize: VecTy.getSizeInBits(), DstRC: &AArch64::FPR128RegClass, Scalar: Src0, MIRBuilder&: MIB);
5336 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5337 Src0 = ScalarToVector->getOperand(i: 0).getReg();
5338 }
5339
5340 unsigned Opcode;
5341 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5342 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5343 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5344 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5345 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5346 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5347 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5348 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5349 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5350 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5351 else
5352 llvm_unreachable("Unexpected type combo for S/UMov!");
5353
5354 // We may need to generate one of these, depending on the type and sign of the
5355 // input:
5356 // DstReg = SMOV Src0, Lane;
5357 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5358 MachineInstr *ExtI = nullptr;
5359 if (DstSize == 64 && !IsSigned) {
5360 Register NewReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR32RegClass);
5361 MIB.buildInstr(Opc: Opcode, DstOps: {NewReg}, SrcOps: {Src0}).addImm(Val: Lane);
5362 ExtI = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {DefReg}, SrcOps: {})
5363 .addImm(Val: 0)
5364 .addUse(RegNo: NewReg)
5365 .addImm(Val: AArch64::sub_32);
5366 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
5367 } else
5368 ExtI = MIB.buildInstr(Opc: Opcode, DstOps: {DefReg}, SrcOps: {Src0}).addImm(Val: Lane);
5369
5370 constrainSelectedInstRegOperands(I&: *ExtI, TII, TRI, RBI);
5371 MI.eraseFromParent();
5372 return true;
5373}
5374
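// The tryAdvSIMDModImm* helpers below try to materialize a splat constant with
// a single MOVI/MVNI/FMOV (vector, immediate). For instance, a v4i32 splat of
// 0x00ff0000 fits the shifted-immediate form and can be emitted as
// 'movi v0.4s, #0xff, lsl #16' (register chosen for illustration).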
5375MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5376 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5377 unsigned int Op;
5378 if (DstSize == 128) {
5379 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5380 return nullptr;
5381 Op = AArch64::MOVIv16b_ns;
5382 } else {
5383 Op = AArch64::MOVIv8b_ns;
5384 }
5385
5386 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5387
5388 if (AArch64_AM::isAdvSIMDModImmType9(Imm: Val)) {
5389 Val = AArch64_AM::encodeAdvSIMDModImmType9(Imm: Val);
5390 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5391 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5392 return &*Mov;
5393 }
5394 return nullptr;
5395}
5396
5397MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5398 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5399 bool Inv) {
5400
5401 unsigned int Op;
5402 if (DstSize == 128) {
5403 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5404 return nullptr;
5405 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5406 } else {
5407 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5408 }
5409
5410 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5411 uint64_t Shift;
5412
5413 if (AArch64_AM::isAdvSIMDModImmType5(Imm: Val)) {
5414 Val = AArch64_AM::encodeAdvSIMDModImmType5(Imm: Val);
5415 Shift = 0;
5416 } else if (AArch64_AM::isAdvSIMDModImmType6(Imm: Val)) {
5417 Val = AArch64_AM::encodeAdvSIMDModImmType6(Imm: Val);
5418 Shift = 8;
5419 } else
5420 return nullptr;
5421
5422 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5423 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5424 return &*Mov;
5425}
5426
5427MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5428 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5429 bool Inv) {
5430
5431 unsigned int Op;
5432 if (DstSize == 128) {
5433 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5434 return nullptr;
5435 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5436 } else {
5437 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5438 }
5439
5440 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5441 uint64_t Shift;
5442
5443 if ((AArch64_AM::isAdvSIMDModImmType1(Imm: Val))) {
5444 Val = AArch64_AM::encodeAdvSIMDModImmType1(Imm: Val);
5445 Shift = 0;
5446 } else if ((AArch64_AM::isAdvSIMDModImmType2(Imm: Val))) {
5447 Val = AArch64_AM::encodeAdvSIMDModImmType2(Imm: Val);
5448 Shift = 8;
5449 } else if ((AArch64_AM::isAdvSIMDModImmType3(Imm: Val))) {
5450 Val = AArch64_AM::encodeAdvSIMDModImmType3(Imm: Val);
5451 Shift = 16;
5452 } else if ((AArch64_AM::isAdvSIMDModImmType4(Imm: Val))) {
5453 Val = AArch64_AM::encodeAdvSIMDModImmType4(Imm: Val);
5454 Shift = 24;
5455 } else
5456 return nullptr;
5457
5458 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5459 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5460 return &*Mov;
5461}
5462
5463MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5464 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5465
5466 unsigned int Op;
5467 if (DstSize == 128) {
5468 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5469 return nullptr;
5470 Op = AArch64::MOVIv2d_ns;
5471 } else {
5472 Op = AArch64::MOVID;
5473 }
5474
5475 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5476 if (AArch64_AM::isAdvSIMDModImmType10(Imm: Val)) {
5477 Val = AArch64_AM::encodeAdvSIMDModImmType10(Imm: Val);
5478 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5479 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5480 return &*Mov;
5481 }
5482 return nullptr;
5483}
5484
5485MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5486 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5487 bool Inv) {
5488
5489 unsigned int Op;
5490 if (DstSize == 128) {
5491 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5492 return nullptr;
5493 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5494 } else {
5495 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5496 }
5497
5498 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5499 uint64_t Shift;
5500
5501 if (AArch64_AM::isAdvSIMDModImmType7(Imm: Val)) {
5502 Val = AArch64_AM::encodeAdvSIMDModImmType7(Imm: Val);
5503 Shift = 264;
5504 } else if (AArch64_AM::isAdvSIMDModImmType8(Imm: Val)) {
5505 Val = AArch64_AM::encodeAdvSIMDModImmType8(Imm: Val);
5506 Shift = 272;
5507 } else
5508 return nullptr;
5509
5510 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val).addImm(Val: Shift);
5511 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5512 return &*Mov;
5513}
5514
5515MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5516 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5517
5518 unsigned int Op;
5519 bool IsWide = false;
5520 if (DstSize == 128) {
5521 if (Bits.getHiBits(numBits: 64) != Bits.getLoBits(numBits: 64))
5522 return nullptr;
5523 Op = AArch64::FMOVv4f32_ns;
5524 IsWide = true;
5525 } else {
5526 Op = AArch64::FMOVv2f32_ns;
5527 }
5528
5529 uint64_t Val = Bits.zextOrTrunc(width: 64).getZExtValue();
5530
5531 if (AArch64_AM::isAdvSIMDModImmType11(Imm: Val)) {
5532 Val = AArch64_AM::encodeAdvSIMDModImmType11(Imm: Val);
5533 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Imm: Val)) {
5534 Val = AArch64_AM::encodeAdvSIMDModImmType12(Imm: Val);
5535 Op = AArch64::FMOVv2f64_ns;
5536 } else
5537 return nullptr;
5538
5539 auto Mov = Builder.buildInstr(Opc: Op, DstOps: {Dst}, SrcOps: {}).addImm(Val);
5540 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5541 return &*Mov;
5542}
5543
5544bool AArch64InstructionSelector::selectIndexedExtLoad(
5545 MachineInstr &MI, MachineRegisterInfo &MRI) {
5546 auto &ExtLd = cast<GIndexedAnyExtLoad>(Val&: MI);
5547 Register Dst = ExtLd.getDstReg();
5548 Register WriteBack = ExtLd.getWritebackReg();
5549 Register Base = ExtLd.getBaseReg();
5550 Register Offset = ExtLd.getOffsetReg();
5551 LLT Ty = MRI.getType(Reg: Dst);
5552 assert(Ty.getSizeInBits() <= 64); // Scalar destinations only (at most 64 bits).
5553 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5554 bool IsPre = ExtLd.isPre();
5555 bool IsSExt = isa<GIndexedSExtLoad>(Val: ExtLd);
5556 unsigned InsertIntoSubReg = 0;
5557 bool IsDst64 = Ty.getSizeInBits() == 64;
5558
5559 // ZExt/SExt loads should be on the GPR bank, but we can also handle extloads
5560 // and zextloads of FPR values, as long as they are scalar.
5561 bool IsFPR = RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
5562 if ((IsSExt && IsFPR) || Ty.isVector())
5563 return false;
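// For example, a post-indexed zero-extending 8-bit load into a 64-bit GPR
// destination selects LDRBBpost (which defines a 32-bit register) and then
// widens the result with a SUBREG_TO_REG into sub_32.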
5564
5565 unsigned Opc = 0;
5566 LLT NewLdDstTy;
5567 LLT s32 = LLT::scalar(SizeInBits: 32);
5568 LLT s64 = LLT::scalar(SizeInBits: 64);
5569
5570 if (MemSizeBits == 8) {
5571 if (IsSExt) {
5572 if (IsDst64)
5573 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5574 else
5575 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5576 NewLdDstTy = IsDst64 ? s64 : s32;
5577 } else if (IsFPR) {
5578 Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5579 InsertIntoSubReg = AArch64::bsub;
5580 NewLdDstTy = LLT::scalar(SizeInBits: MemSizeBits);
5581 } else {
5582 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5583 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5584 NewLdDstTy = s32;
5585 }
5586 } else if (MemSizeBits == 16) {
5587 if (IsSExt) {
5588 if (IsDst64)
5589 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5590 else
5591 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5592 NewLdDstTy = IsDst64 ? s64 : s32;
5593 } else if (IsFPR) {
5594 Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5595 InsertIntoSubReg = AArch64::hsub;
5596 NewLdDstTy = LLT::scalar(SizeInBits: MemSizeBits);
5597 } else {
5598 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5599 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5600 NewLdDstTy = s32;
5601 }
5602 } else if (MemSizeBits == 32) {
5603 if (IsSExt) {
5604 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5605 NewLdDstTy = s64;
5606 } else if (IsFPR) {
5607 Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5608 InsertIntoSubReg = AArch64::ssub;
5609 NewLdDstTy = LLT::scalar(SizeInBits: MemSizeBits);
5610 } else {
5611 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5612 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5613 NewLdDstTy = s32;
5614 }
5615 } else {
5616 llvm_unreachable("Unexpected size for indexed load");
5617 }
5618
5619 auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5620 if (!Cst)
5621 return false; // Shouldn't happen, but just in case.
5622
5623 auto LdMI = MIB.buildInstr(Opc, DstOps: {WriteBack, NewLdDstTy}, SrcOps: {Base})
5624 .addImm(Val: Cst->getSExtValue());
5625 LdMI.cloneMemRefs(OtherMI: ExtLd);
5626 constrainSelectedInstRegOperands(I&: *LdMI, TII, TRI, RBI);
5627 // Make sure to select the load with the MemTy as the dest type, and then
5628 // insert into a larger reg if needed.
5629 if (InsertIntoSubReg) {
5630 // Generate a SUBREG_TO_REG.
5631 auto SubToReg = MIB.buildInstr(Opc: TargetOpcode::SUBREG_TO_REG, DstOps: {Dst}, SrcOps: {})
5632 .addImm(Val: 0)
5633 .addUse(RegNo: LdMI.getReg(Idx: 1))
5634 .addImm(Val: InsertIntoSubReg);
5635 RBI.constrainGenericRegister(
5636 Reg: SubToReg.getReg(Idx: 0),
5637 RC: *getRegClassForTypeOnBank(Ty: MRI.getType(Reg: Dst),
5638 RB: *RBI.getRegBank(Reg: Dst, MRI, TRI)),
5639 MRI);
5640 } else {
5641 auto Copy = MIB.buildCopy(Res: Dst, Op: LdMI.getReg(Idx: 1));
5642 selectCopy(I&: *Copy, TII, MRI, TRI, RBI);
5643 }
5644 MI.eraseFromParent();
5645
5646 return true;
5647}
5648
5649bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5650 MachineRegisterInfo &MRI) {
5651 auto &Ld = cast<GIndexedLoad>(Val&: MI);
5652 Register Dst = Ld.getDstReg();
5653 Register WriteBack = Ld.getWritebackReg();
5654 Register Base = Ld.getBaseReg();
5655 Register Offset = Ld.getOffsetReg();
5656 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5657 "Unexpected type for indexed load");
5658 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5659
5660 if (MemSize < MRI.getType(Reg: Dst).getSizeInBytes())
5661 return selectIndexedExtLoad(MI, MRI);
5662
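// The opcode tables below are indexed by log2 of the access size in bytes
// (B, H, W/S, X/D, Q).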
5663 unsigned Opc = 0;
5664 if (Ld.isPre()) {
5665 static constexpr unsigned GPROpcodes[] = {
5666 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5667 AArch64::LDRXpre};
5668 static constexpr unsigned FPROpcodes[] = {
5669 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5670 AArch64::LDRQpre};
5671 if (RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5672 Opc = FPROpcodes[Log2_32(Value: MemSize)];
5673 else
5674 Opc = GPROpcodes[Log2_32(Value: MemSize)];
5675 } else {
5676 static constexpr unsigned GPROpcodes[] = {
5677 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5678 AArch64::LDRXpost};
5679 static constexpr unsigned FPROpcodes[] = {
5680 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5681 AArch64::LDRDpost, AArch64::LDRQpost};
5682 if (RBI.getRegBank(Reg: Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5683 Opc = FPROpcodes[Log2_32(Value: MemSize)];
5684 else
5685 Opc = GPROpcodes[Log2_32(Value: MemSize)];
5686 }
5687 auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5688 if (!Cst)
5689 return false; // Shouldn't happen, but just in case.
5690 auto LdMI =
5691 MIB.buildInstr(Opc, DstOps: {WriteBack, Dst}, SrcOps: {Base}).addImm(Val: Cst->getSExtValue());
5692 LdMI.cloneMemRefs(OtherMI: Ld);
5693 constrainSelectedInstRegOperands(I&: *LdMI, TII, TRI, RBI);
5694 MI.eraseFromParent();
5695 return true;
5696}
5697
5698bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5699 MachineRegisterInfo &MRI) {
5700 Register Dst = I.getWritebackReg();
5701 Register Val = I.getValueReg();
5702 Register Base = I.getBaseReg();
5703 Register Offset = I.getOffsetReg();
5704 LLT ValTy = MRI.getType(Reg: Val);
5705 assert(ValTy.getSizeInBits() <= 128 && "Unexpected type for indexed store");
5706
5707 unsigned Opc = 0;
5708 if (I.isPre()) {
5709 static constexpr unsigned GPROpcodes[] = {
5710 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5711 AArch64::STRXpre};
5712 static constexpr unsigned FPROpcodes[] = {
5713 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5714 AArch64::STRQpre};
5715
5716 if (RBI.getRegBank(Reg: Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5717 Opc = FPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
5718 else
5719 Opc = GPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
5720 } else {
5721 static constexpr unsigned GPROpcodes[] = {
5722 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5723 AArch64::STRXpost};
5724 static constexpr unsigned FPROpcodes[] = {
5725 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5726 AArch64::STRDpost, AArch64::STRQpost};
5727
5728 if (RBI.getRegBank(Reg: Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5729 Opc = FPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
5730 else
5731 Opc = GPROpcodes[Log2_32(Value: ValTy.getSizeInBytes())];
5732 }
5733
5734 auto Cst = getIConstantVRegVal(VReg: Offset, MRI);
5735 if (!Cst)
5736 return false; // Shouldn't happen, but just in case.
5737 auto Str =
5738 MIB.buildInstr(Opc, DstOps: {Dst}, SrcOps: {Val, Base}).addImm(Val: Cst->getSExtValue());
5739 Str.cloneMemRefs(OtherMI: I);
5740 constrainSelectedInstRegOperands(I&: *Str, TII, TRI, RBI);
5741 I.eraseFromParent();
5742 return true;
5743}
5744
5745MachineInstr *
5746AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5747 MachineIRBuilder &MIRBuilder,
5748 MachineRegisterInfo &MRI) {
5749 LLT DstTy = MRI.getType(Reg: Dst);
5750 unsigned DstSize = DstTy.getSizeInBits();
5751 if (CV->isNullValue()) {
5752 if (DstSize == 128) {
5753 auto Mov =
5754 MIRBuilder.buildInstr(Opc: AArch64::MOVIv2d_ns, DstOps: {Dst}, SrcOps: {}).addImm(Val: 0);
5755 constrainSelectedInstRegOperands(I&: *Mov, TII, TRI, RBI);
5756 return &*Mov;
5757 }
5758
5759 if (DstSize == 64) {
5760 auto Mov =
5761 MIRBuilder
5762 .buildInstr(Opc: AArch64::MOVIv2d_ns, DstOps: {&AArch64::FPR128RegClass}, SrcOps: {})
5763 .addImm(Val: 0);
5764 auto Copy = MIRBuilder.buildInstr(Opc: TargetOpcode::COPY, DstOps: {Dst}, SrcOps: {})
5765 .addReg(RegNo: Mov.getReg(Idx: 0), flags: 0, SubReg: AArch64::dsub);
5766 RBI.constrainGenericRegister(Reg: Dst, RC: AArch64::FPR64RegClass, MRI);
5767 return &*Copy;
5768 }
5769 }
5770
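  // For splat constants, first try to materialize the value with a single SIMD
  // modified-immediate instruction (or an fneg of one) before falling back to a
  // constant-pool load.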
5771 if (Constant *SplatValue = CV->getSplatValue()) {
5772 APInt SplatValueAsInt =
5773 isa<ConstantFP>(Val: SplatValue)
5774 ? cast<ConstantFP>(Val: SplatValue)->getValueAPF().bitcastToAPInt()
5775 : SplatValue->getUniqueInteger();
5776 APInt DefBits = APInt::getSplat(
5777 NewLen: DstSize, V: SplatValueAsInt.trunc(width: DstTy.getScalarSizeInBits()));
5778 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5779 MachineInstr *NewOp;
5780 bool Inv = false;
5781 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) ||
5782 (NewOp =
5783 tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5784 (NewOp =
5785 tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5786 (NewOp =
5787 tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5788 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)) ||
5789 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder)))
5790 return NewOp;
5791
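      // No direct encoding matched; try the bitwise-inverted pattern, which
      // corresponds to the inverted (MVNI-style) forms.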
5792 DefBits = ~DefBits;
5793 Inv = true;
5794 if ((NewOp =
5795 tryAdvSIMDModImm32(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5796 (NewOp =
5797 tryAdvSIMDModImm321s(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)) ||
5798 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, Bits: DefBits, Builder&: MIRBuilder, Inv)))
5799 return NewOp;
5800 return nullptr;
5801 };
5802
5803 if (auto *NewOp = TryMOVIWithBits(DefBits))
5804 return NewOp;
5805
5806 // See if an fneg of the constant can be materialized with a MOVI, etc.
5807 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5808 unsigned NegOpc) -> MachineInstr * {
5809 // Flip the sign bit of each NumBits-wide sub-element of the constant.
5810 APInt Neg = APInt::getHighBitsSet(numBits: NumBits, hiBitsSet: 1).zext(width: DstSize);
5811 APInt NegBits(DstSize, 0);
5812 unsigned NumElts = DstSize / NumBits;
5813 for (unsigned i = 0; i < NumElts; i++)
5814 NegBits |= Neg << (NumBits * i);
5815 NegBits = DefBits ^ NegBits;
5816
5817 // Try to create the new constants with MOVI, and if so generate a fneg
5818 // for it.
5819 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5820 Register NewDst = MRI.createVirtualRegister(RegClass: &AArch64::FPR128RegClass);
5821 NewOp->getOperand(i: 0).setReg(NewDst);
5822 return MIRBuilder.buildInstr(Opc: NegOpc, DstOps: {Dst}, SrcOps: {NewDst});
5823 }
5824 return nullptr;
5825 };
5826 MachineInstr *R;
5827 if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5828 (R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5829 (STI.hasFullFP16() &&
5830 (R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5831 return R;
5832 }
5833
5834 auto *CPLoad = emitLoadFromConstantPool(CPVal: CV, MIRBuilder);
5835 if (!CPLoad) {
5836 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!\n");
5837 return nullptr;
5838 }
5839
5840 auto Copy = MIRBuilder.buildCopy(Res: Dst, Op: CPLoad->getOperand(i: 0));
5841 RBI.constrainGenericRegister(
5842 Reg: Dst, RC: *MRI.getRegClass(Reg: CPLoad->getOperand(i: 0).getReg()), MRI);
5843 return &*Copy;
5844}
5845
5846bool AArch64InstructionSelector::tryOptConstantBuildVec(
5847 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5848 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5849 unsigned DstSize = DstTy.getSizeInBits();
5850 assert(DstSize <= 128 && "Unexpected build_vec type!");
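  // Leave vectors narrower than 32 bits to the normal build_vector selection.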
5851 if (DstSize < 32)
5852 return false;
5853 // Check if we're building a constant vector, in which case we want to
5854 // generate a constant pool load instead of a vector insert sequence.
5855 SmallVector<Constant *, 16> Csts;
5856 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5857 // Try to find G_CONSTANT or G_FCONSTANT
5858 auto *OpMI =
5859 getOpcodeDef(Opcode: TargetOpcode::G_CONSTANT, Reg: I.getOperand(i: Idx).getReg(), MRI);
5860 if (OpMI)
5861 Csts.emplace_back(
5862 Args: const_cast<ConstantInt *>(OpMI->getOperand(i: 1).getCImm()));
5863 else if ((OpMI = getOpcodeDef(Opcode: TargetOpcode::G_FCONSTANT,
5864 Reg: I.getOperand(i: Idx).getReg(), MRI)))
5865 Csts.emplace_back(
5866 Args: const_cast<ConstantFP *>(OpMI->getOperand(i: 1).getFPImm()));
5867 else
5868 return false;
5869 }
5870 Constant *CV = ConstantVector::get(V: Csts);
5871 if (!emitConstantVector(Dst: I.getOperand(i: 0).getReg(), CV, MIRBuilder&: MIB, MRI))
5872 return false;
5873 I.eraseFromParent();
5874 return true;
5875}
5876
5877bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5878 MachineInstr &I, MachineRegisterInfo &MRI) {
5879 // Given:
5880 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5881 //
5882 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5883 Register Dst = I.getOperand(i: 0).getReg();
5884 Register EltReg = I.getOperand(i: 1).getReg();
5885 LLT EltTy = MRI.getType(Reg: EltReg);
5886 // If the destination isn't on the same register bank as its element, then this
5887 // can't be a SUBREG_TO_REG.
5888 const RegisterBank &EltRB = *RBI.getRegBank(Reg: EltReg, MRI, TRI);
5889 const RegisterBank &DstRB = *RBI.getRegBank(Reg: Dst, MRI, TRI);
5890 if (EltRB != DstRB)
5891 return false;
5892 if (any_of(Range: drop_begin(RangeOrContainer: I.operands(), N: 2), P: [&MRI](const MachineOperand &Op) {
5893 return !getOpcodeDef(Opcode: TargetOpcode::G_IMPLICIT_DEF, Reg: Op.getReg(), MRI);
5894 }))
5895 return false;
5896 unsigned SubReg;
5897 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(Ty: EltTy, RB: EltRB);
5898 if (!EltRC)
5899 return false;
5900 const TargetRegisterClass *DstRC =
5901 getRegClassForTypeOnBank(Ty: MRI.getType(Reg: Dst), RB: DstRB);
5902 if (!DstRC)
5903 return false;
5904 if (!getSubRegForClass(RC: EltRC, TRI, SubReg))
5905 return false;
5906 auto SubregToReg = MIB.buildInstr(Opc: AArch64::SUBREG_TO_REG, DstOps: {Dst}, SrcOps: {})
5907 .addImm(Val: 0)
5908 .addUse(RegNo: EltReg)
5909 .addImm(Val: SubReg);
5910 I.eraseFromParent();
5911 constrainSelectedInstRegOperands(I&: *SubregToReg, TII, TRI, RBI);
5912 return RBI.constrainGenericRegister(Reg: Dst, RC: *DstRC, MRI);
5913}
5914
5915bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5916 MachineRegisterInfo &MRI) {
5917 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5918 // Until we port more of the optimized selections, for now just use a vector
5919 // insert sequence.
5920 const LLT DstTy = MRI.getType(Reg: I.getOperand(i: 0).getReg());
5921 const LLT EltTy = MRI.getType(Reg: I.getOperand(i: 1).getReg());
5922 unsigned EltSize = EltTy.getSizeInBits();
5923
5924 if (tryOptConstantBuildVec(I, DstTy, MRI))
5925 return true;
5926 if (tryOptBuildVecToSubregToReg(I, MRI))
5927 return true;
5928
5929 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5930 return false; // Don't support all element types yet.
5931 const RegisterBank &RB = *RBI.getRegBank(Reg: I.getOperand(i: 1).getReg(), MRI, TRI);
5932
5933 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5934 MachineInstr *ScalarToVec =
5935 emitScalarToVector(EltSize: DstTy.getElementType().getSizeInBits(), DstRC,
5936 Scalar: I.getOperand(i: 1).getReg(), MIRBuilder&: MIB);
5937 if (!ScalarToVec)
5938 return false;
5939
5940 Register DstVec = ScalarToVec->getOperand(i: 0).getReg();
5941 unsigned DstSize = DstTy.getSizeInBits();
5942
5943 // Keep track of the last MI we inserted. Later on, we might be able to save
5944 // a copy using it.
5945 MachineInstr *PrevMI = ScalarToVec;
5946 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5947 // Note that if we don't do a subregister copy, we can end up making an
5948 // extra register.
5949 Register OpReg = I.getOperand(i).getReg();
5950 // Do not emit inserts for undefs
5951 if (!getOpcodeDef<GImplicitDef>(Reg: OpReg, MRI)) {
5952 PrevMI = &*emitLaneInsert(DstReg: std::nullopt, SrcReg: DstVec, EltReg: OpReg, LaneIdx: i - 1, RB, MIRBuilder&: MIB);
5953 DstVec = PrevMI->getOperand(i: 0).getReg();
5954 }
5955 }
5956
5957 // If DstTy's size in bits is less than 128, then emit a subregister copy
5958 // from DstVec to the last register we've defined.
5959 if (DstSize < 128) {
5960 // Force this to be FPR using the destination vector.
5961 const TargetRegisterClass *RC =
5962 getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: DstVec, MRI, TRI));
5963 if (!RC)
5964 return false;
5965 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5966 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5967 return false;
5968 }
5969
5970 unsigned SubReg = 0;
5971 if (!getSubRegForClass(RC, TRI, SubReg))
5972 return false;
5973 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5974 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5975 << ")\n");
5976 return false;
5977 }
5978
5979 Register Reg = MRI.createVirtualRegister(RegClass: RC);
5980 Register DstReg = I.getOperand(i: 0).getReg();
5981
5982 MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {DstReg}, SrcOps: {}).addReg(RegNo: DstVec, flags: 0, SubReg);
5983 MachineOperand &RegOp = I.getOperand(i: 1);
5984 RegOp.setReg(Reg);
5985 RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
5986 } else {
5987 // We either have a vector with all elements (except the first one) undef or
5988 // at least one non-undef non-first element. In the first case, we need to
5989 // constrain the output register ourselves as we may have generated an
5990 // INSERT_SUBREG operation which is a generic operation for which the
5991 // output regclass cannot be automatically chosen.
5992 //
5993 // In the second case, there is no need to do this as it may generate an
5994 // instruction like INSvi32gpr where the regclass can be automatically
5995 // chosen.
5996 //
5997 // Also, we save a copy by re-using the destination register on the final
5998 // insert.
5999 PrevMI->getOperand(i: 0).setReg(I.getOperand(i: 0).getReg());
6000 constrainSelectedInstRegOperands(I&: *PrevMI, TII, TRI, RBI);
6001
6002 Register DstReg = PrevMI->getOperand(i: 0).getReg();
6003 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
6004 const TargetRegisterClass *RC =
6005 getRegClassForTypeOnBank(Ty: DstTy, RB: *RBI.getRegBank(Reg: DstVec, MRI, TRI));
6006 RBI.constrainGenericRegister(Reg: DstReg, RC: *RC, MRI);
6007 }
6008 }
6009
6010 I.eraseFromParent();
6011 return true;
6012}
6013
6014bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
6015 unsigned NumVecs,
6016 MachineInstr &I) {
6017 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
6018 assert(Opc && "Expected an opcode?");
6019 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
6020 auto &MRI = *MIB.getMRI();
6021 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6022 unsigned Size = Ty.getSizeInBits();
6023 assert((Size == 64 || Size == 128) &&
6024 "Destination must be 64 bits or 128 bits?");
6025 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
6026 auto Ptr = I.getOperand(i: I.getNumOperands() - 1).getReg();
6027 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
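  // The LDn instruction defines a register tuple; each destination vector is
  // then extracted from it below with a subregister copy (dsub0/qsub0 + Idx).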
6028 auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {Ptr});
6029 Load.cloneMemRefs(OtherMI: I);
6030 constrainSelectedInstRegOperands(I&: *Load, TII, TRI, RBI);
6031 Register SelectedLoadDst = Load->getOperand(i: 0).getReg();
6032 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6033 auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY, DstOps: {I.getOperand(i: Idx)}, SrcOps: {})
6034 .addReg(RegNo: SelectedLoadDst, flags: 0, SubReg: SubReg + Idx);
6035 // Emit the subreg copies and immediately select them.
6036 // FIXME: We should refactor our copy code into an emitCopy helper and
6037 // clean up uses of this pattern elsewhere in the selector.
6038 selectCopy(I&: *Vec, TII, MRI, TRI, RBI);
6039 }
6040 return true;
6041}
6042
6043bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
6044 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
6045 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
6046 assert(Opc && "Expected an opcode?");
6047 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
6048 auto &MRI = *MIB.getMRI();
6049 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6050 bool Narrow = Ty.getSizeInBits() == 64;
6051
6052 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
6053 SmallVector<Register, 4> Regs(NumVecs);
6054 std::transform(first: FirstSrcRegIt, last: FirstSrcRegIt + NumVecs, result: Regs.begin(),
6055 unary_op: [](auto MO) { return MO.getReg(); });
6056
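  // The LDn lane instructions operate on Q-register tuples, so 64-bit (D-sized)
  // source vectors are widened to 128 bits here and the results are narrowed
  // back to 64 bits after the load.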
6057 if (Narrow) {
6058 transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) {
6059 return emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass, Scalar: Reg, MIRBuilder&: MIB)
6060 ->getOperand(i: 0)
6061 .getReg();
6062 });
6063 Ty = Ty.multiplyElements(Factor: 2);
6064 }
6065
6066 Register Tuple = createQTuple(Regs, MIB);
6067 auto LaneNo = getIConstantVRegVal(VReg: (FirstSrcRegIt + NumVecs)->getReg(), MRI);
6068 if (!LaneNo)
6069 return false;
6070
6071 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
6072 auto Load = MIB.buildInstr(Opc, DstOps: {Ty}, SrcOps: {})
6073 .addReg(RegNo: Tuple)
6074 .addImm(Val: LaneNo->getZExtValue())
6075 .addReg(RegNo: Ptr);
6076 Load.cloneMemRefs(OtherMI: I);
6077 constrainSelectedInstRegOperands(I&: *Load, TII, TRI, RBI);
6078 Register SelectedLoadDst = Load->getOperand(i: 0).getReg();
6079 unsigned SubReg = AArch64::qsub0;
6080 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6081 auto Vec = MIB.buildInstr(Opc: TargetOpcode::COPY,
6082 DstOps: {Narrow ? DstOp(&AArch64::FPR128RegClass)
6083 : DstOp(I.getOperand(i: Idx).getReg())},
6084 SrcOps: {})
6085 .addReg(RegNo: SelectedLoadDst, flags: 0, SubReg: SubReg + Idx);
6086 Register WideReg = Vec.getReg(Idx: 0);
6087 // Emit the subreg copies and immediately select them.
6088 selectCopy(I&: *Vec, TII, MRI, TRI, RBI);
6089 if (Narrow &&
6090 !emitNarrowVector(DstReg: I.getOperand(i: Idx).getReg(), SrcReg: WideReg, MIB, MRI))
6091 return false;
6092 }
6093 return true;
6094}
6095
6096void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6097 unsigned NumVecs,
6098 unsigned Opc) {
6099 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6100 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6101 Register Ptr = I.getOperand(i: 1 + NumVecs).getReg();
6102
6103 SmallVector<Register, 2> Regs(NumVecs);
6104 std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs,
6105 result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); });
6106
6107 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6108 : createDTuple(Regs, MIB);
6109 auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {Tuple, Ptr});
6110 Store.cloneMemRefs(OtherMI: I);
6111 constrainSelectedInstRegOperands(I&: *Store, TII, TRI, RBI);
6112}
6113
6114bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6115 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6116 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6117 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6118 bool Narrow = Ty.getSizeInBits() == 64;
6119
6120 SmallVector<Register, 2> Regs(NumVecs);
6121 std::transform(first: I.operands_begin() + 1, last: I.operands_begin() + 1 + NumVecs,
6122 result: Regs.begin(), unary_op: [](auto MO) { return MO.getReg(); });
6123
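  // As with the lane loads, the STn lane instructions take Q-register tuples,
  // so widen 64-bit source vectors to 128 bits first.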
6124 if (Narrow)
6125 transform(Range&: Regs, d_first: Regs.begin(), F: [this](Register Reg) {
6126 return emitScalarToVector(EltSize: 64, DstRC: &AArch64::FPR128RegClass, Scalar: Reg, MIRBuilder&: MIB)
6127 ->getOperand(i: 0)
6128 .getReg();
6129 });
6130
6131 Register Tuple = createQTuple(Regs, MIB);
6132
6133 auto LaneNo = getIConstantVRegVal(VReg: I.getOperand(i: 1 + NumVecs).getReg(), MRI);
6134 if (!LaneNo)
6135 return false;
6136 Register Ptr = I.getOperand(i: 1 + NumVecs + 1).getReg();
6137 auto Store = MIB.buildInstr(Opc, DstOps: {}, SrcOps: {})
6138 .addReg(RegNo: Tuple)
6139 .addImm(Val: LaneNo->getZExtValue())
6140 .addReg(RegNo: Ptr);
6141 Store.cloneMemRefs(OtherMI: I);
6142 constrainSelectedInstRegOperands(I&: *Store, TII, TRI, RBI);
6143 return true;
6144}
6145
6146bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6147 MachineInstr &I, MachineRegisterInfo &MRI) {
6148 // Find the intrinsic ID.
6149 unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID();
6150
6151 const LLT S8 = LLT::scalar(SizeInBits: 8);
6152 const LLT S16 = LLT::scalar(SizeInBits: 16);
6153 const LLT S32 = LLT::scalar(SizeInBits: 32);
6154 const LLT S64 = LLT::scalar(SizeInBits: 64);
6155 const LLT P0 = LLT::pointer(AddressSpace: 0, SizeInBits: 64);
6156 // Select the instruction.
6157 switch (IntrinID) {
6158 default:
6159 return false;
6160 case Intrinsic::aarch64_ldxp:
6161 case Intrinsic::aarch64_ldaxp: {
6162 auto NewI = MIB.buildInstr(
6163 Opc: IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6164 DstOps: {I.getOperand(i: 0).getReg(), I.getOperand(i: 1).getReg()},
6165 SrcOps: {I.getOperand(i: 3)});
6166 NewI.cloneMemRefs(OtherMI: I);
6167 constrainSelectedInstRegOperands(I&: *NewI, TII, TRI, RBI);
6168 break;
6169 }
6170 case Intrinsic::aarch64_neon_ld1x2: {
6171 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6172 unsigned Opc = 0;
6173 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6174 Opc = AArch64::LD1Twov8b;
6175 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6176 Opc = AArch64::LD1Twov16b;
6177 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6178 Opc = AArch64::LD1Twov4h;
6179 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6180 Opc = AArch64::LD1Twov8h;
6181 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6182 Opc = AArch64::LD1Twov2s;
6183 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6184 Opc = AArch64::LD1Twov4s;
6185 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6186 Opc = AArch64::LD1Twov2d;
6187 else if (Ty == S64 || Ty == P0)
6188 Opc = AArch64::LD1Twov1d;
6189 else
6190 llvm_unreachable("Unexpected type for ld1x2!");
6191 selectVectorLoadIntrinsic(Opc, NumVecs: 2, I);
6192 break;
6193 }
6194 case Intrinsic::aarch64_neon_ld1x3: {
6195 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6196 unsigned Opc = 0;
6197 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6198 Opc = AArch64::LD1Threev8b;
6199 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6200 Opc = AArch64::LD1Threev16b;
6201 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6202 Opc = AArch64::LD1Threev4h;
6203 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6204 Opc = AArch64::LD1Threev8h;
6205 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6206 Opc = AArch64::LD1Threev2s;
6207 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6208 Opc = AArch64::LD1Threev4s;
6209 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6210 Opc = AArch64::LD1Threev2d;
6211 else if (Ty == S64 || Ty == P0)
6212 Opc = AArch64::LD1Threev1d;
6213 else
6214 llvm_unreachable("Unexpected type for ld1x3!");
6215 selectVectorLoadIntrinsic(Opc, NumVecs: 3, I);
6216 break;
6217 }
6218 case Intrinsic::aarch64_neon_ld1x4: {
6219 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6220 unsigned Opc = 0;
6221 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6222 Opc = AArch64::LD1Fourv8b;
6223 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6224 Opc = AArch64::LD1Fourv16b;
6225 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6226 Opc = AArch64::LD1Fourv4h;
6227 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6228 Opc = AArch64::LD1Fourv8h;
6229 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6230 Opc = AArch64::LD1Fourv2s;
6231 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6232 Opc = AArch64::LD1Fourv4s;
6233 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6234 Opc = AArch64::LD1Fourv2d;
6235 else if (Ty == S64 || Ty == P0)
6236 Opc = AArch64::LD1Fourv1d;
6237 else
6238 llvm_unreachable("Unexpected type for ld1x4!");
6239 selectVectorLoadIntrinsic(Opc, NumVecs: 4, I);
6240 break;
6241 }
6242 case Intrinsic::aarch64_neon_ld2: {
6243 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6244 unsigned Opc = 0;
6245 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6246 Opc = AArch64::LD2Twov8b;
6247 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6248 Opc = AArch64::LD2Twov16b;
6249 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6250 Opc = AArch64::LD2Twov4h;
6251 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6252 Opc = AArch64::LD2Twov8h;
6253 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6254 Opc = AArch64::LD2Twov2s;
6255 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6256 Opc = AArch64::LD2Twov4s;
6257 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6258 Opc = AArch64::LD2Twov2d;
6259 else if (Ty == S64 || Ty == P0)
6260 Opc = AArch64::LD1Twov1d;
6261 else
6262 llvm_unreachable("Unexpected type for ld2!");
6263 selectVectorLoadIntrinsic(Opc, NumVecs: 2, I);
6264 break;
6265 }
6266 case Intrinsic::aarch64_neon_ld2lane: {
6267 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6268 unsigned Opc;
6269 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6270 Opc = AArch64::LD2i8;
6271 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6272 Opc = AArch64::LD2i16;
6273 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6274 Opc = AArch64::LD2i32;
6275 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6276 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6277 Opc = AArch64::LD2i64;
6278 else
6279 llvm_unreachable("Unexpected type for st2lane!");
6280 if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 2, I))
6281 return false;
6282 break;
6283 }
6284 case Intrinsic::aarch64_neon_ld2r: {
6285 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6286 unsigned Opc = 0;
6287 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6288 Opc = AArch64::LD2Rv8b;
6289 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6290 Opc = AArch64::LD2Rv16b;
6291 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6292 Opc = AArch64::LD2Rv4h;
6293 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6294 Opc = AArch64::LD2Rv8h;
6295 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6296 Opc = AArch64::LD2Rv2s;
6297 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6298 Opc = AArch64::LD2Rv4s;
6299 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6300 Opc = AArch64::LD2Rv2d;
6301 else if (Ty == S64 || Ty == P0)
6302 Opc = AArch64::LD2Rv1d;
6303 else
6304 llvm_unreachable("Unexpected type for ld2r!");
6305 selectVectorLoadIntrinsic(Opc, NumVecs: 2, I);
6306 break;
6307 }
6308 case Intrinsic::aarch64_neon_ld3: {
6309 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6310 unsigned Opc = 0;
6311 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6312 Opc = AArch64::LD3Threev8b;
6313 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6314 Opc = AArch64::LD3Threev16b;
6315 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6316 Opc = AArch64::LD3Threev4h;
6317 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6318 Opc = AArch64::LD3Threev8h;
6319 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6320 Opc = AArch64::LD3Threev2s;
6321 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6322 Opc = AArch64::LD3Threev4s;
6323 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6324 Opc = AArch64::LD3Threev2d;
6325 else if (Ty == S64 || Ty == P0)
6326 Opc = AArch64::LD1Threev1d;
6327 else
6328 llvm_unreachable("Unexpected type for ld3!");
6329 selectVectorLoadIntrinsic(Opc, NumVecs: 3, I);
6330 break;
6331 }
6332 case Intrinsic::aarch64_neon_ld3lane: {
6333 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6334 unsigned Opc;
6335 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6336 Opc = AArch64::LD3i8;
6337 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6338 Opc = AArch64::LD3i16;
6339 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6340 Opc = AArch64::LD3i32;
6341 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6342 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6343 Opc = AArch64::LD3i64;
6344 else
6345 llvm_unreachable("Unexpected type for st3lane!");
6346 if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 3, I))
6347 return false;
6348 break;
6349 }
6350 case Intrinsic::aarch64_neon_ld3r: {
6351 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6352 unsigned Opc = 0;
6353 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6354 Opc = AArch64::LD3Rv8b;
6355 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6356 Opc = AArch64::LD3Rv16b;
6357 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6358 Opc = AArch64::LD3Rv4h;
6359 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6360 Opc = AArch64::LD3Rv8h;
6361 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6362 Opc = AArch64::LD3Rv2s;
6363 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6364 Opc = AArch64::LD3Rv4s;
6365 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6366 Opc = AArch64::LD3Rv2d;
6367 else if (Ty == S64 || Ty == P0)
6368 Opc = AArch64::LD3Rv1d;
6369 else
6370 llvm_unreachable("Unexpected type for ld3r!");
6371 selectVectorLoadIntrinsic(Opc, NumVecs: 3, I);
6372 break;
6373 }
6374 case Intrinsic::aarch64_neon_ld4: {
6375 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6376 unsigned Opc = 0;
6377 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6378 Opc = AArch64::LD4Fourv8b;
6379 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6380 Opc = AArch64::LD4Fourv16b;
6381 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6382 Opc = AArch64::LD4Fourv4h;
6383 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6384 Opc = AArch64::LD4Fourv8h;
6385 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6386 Opc = AArch64::LD4Fourv2s;
6387 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6388 Opc = AArch64::LD4Fourv4s;
6389 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6390 Opc = AArch64::LD4Fourv2d;
6391 else if (Ty == S64 || Ty == P0)
6392 Opc = AArch64::LD1Fourv1d;
6393 else
6394 llvm_unreachable("Unexpected type for ld4!");
6395 selectVectorLoadIntrinsic(Opc, NumVecs: 4, I);
6396 break;
6397 }
6398 case Intrinsic::aarch64_neon_ld4lane: {
6399 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6400 unsigned Opc;
6401 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6402 Opc = AArch64::LD4i8;
6403 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6404 Opc = AArch64::LD4i16;
6405 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6406 Opc = AArch64::LD4i32;
6407 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6408 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6409 Opc = AArch64::LD4i64;
6410 else
6411 llvm_unreachable("Unexpected type for st4lane!");
6412 if (!selectVectorLoadLaneIntrinsic(Opc, NumVecs: 4, I))
6413 return false;
6414 break;
6415 }
6416 case Intrinsic::aarch64_neon_ld4r: {
6417 LLT Ty = MRI.getType(Reg: I.getOperand(i: 0).getReg());
6418 unsigned Opc = 0;
6419 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6420 Opc = AArch64::LD4Rv8b;
6421 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6422 Opc = AArch64::LD4Rv16b;
6423 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6424 Opc = AArch64::LD4Rv4h;
6425 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6426 Opc = AArch64::LD4Rv8h;
6427 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6428 Opc = AArch64::LD4Rv2s;
6429 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6430 Opc = AArch64::LD4Rv4s;
6431 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6432 Opc = AArch64::LD4Rv2d;
6433 else if (Ty == S64 || Ty == P0)
6434 Opc = AArch64::LD4Rv1d;
6435 else
6436 llvm_unreachable("Unexpected type for ld4r!");
6437 selectVectorLoadIntrinsic(Opc, NumVecs: 4, I);
6438 break;
6439 }
6440 case Intrinsic::aarch64_neon_st1x2: {
6441 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6442 unsigned Opc;
6443 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6444 Opc = AArch64::ST1Twov8b;
6445 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6446 Opc = AArch64::ST1Twov16b;
6447 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6448 Opc = AArch64::ST1Twov4h;
6449 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6450 Opc = AArch64::ST1Twov8h;
6451 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6452 Opc = AArch64::ST1Twov2s;
6453 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6454 Opc = AArch64::ST1Twov4s;
6455 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6456 Opc = AArch64::ST1Twov2d;
6457 else if (Ty == S64 || Ty == P0)
6458 Opc = AArch64::ST1Twov1d;
6459 else
6460 llvm_unreachable("Unexpected type for st1x2!");
6461 selectVectorStoreIntrinsic(I, NumVecs: 2, Opc);
6462 break;
6463 }
6464 case Intrinsic::aarch64_neon_st1x3: {
6465 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6466 unsigned Opc;
6467 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6468 Opc = AArch64::ST1Threev8b;
6469 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6470 Opc = AArch64::ST1Threev16b;
6471 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6472 Opc = AArch64::ST1Threev4h;
6473 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6474 Opc = AArch64::ST1Threev8h;
6475 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6476 Opc = AArch64::ST1Threev2s;
6477 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6478 Opc = AArch64::ST1Threev4s;
6479 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6480 Opc = AArch64::ST1Threev2d;
6481 else if (Ty == S64 || Ty == P0)
6482 Opc = AArch64::ST1Threev1d;
6483 else
6484 llvm_unreachable("Unexpected type for st1x3!");
6485 selectVectorStoreIntrinsic(I, NumVecs: 3, Opc);
6486 break;
6487 }
6488 case Intrinsic::aarch64_neon_st1x4: {
6489 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6490 unsigned Opc;
6491 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6492 Opc = AArch64::ST1Fourv8b;
6493 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6494 Opc = AArch64::ST1Fourv16b;
6495 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6496 Opc = AArch64::ST1Fourv4h;
6497 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6498 Opc = AArch64::ST1Fourv8h;
6499 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6500 Opc = AArch64::ST1Fourv2s;
6501 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6502 Opc = AArch64::ST1Fourv4s;
6503 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6504 Opc = AArch64::ST1Fourv2d;
6505 else if (Ty == S64 || Ty == P0)
6506 Opc = AArch64::ST1Fourv1d;
6507 else
6508 llvm_unreachable("Unexpected type for st1x4!");
6509 selectVectorStoreIntrinsic(I, NumVecs: 4, Opc);
6510 break;
6511 }
6512 case Intrinsic::aarch64_neon_st2: {
6513 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6514 unsigned Opc;
6515 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6516 Opc = AArch64::ST2Twov8b;
6517 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6518 Opc = AArch64::ST2Twov16b;
6519 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6520 Opc = AArch64::ST2Twov4h;
6521 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6522 Opc = AArch64::ST2Twov8h;
6523 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6524 Opc = AArch64::ST2Twov2s;
6525 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6526 Opc = AArch64::ST2Twov4s;
6527 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6528 Opc = AArch64::ST2Twov2d;
6529 else if (Ty == S64 || Ty == P0)
6530 Opc = AArch64::ST1Twov1d;
6531 else
6532 llvm_unreachable("Unexpected type for st2!");
6533 selectVectorStoreIntrinsic(I, NumVecs: 2, Opc);
6534 break;
6535 }
6536 case Intrinsic::aarch64_neon_st3: {
6537 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6538 unsigned Opc;
6539 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6540 Opc = AArch64::ST3Threev8b;
6541 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6542 Opc = AArch64::ST3Threev16b;
6543 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6544 Opc = AArch64::ST3Threev4h;
6545 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6546 Opc = AArch64::ST3Threev8h;
6547 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6548 Opc = AArch64::ST3Threev2s;
6549 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6550 Opc = AArch64::ST3Threev4s;
6551 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6552 Opc = AArch64::ST3Threev2d;
6553 else if (Ty == S64 || Ty == P0)
6554 Opc = AArch64::ST1Threev1d;
6555 else
6556 llvm_unreachable("Unexpected type for st3!");
6557 selectVectorStoreIntrinsic(I, NumVecs: 3, Opc);
6558 break;
6559 }
6560 case Intrinsic::aarch64_neon_st4: {
6561 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6562 unsigned Opc;
6563 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8))
6564 Opc = AArch64::ST4Fourv8b;
6565 else if (Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6566 Opc = AArch64::ST4Fourv16b;
6567 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16))
6568 Opc = AArch64::ST4Fourv4h;
6569 else if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6570 Opc = AArch64::ST4Fourv8h;
6571 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32))
6572 Opc = AArch64::ST4Fourv2s;
6573 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6574 Opc = AArch64::ST4Fourv4s;
6575 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) || Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0))
6576 Opc = AArch64::ST4Fourv2d;
6577 else if (Ty == S64 || Ty == P0)
6578 Opc = AArch64::ST1Fourv1d;
6579 else
6580 llvm_unreachable("Unexpected type for st4!");
6581 selectVectorStoreIntrinsic(I, NumVecs: 4, Opc);
6582 break;
6583 }
6584 case Intrinsic::aarch64_neon_st2lane: {
6585 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6586 unsigned Opc;
6587 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6588 Opc = AArch64::ST2i8;
6589 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6590 Opc = AArch64::ST2i16;
6591 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6592 Opc = AArch64::ST2i32;
6593 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6594 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6595 Opc = AArch64::ST2i64;
6596 else
6597 llvm_unreachable("Unexpected type for st2lane!");
6598 if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 2, Opc))
6599 return false;
6600 break;
6601 }
6602 case Intrinsic::aarch64_neon_st3lane: {
6603 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6604 unsigned Opc;
6605 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6606 Opc = AArch64::ST3i8;
6607 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6608 Opc = AArch64::ST3i16;
6609 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6610 Opc = AArch64::ST3i32;
6611 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6612 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6613 Opc = AArch64::ST3i64;
6614 else
6615 llvm_unreachable("Unexpected type for st3lane!");
6616 if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 3, Opc))
6617 return false;
6618 break;
6619 }
6620 case Intrinsic::aarch64_neon_st4lane: {
6621 LLT Ty = MRI.getType(Reg: I.getOperand(i: 1).getReg());
6622 unsigned Opc;
6623 if (Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S8) || Ty == LLT::fixed_vector(NumElements: 16, ScalarTy: S8))
6624 Opc = AArch64::ST4i8;
6625 else if (Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S16) || Ty == LLT::fixed_vector(NumElements: 8, ScalarTy: S16))
6626 Opc = AArch64::ST4i16;
6627 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S32) || Ty == LLT::fixed_vector(NumElements: 4, ScalarTy: S32))
6628 Opc = AArch64::ST4i32;
6629 else if (Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: S64) ||
6630 Ty == LLT::fixed_vector(NumElements: 2, ScalarTy: P0) || Ty == S64 || Ty == P0)
6631 Opc = AArch64::ST4i64;
6632 else
6633 llvm_unreachable("Unexpected type for st4lane!");
6634 if (!selectVectorStoreLaneIntrinsic(I, NumVecs: 4, Opc))
6635 return false;
6636 break;
6637 }
6638 case Intrinsic::aarch64_mops_memset_tag: {
6639 // Transform
6640 //   %dst:gpr(p0) = G_INTRINSIC_W_SIDE_EFFECTS
6641 //       intrinsic(@llvm.aarch64.mops.memset.tag),
6642 //       %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6643 // where %dst is updated, into
6644 //   %Rd:GPR64common, %Rn:GPR64 =
6645 //       MOPSMemorySetTaggingPseudo
6646 //       %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6647 // where Rd and Rn are tied.
6648 // It is expected that %val has been extended to s64 in legalization.
6649 // Note that the order of the size/value operands is swapped.
6650
6651 Register DstDef = I.getOperand(i: 0).getReg();
6652 // I.getOperand(1) is the intrinsic function
6653 Register DstUse = I.getOperand(i: 2).getReg();
6654 Register ValUse = I.getOperand(i: 3).getReg();
6655 Register SizeUse = I.getOperand(i: 4).getReg();
6656
6657 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6658 // Therefore an additional virtual register is required for the updated size
6659 // operand. This value is not accessible via the semantics of the intrinsic.
6660 Register SizeDef = MRI.createGenericVirtualRegister(Ty: LLT::scalar(SizeInBits: 64));
6661
6662 auto Memset = MIB.buildInstr(Opc: AArch64::MOPSMemorySetTaggingPseudo,
6663 DstOps: {DstDef, SizeDef}, SrcOps: {DstUse, SizeUse, ValUse});
6664 Memset.cloneMemRefs(OtherMI: I);
6665 constrainSelectedInstRegOperands(I&: *Memset, TII, TRI, RBI);
6666 break;
6667 }
6668 }
6669
6670 I.eraseFromParent();
6671 return true;
6672}
6673
6674bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
6675 MachineRegisterInfo &MRI) {
6676 unsigned IntrinID = cast<GIntrinsic>(Val&: I).getIntrinsicID();
6677
6678 switch (IntrinID) {
6679 default:
6680 break;
6681 case Intrinsic::aarch64_crypto_sha1h: {
6682 Register DstReg = I.getOperand(i: 0).getReg();
6683 Register SrcReg = I.getOperand(i: 2).getReg();
6684
6685 // FIXME: Should this be an assert?
6686 if (MRI.getType(Reg: DstReg).getSizeInBits() != 32 ||
6687 MRI.getType(Reg: SrcReg).getSizeInBits() != 32)
6688 return false;
6689
6690 // The operation has to happen on FPRs. Set up some new FPR registers for
6691 // the source and destination if they are on GPRs.
6692 if (RBI.getRegBank(Reg: SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
6693 SrcReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR32RegClass);
6694 MIB.buildCopy(Res: {SrcReg}, Op: {I.getOperand(i: 2)});
6695
6696 // Make sure the copy ends up getting constrained properly.
6697 RBI.constrainGenericRegister(Reg: I.getOperand(i: 2).getReg(),
6698 RC: AArch64::GPR32RegClass, MRI);
6699 }
6700
6701 if (RBI.getRegBank(Reg: DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
6702 DstReg = MRI.createVirtualRegister(RegClass: &AArch64::FPR32RegClass);
6703
6704 // Actually insert the instruction.
6705 auto SHA1Inst = MIB.buildInstr(Opc: AArch64::SHA1Hrr, DstOps: {DstReg}, SrcOps: {SrcReg});
6706 constrainSelectedInstRegOperands(I&: *SHA1Inst, TII, TRI, RBI);
6707
6708 // Did we create a new register for the destination?
6709 if (DstReg != I.getOperand(i: 0).getReg()) {
6710 // Yep. Copy the result of the instruction back into the original
6711 // destination.
6712 MIB.buildCopy(Res: {I.getOperand(i: 0)}, Op: {DstReg});
6713 RBI.constrainGenericRegister(Reg: I.getOperand(i: 0).getReg(),
6714 RC: AArch64::GPR32RegClass, MRI);
6715 }
6716
6717 I.eraseFromParent();
6718 return true;
6719 }
6720 case Intrinsic::ptrauth_resign: {
6721 Register DstReg = I.getOperand(i: 0).getReg();
6722 Register ValReg = I.getOperand(i: 2).getReg();
6723 uint64_t AUTKey = I.getOperand(i: 3).getImm();
6724 Register AUTDisc = I.getOperand(i: 4).getReg();
6725 uint64_t PACKey = I.getOperand(i: 5).getImm();
6726 Register PACDisc = I.getOperand(i: 6).getReg();
6727
6728 Register AUTAddrDisc = AUTDisc;
6729 uint16_t AUTConstDiscC = 0;
6730 std::tie(args&: AUTConstDiscC, args&: AUTAddrDisc) =
6731 extractPtrauthBlendDiscriminators(Disc: AUTDisc, MRI);
6732
6733 Register PACAddrDisc = PACDisc;
6734 uint16_t PACConstDiscC = 0;
6735 std::tie(args&: PACConstDiscC, args&: PACAddrDisc) =
6736 extractPtrauthBlendDiscriminators(Disc: PACDisc, MRI);
6737
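    // The AUTPAC pseudo operates on the value in X16 and uses X17 as scratch,
    // so move the input into X16 and mark X17 as defined before building it.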
6738 MIB.buildCopy(Res: {AArch64::X16}, Op: {ValReg});
6739 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
6740 MIB.buildInstr(Opcode: AArch64::AUTPAC)
6741 .addImm(Val: AUTKey)
6742 .addImm(Val: AUTConstDiscC)
6743 .addUse(RegNo: AUTAddrDisc)
6744 .addImm(Val: PACKey)
6745 .addImm(Val: PACConstDiscC)
6746 .addUse(RegNo: PACAddrDisc)
6747 .constrainAllUses(TII, TRI, RBI);
6748 MIB.buildCopy(Res: {DstReg}, Op: Register(AArch64::X16));
6749
6750 RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
6751 I.eraseFromParent();
6752 return true;
6753 }
6754 case Intrinsic::ptrauth_auth: {
6755 Register DstReg = I.getOperand(i: 0).getReg();
6756 Register ValReg = I.getOperand(i: 2).getReg();
6757 uint64_t AUTKey = I.getOperand(i: 3).getImm();
6758 Register AUTDisc = I.getOperand(i: 4).getReg();
6759
6760 Register AUTAddrDisc = AUTDisc;
6761 uint16_t AUTConstDiscC = 0;
6762 std::tie(args&: AUTConstDiscC, args&: AUTAddrDisc) =
6763 extractPtrauthBlendDiscriminators(Disc: AUTDisc, MRI);
6764
6765 MIB.buildCopy(Res: {AArch64::X16}, Op: {ValReg});
6766 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
6767 MIB.buildInstr(Opcode: AArch64::AUT)
6768 .addImm(Val: AUTKey)
6769 .addImm(Val: AUTConstDiscC)
6770 .addUse(RegNo: AUTAddrDisc)
6771 .constrainAllUses(TII, TRI, RBI);
6772 MIB.buildCopy(Res: {DstReg}, Op: Register(AArch64::X16));
6773
6774 RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
6775 I.eraseFromParent();
6776 return true;
6777 }
6778 case Intrinsic::frameaddress:
6779 case Intrinsic::returnaddress: {
6780 MachineFunction &MF = *I.getParent()->getParent();
6781 MachineFrameInfo &MFI = MF.getFrameInfo();
6782
6783 unsigned Depth = I.getOperand(i: 2).getImm();
6784 Register DstReg = I.getOperand(i: 0).getReg();
6785 RBI.constrainGenericRegister(Reg: DstReg, RC: AArch64::GPR64RegClass, MRI);
6786
6787 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
6788 if (!MFReturnAddr) {
6789 // Insert the copy from LR/X30 into the entry block, before it can be
6790 // clobbered by anything.
6791 MFI.setReturnAddressIsTaken(true);
6792 MFReturnAddr = getFunctionLiveInPhysReg(
6793 MF, TII, PhysReg: AArch64::LR, RC: AArch64::GPR64RegClass, DL: I.getDebugLoc());
6794 }
6795
6796 if (STI.hasPAuth()) {
6797 MIB.buildInstr(Opc: AArch64::XPACI, DstOps: {DstReg}, SrcOps: {MFReturnAddr});
6798 } else {
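        // XPACLRI implicitly operates on LR, so shuttle the value through LR.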
6799 MIB.buildCopy(Res: {Register(AArch64::LR)}, Op: {MFReturnAddr});
6800 MIB.buildInstr(Opcode: AArch64::XPACLRI);
6801 MIB.buildCopy(Res: {DstReg}, Op: {Register(AArch64::LR)});
6802 }
6803
6804 I.eraseFromParent();
6805 return true;
6806 }
6807
6808 MFI.setFrameAddressIsTaken(true);
6809 Register FrameAddr(AArch64::FP);
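    // Each frame record stores the previous frame pointer at offset 0, so chase
    // the chain Depth times.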
6810 while (Depth--) {
6811 Register NextFrame = MRI.createVirtualRegister(RegClass: &AArch64::GPR64spRegClass);
6812 auto Ldr =
6813 MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {NextFrame}, SrcOps: {FrameAddr}).addImm(Val: 0);
6814 constrainSelectedInstRegOperands(I&: *Ldr, TII, TRI, RBI);
6815 FrameAddr = NextFrame;
6816 }
6817
6818 if (IntrinID == Intrinsic::frameaddress)
6819 MIB.buildCopy(Res: {DstReg}, Op: {FrameAddr});
6820 else {
6821 MFI.setReturnAddressIsTaken(true);
6822
6823 if (STI.hasPAuth()) {
6824 Register TmpReg = MRI.createVirtualRegister(RegClass: &AArch64::GPR64RegClass);
6825 MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {TmpReg}, SrcOps: {FrameAddr}).addImm(Val: 1);
6826 MIB.buildInstr(Opc: AArch64::XPACI, DstOps: {DstReg}, SrcOps: {TmpReg});
6827 } else {
6828 MIB.buildInstr(Opc: AArch64::LDRXui, DstOps: {Register(AArch64::LR)}, SrcOps: {FrameAddr})
6829 .addImm(Val: 1);
6830 MIB.buildInstr(Opcode: AArch64::XPACLRI);
6831 MIB.buildCopy(Res: {DstReg}, Op: {Register(AArch64::LR)});
6832 }
6833 }
6834
6835 I.eraseFromParent();
6836 return true;
6837 }
6838 case Intrinsic::aarch64_neon_tbl2:
6839 SelectTable(I, MRI, NumVecs: 2, Opc1: AArch64::TBLv8i8Two, Opc2: AArch64::TBLv16i8Two, isExt: false);
6840 return true;
6841 case Intrinsic::aarch64_neon_tbl3:
6842 SelectTable(I, MRI, NumVecs: 3, Opc1: AArch64::TBLv8i8Three, Opc2: AArch64::TBLv16i8Three,
6843 isExt: false);
6844 return true;
6845 case Intrinsic::aarch64_neon_tbl4:
6846 SelectTable(I, MRI, NumVecs: 4, Opc1: AArch64::TBLv8i8Four, Opc2: AArch64::TBLv16i8Four, isExt: false);
6847 return true;
6848 case Intrinsic::aarch64_neon_tbx2:
6849 SelectTable(I, MRI, NumVecs: 2, Opc1: AArch64::TBXv8i8Two, Opc2: AArch64::TBXv16i8Two, isExt: true);
6850 return true;
6851 case Intrinsic::aarch64_neon_tbx3:
6852 SelectTable(I, MRI, NumVecs: 3, Opc1: AArch64::TBXv8i8Three, Opc2: AArch64::TBXv16i8Three, isExt: true);
6853 return true;
6854 case Intrinsic::aarch64_neon_tbx4:
6855 SelectTable(I, MRI, NumVecs: 4, Opc1: AArch64::TBXv8i8Four, Opc2: AArch64::TBXv16i8Four, isExt: true);
6856 return true;
6857 case Intrinsic::swift_async_context_addr:
6858 auto Sub = MIB.buildInstr(Opc: AArch64::SUBXri, DstOps: {I.getOperand(i: 0).getReg()},
6859 SrcOps: {Register(AArch64::FP)})
6860 .addImm(Val: 8)
6861 .addImm(Val: 0);
6862 constrainSelectedInstRegOperands(I&: *Sub, TII, TRI, RBI);
6863
6864 MF->getFrameInfo().setFrameAddressIsTaken(true);
6865 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
6866 I.eraseFromParent();
6867 return true;
6868 }
6869 return false;
6870}
6871
6872// G_PTRAUTH_GLOBAL_VALUE lowering
6873//
6874// We have 3 lowering alternatives to choose from:
6875// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6876// If the GV doesn't need a GOT load (i.e., is locally defined)
6877// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6878//
6879// - LOADgotPAC: similar to LOADgot, with added PAC.
6880// If the GV needs a GOT load, materialize the pointer using the usual
6881// GOT adrp+ldr, +pac. Pointers in GOT are assumed to be not signed, the GOT
6882// section is assumed to be read-only (for example, via relro mechanism). See
6883// LowerMOVaddrPAC.
6884//
6885 // - LOADauthptrstatic: similar to LOADgot, but uses a
6886 // special stub slot instead of a GOT slot.
6887// Load a signed pointer for symbol 'sym' from a stub slot named
6888 // 'sym$auth_ptr$key$disc', filled by the dynamic linker during relocation
6889// resolving. This usually lowers to adrp+ldr, but also emits an entry into
6890// .data with an
6891// @AUTH relocation. See LowerLOADauthptrstatic.
6892//
6893 // All 3 are pseudos that are expanded late into longer sequences: this lets us
6894// provide integrity guarantees on the to-be-signed intermediate values.
6895//
6896// LOADauthptrstatic is undesirable because it requires a large section filled
6897// with often similarly-signed pointers, making it a good harvesting target.
6898// Thus, it's only used for ptrauth references to extern_weak to avoid null
6899// checks.
6900
6901bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
6902 MachineInstr &I, MachineRegisterInfo &MRI) const {
6903 Register DefReg = I.getOperand(i: 0).getReg();
6904 Register Addr = I.getOperand(i: 1).getReg();
6905 uint64_t Key = I.getOperand(i: 2).getImm();
6906 Register AddrDisc = I.getOperand(i: 3).getReg();
6907 uint64_t Disc = I.getOperand(i: 4).getImm();
6908 int64_t Offset = 0;
6909
6910 if (Key > AArch64PACKey::LAST)
6911 report_fatal_error(reason: "key in ptrauth global out of range [0, " +
6912 Twine((int)AArch64PACKey::LAST) + "]");
6913
6914 // Blend only works if the integer discriminator is 16-bit wide.
6915 if (!isUInt<16>(x: Disc))
6916 report_fatal_error(
6917 reason: "constant discriminator in ptrauth global out of range [0, 0xffff]");
6918
6919 // Choosing between 3 lowering alternatives is target-specific.
6920 if (!STI.isTargetELF() && !STI.isTargetMachO())
6921 report_fatal_error(reason: "ptrauth global lowering only supported on MachO/ELF");
6922
6923 if (!MRI.hasOneDef(RegNo: Addr))
6924 return false;
6925
6926 // First match any offset we take from the real global.
6927 const MachineInstr *DefMI = &*MRI.def_instr_begin(RegNo: Addr);
6928 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
6929 Register OffsetReg = DefMI->getOperand(i: 2).getReg();
6930 if (!MRI.hasOneDef(RegNo: OffsetReg))
6931 return false;
6932 const MachineInstr &OffsetMI = *MRI.def_instr_begin(RegNo: OffsetReg);
6933 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
6934 return false;
6935
6936 Addr = DefMI->getOperand(i: 1).getReg();
6937 if (!MRI.hasOneDef(RegNo: Addr))
6938 return false;
6939
6940 DefMI = &*MRI.def_instr_begin(RegNo: Addr);
6941 Offset = OffsetMI.getOperand(i: 1).getCImm()->getSExtValue();
6942 }
6943
6944 // We should be left with a genuine unauthenticated GlobalValue.
6945 const GlobalValue *GV;
6946 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
6947 GV = DefMI->getOperand(i: 1).getGlobal();
6948 Offset += DefMI->getOperand(i: 1).getOffset();
6949 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
6950 GV = DefMI->getOperand(i: 2).getGlobal();
6951 Offset += DefMI->getOperand(i: 2).getOffset();
6952 } else {
6953 return false;
6954 }
6955
6956 MachineIRBuilder MIB(I);
6957
6958 // Classify the reference to determine whether it needs a GOT load.
6959 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
6960 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
6961 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
6962 "unsupported non-GOT op flags on ptrauth global reference");
6963 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
6964 "unsupported non-GOT reference to weak ptrauth global");
6965
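  // A constant-zero address discriminator means there is no address diversity;
  // any other value (constant or not) is treated as a live discriminator.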
6966 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(VReg: AddrDisc, MRI);
6967 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
6968
6969 // Non-extern_weak:
6970 // - No GOT load needed -> MOVaddrPAC
6971 // - GOT load for non-extern_weak -> LOADgotPAC
6972 // Note that we disallow extern_weak refs to avoid null checks later.
6973 if (!GV->hasExternalWeakLinkage()) {
6974 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X16}, SrcOps: {});
6975 MIB.buildInstr(Opc: TargetOpcode::IMPLICIT_DEF, DstOps: {AArch64::X17}, SrcOps: {});
6976 MIB.buildInstr(Opcode: NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
6977 .addGlobalAddress(GV, Offset)
6978 .addImm(Val: Key)
6979 .addReg(RegNo: HasAddrDisc ? AddrDisc : AArch64::XZR)
6980 .addImm(Val: Disc)
6981 .constrainAllUses(TII, TRI, RBI);
6982 MIB.buildCopy(Res: DefReg, Op: Register(AArch64::X16));
6983 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
6984 I.eraseFromParent();
6985 return true;
6986 }
6987
6988 // extern_weak -> LOADauthptrstatic
6989
6990 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
6991 // offset alone as a pointer if the symbol wasn't available, which would
6992 // probably break null checks in users. Ptrauth complicates things further:
6993 // error out.
6994 if (Offset != 0)
6995 report_fatal_error(
6996 reason: "unsupported non-zero offset in weak ptrauth global reference");
6997
6998 if (HasAddrDisc)
6999 report_fatal_error(reason: "unsupported weak addr-div ptrauth global");
7000
7001 MIB.buildInstr(Opc: AArch64::LOADauthptrstatic, DstOps: {DefReg}, SrcOps: {})
7002 .addGlobalAddress(GV, Offset)
7003 .addImm(Val: Key)
7004 .addImm(Val: Disc);
7005 RBI.constrainGenericRegister(Reg: DefReg, RC: AArch64::GPR64RegClass, MRI);
7006
7007 I.eraseFromParent();
7008 return true;
7009}
7010
7011void AArch64InstructionSelector::SelectTable(MachineInstr &I,
7012 MachineRegisterInfo &MRI,
7013 unsigned NumVec, unsigned Opc1,
7014 unsigned Opc2, bool isExt) {
7015 Register DstReg = I.getOperand(i: 0).getReg();
7016 unsigned Opc = MRI.getType(Reg: DstReg) == LLT::fixed_vector(NumElements: 8, ScalarSizeInBits: 8) ? Opc1 : Opc2;
7017
7018 // Create the REG_SEQUENCE
7019 SmallVector<Register, 4> Regs;
7020 for (unsigned i = 0; i < NumVec; i++)
7021 Regs.push_back(Elt: I.getOperand(i: i + 2 + isExt).getReg());
7022 Register RegSeq = createQTuple(Regs, MIB);
7023
7024 Register IdxReg = I.getOperand(i: 2 + NumVec + isExt).getReg();
7025 MachineInstrBuilder Instr;
7026 if (isExt) {
7027 Register Reg = I.getOperand(i: 2).getReg();
7028 Instr = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {Reg, RegSeq, IdxReg});
7029 } else
7030 Instr = MIB.buildInstr(Opc, DstOps: {DstReg}, SrcOps: {RegSeq, IdxReg});
7031 constrainSelectedInstRegOperands(I&: *Instr, TII, TRI, RBI);
7032 I.eraseFromParent();
7033}
7034
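/// The ShiftA/ShiftB renderers below produce the immr/imms immediates used
/// when a constant shift is selected via its bitfield-move (UBFM/SBFM) form:
/// e.g. a 32-bit "lsl #N" is UBFM with immr = (32 - N) & 0x1f and
/// imms = 31 - N, so N = 4 renders (28, 27). The _64 variants do the same
/// with a 0x3f mask and 63.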
7035InstructionSelector::ComplexRendererFns
7036AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
7037 auto MaybeImmed = getImmedFromMO(Root);
7038 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
7039 return std::nullopt;
7040 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
7041 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
7042}
7043
7044InstructionSelector::ComplexRendererFns
7045AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
7046 auto MaybeImmed = getImmedFromMO(Root);
7047 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
7048 return std::nullopt;
7049 uint64_t Enc = 31 - *MaybeImmed;
7050 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
7051}
7052
7053InstructionSelector::ComplexRendererFns
7054AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
7055 auto MaybeImmed = getImmedFromMO(Root);
7056 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7057 return std::nullopt;
7058 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
7059 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
7060}
7061
7062InstructionSelector::ComplexRendererFns
7063AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
7064 auto MaybeImmed = getImmedFromMO(Root);
7065 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7066 return std::nullopt;
7067 uint64_t Enc = 63 - *MaybeImmed;
7068 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Enc); }}};
7069}
7070
7071/// Helper to select an immediate value that can be represented as a 12-bit
7072/// value shifted left by either 0 or 12. If it is possible to do so, return
7073/// the immediate and shift value. If not, return std::nullopt.
7074///
7075/// Used by selectArithImmed and selectNegArithImmed.
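/// For example, 0xabc yields (0xabc, LSL #0), 0xabc000 yields (0xabc, LSL #12),
/// and 0xabc123 cannot be represented, so std::nullopt is returned.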
7076InstructionSelector::ComplexRendererFns
7077AArch64InstructionSelector::select12BitValueWithLeftShift(
7078 uint64_t Immed) const {
7079 unsigned ShiftAmt;
7080 if (Immed >> 12 == 0) {
7081 ShiftAmt = 0;
7082 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
7083 ShiftAmt = 12;
7084 Immed = Immed >> 12;
7085 } else
7086 return std::nullopt;
7087
7088 unsigned ShVal = AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt);
7089 return {{
7090 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: Immed); },
7091 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShVal); },
7092 }};
7093}
7094
7095/// SelectArithImmed - Select an immediate value that can be represented as
7096/// a 12-bit value shifted left by either 0 or 12. If so, return true with
7097/// Val set to the 12-bit value and Shift set to the shifter operand.
7098InstructionSelector::ComplexRendererFns
7099AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
7100 // This function is called from the addsub_shifted_imm ComplexPattern,
7101 // which lists [imm] as the list of opcodes it's interested in; however,
7102 // we still need to check whether the operand is actually an immediate
7103 // here because the ComplexPattern opcode list is only used in
7104 // root-level opcode matching.
7105 auto MaybeImmed = getImmedFromMO(Root);
7106 if (MaybeImmed == std::nullopt)
7107 return std::nullopt;
7108 return select12BitValueWithLeftShift(Immed: *MaybeImmed);
7109}
7110
7111/// SelectNegArithImmed - As above, but negates the value before trying to
7112/// select it.
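/// E.g. comparing a 64-bit value against -4096: negating gives 4096, which is
/// representable as (#1, LSL #12), so a cmn/adds-style form can be used.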
7113InstructionSelector::ComplexRendererFns
7114AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
7115 // We need a register here, because we need to know if we have a 64 or 32
7116 // bit immediate.
7117 if (!Root.isReg())
7118 return std::nullopt;
7119 auto MaybeImmed = getImmedFromMO(Root);
7120 if (MaybeImmed == std::nullopt)
7121 return std::nullopt;
7122 uint64_t Immed = *MaybeImmed;
7123
7124 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
7125 // have the opposite effect on the C flag, so this pattern mustn't match under
7126 // those circumstances.
7127 if (Immed == 0)
7128 return std::nullopt;
7129
7130 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
7131 // the root.
7132 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7133 if (MRI.getType(Reg: Root.getReg()).getSizeInBits() == 32)
7134 Immed = ~((uint32_t)Immed) + 1;
7135 else
7136 Immed = ~Immed + 1ULL;
7137
7138 if (Immed & 0xFFFFFFFFFF000000ULL)
7139 return std::nullopt;
7140
7141 Immed &= 0xFFFFFFULL;
7142 return select12BitValueWithLeftShift(Immed);
7143}
7144
7145 /// Checks whether we can be sure that folding MI into a load/store
7146 /// addressing mode is beneficial or not.
7147///
7148/// Returns:
7149/// - true if folding MI would be beneficial.
7150/// - false if folding MI would be bad.
7151/// - std::nullopt if it is not sure whether folding MI is beneficial.
7152///
7153/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
7154///
7155/// %13:gpr(s64) = G_CONSTANT i64 1
7156/// %8:gpr(s64) = G_SHL %6, %13(s64)
7157/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
7158/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
7159std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7160 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7161 if (MI.getOpcode() == AArch64::G_SHL) {
7162 // Address operands with shifts are free, except when running on subtargets
7163 // with AddrLSLSlow14.
7164 if (const auto ValAndVReg = getIConstantVRegValWithLookThrough(
7165 VReg: MI.getOperand(i: 2).getReg(), MRI)) {
7166 const APInt ShiftVal = ValAndVReg->Value;
7167
7168 // Don't fold if we know this will be slow.
7169 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7170 }
7171 }
7172 return std::nullopt;
7173}
7174
7175/// Return true if it is worth folding MI into an extended register. That is,
7176/// if it's safe to pull it into the addressing mode of a load or store as a
7177/// shift.
7178/// \p IsAddrOperand whether the def of MI is used as an address operand
7179/// (e.g. feeding into an LDR/STR).
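/// For example, a G_SHL whose only user is a G_LOAD is always folded; a G_SHL
/// with several non-memory users generally is not, since the shift would still
/// have to be computed separately for those users.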
7180bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7181 MachineInstr &MI, const MachineRegisterInfo &MRI,
7182 bool IsAddrOperand) const {
7183
7184 // Always fold if there is one use, or if we're optimizing for size.
7185 Register DefReg = MI.getOperand(i: 0).getReg();
7186 if (MRI.hasOneNonDBGUse(RegNo: DefReg) ||
7187 MI.getParent()->getParent()->getFunction().hasOptSize())
7188 return true;
7189
7190 if (IsAddrOperand) {
7191 // If we are already sure that folding MI is good or bad, return the result.
7192 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7193 return *Worth;
7194
7195 // Fold G_PTR_ADD if its offset operand can be folded
7196 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7197 MachineInstr *OffsetInst =
7198 getDefIgnoringCopies(Reg: MI.getOperand(i: 2).getReg(), MRI);
7199
7200 // Note, we already know G_PTR_ADD is used by at least two instructions.
7201 // If we are also sure about whether folding is beneficial or not,
7202 // return the result.
7203 if (const auto Worth = isWorthFoldingIntoAddrMode(MI&: *OffsetInst, MRI))
7204 return *Worth;
7205 }
7206 }
7207
7208 // FIXME: Consider checking HasALULSLFast as appropriate.
7209
7210 // We have a fastpath, so folding a shift in and potentially computing it
7211 // many times may be beneficial. Check if this is only used in memory ops.
7212 // If it is, then we should fold.
7213 return all_of(Range: MRI.use_nodbg_instructions(Reg: DefReg),
7214 P: [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7215}
7216
7217static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
7218 switch (Type) {
7219 case AArch64_AM::SXTB:
7220 case AArch64_AM::SXTH:
7221 case AArch64_AM::SXTW:
7222 return true;
7223 default:
7224 return false;
7225 }
7226}
7227
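/// Match an offset operand of the form (G_SHL x, log2(SizeInBytes)) or
/// (G_MUL x, SizeInBytes), optionally behind a G_ZEXT when WantsExt is true,
/// and render {Base, x, extend, #1} so the access becomes e.g.
///   ldr x0, [Base, x, lsl #log2(SizeInBytes)]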
7228InstructionSelector::ComplexRendererFns
7229AArch64InstructionSelector::selectExtendedSHL(
7230 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
7231 unsigned SizeInBytes, bool WantsExt) const {
7232 assert(Base.isReg() && "Expected base to be a register operand");
7233 assert(Offset.isReg() && "Expected offset to be a register operand");
7234
7235 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7236 MachineInstr *OffsetInst = MRI.getVRegDef(Reg: Offset.getReg());
7237
7238 unsigned OffsetOpc = OffsetInst->getOpcode();
7239 bool LookedThroughZExt = false;
7240 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
7241 // Try to look through a ZEXT.
7242 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
7243 return std::nullopt;
7244
7245 OffsetInst = MRI.getVRegDef(Reg: OffsetInst->getOperand(i: 1).getReg());
7246 OffsetOpc = OffsetInst->getOpcode();
7247 LookedThroughZExt = true;
7248
7249 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
7250 return std::nullopt;
7251 }
7252 // Make sure that the memory op is a valid size.
7253 int64_t LegalShiftVal = Log2_32(Value: SizeInBytes);
7254 if (LegalShiftVal == 0)
7255 return std::nullopt;
7256 if (!isWorthFoldingIntoExtendedReg(MI&: *OffsetInst, MRI, IsAddrOperand: true))
7257 return std::nullopt;
7258
7259 // Now, try to find the specific G_CONSTANT. Start by assuming that the
7260 // register we will offset is the LHS, and the register containing the
7261 // constant is the RHS.
7262 Register OffsetReg = OffsetInst->getOperand(i: 1).getReg();
7263 Register ConstantReg = OffsetInst->getOperand(i: 2).getReg();
7264 auto ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
7265 if (!ValAndVReg) {
7266 // We didn't get a constant on the RHS. If the opcode is a shift, then
7267 // we're done.
7268 if (OffsetOpc == TargetOpcode::G_SHL)
7269 return std::nullopt;
7270
7271 // If we have a G_MUL, we can use either register. Try looking at the RHS.
7272 std::swap(a&: OffsetReg, b&: ConstantReg);
7273 ValAndVReg = getIConstantVRegValWithLookThrough(VReg: ConstantReg, MRI);
7274 if (!ValAndVReg)
7275 return std::nullopt;
7276 }
7277
7278 // The value must fit into 3 bits, and must be positive. Make sure that is
7279 // true.
7280 int64_t ImmVal = ValAndVReg->Value.getSExtValue();
7281
7282 // Since we're going to pull this into a shift, the constant value must be
7283 // a power of 2. If we got a multiply, then we need to check this.
7284 if (OffsetOpc == TargetOpcode::G_MUL) {
7285 if (!llvm::has_single_bit<uint32_t>(Value: ImmVal))
7286 return std::nullopt;
7287
7288 // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
7289 ImmVal = Log2_32(Value: ImmVal);
7290 }
7291
7292 if ((ImmVal & 0x7) != ImmVal)
7293 return std::nullopt;
7294
7295 // We are only allowed to shift by LegalShiftVal. This shift value is built
7296 // into the instruction, so we can't just use whatever we want.
7297 if (ImmVal != LegalShiftVal)
7298 return std::nullopt;
7299
7300 unsigned SignExtend = 0;
7301 if (WantsExt) {
7302 // Check if the offset is defined by an extend, unless we looked through a
7303 // G_ZEXT earlier.
7304 if (!LookedThroughZExt) {
7305 MachineInstr *ExtInst = getDefIgnoringCopies(Reg: OffsetReg, MRI);
7306 auto Ext = getExtendTypeForInst(MI&: *ExtInst, MRI, IsLoadStore: true);
7307 if (Ext == AArch64_AM::InvalidShiftExtend)
7308 return std::nullopt;
7309
7310 SignExtend = isSignExtendShiftType(Type: Ext) ? 1 : 0;
7311 // We only support SXTW for signed extension here.
7312 if (SignExtend && Ext != AArch64_AM::SXTW)
7313 return std::nullopt;
7314 OffsetReg = ExtInst->getOperand(i: 1).getReg();
7315 }
7316
7317 // Need a 32-bit wide register here.
7318 MachineIRBuilder MIB(*MRI.getVRegDef(Reg: Root.getReg()));
7319 OffsetReg = moveScalarRegClass(Reg: OffsetReg, RC: AArch64::GPR32RegClass, MIB);
7320 }
7321
7322 // We can use the LHS of the GEP as the base, and the LHS of the shift as an
7323 // offset. Signify that we are shifting by setting the shift flag to 1.
7324 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: Base.getReg()); },
7325 [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: OffsetReg); },
7326 [=](MachineInstrBuilder &MIB) {
7327 // Need to add both immediates here to make sure that they are both
7328 // added to the instruction.
7329 MIB.addImm(Val: SignExtend);
7330 MIB.addImm(Val: 1);
7331 }}};
7332}
7333
7334/// This is used for computing addresses like this:
7335///
7336/// ldr x1, [x2, x3, lsl #3]
7337///
7338/// Where x2 is the base register, and x3 is an offset register. The shift-left
7339/// is a constant value specific to this load instruction. That is, we'll never
7340/// see anything other than a 3 here (which corresponds to the size of the
7341/// element being loaded.)
7342InstructionSelector::ComplexRendererFns
7343AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7344 MachineOperand &Root, unsigned SizeInBytes) const {
7345 if (!Root.isReg())
7346 return std::nullopt;
7347 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7348
7349 // We want to find something like this:
7350 //
7351 // val = G_CONSTANT LegalShiftVal
7352 // shift = G_SHL off_reg val
7353 // ptr = G_PTR_ADD base_reg shift
7354 // x = G_LOAD ptr
7355 //
7356 // And fold it into this addressing mode:
7357 //
7358 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7359
7360 // Check if we can find the G_PTR_ADD.
7361 MachineInstr *PtrAdd =
7362 getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
7363 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI&: *PtrAdd, MRI, IsAddrOperand: true))
7364 return std::nullopt;
7365
7366 // Now, try to match an opcode which will match our specific offset.
7367 // We want a G_SHL or a G_MUL.
7368 MachineInstr *OffsetInst =
7369 getDefIgnoringCopies(Reg: PtrAdd->getOperand(i: 2).getReg(), MRI);
7370 return selectExtendedSHL(Root, Base&: PtrAdd->getOperand(i: 1),
7371 Offset&: OffsetInst->getOperand(i: 0), SizeInBytes,
7372 /*WantsExt=*/false);
7373}
7374
7375/// This is used for computing addresses like this:
7376///
7377/// ldr x1, [x2, x3]
7378///
7379/// Where x2 is the base register, and x3 is an offset register.
7380///
7381/// When possible (or profitable) to fold a G_PTR_ADD into the address
7382/// calculation, this will do so. Otherwise, it will return std::nullopt.
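/// E.g. (a sketch):
///   %ptr:gpr(p0) = G_PTR_ADD %base, %off(s64)
///   %val:gpr(s64) = G_LOAD %ptr(p0) :: (load (s64))
/// becomes "ldr %val, [%base, %off]" when the G_PTR_ADD has no other users.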
7383InstructionSelector::ComplexRendererFns
7384AArch64InstructionSelector::selectAddrModeRegisterOffset(
7385 MachineOperand &Root) const {
7386 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7387
7388 // We need a GEP.
7389 MachineInstr *Gep = MRI.getVRegDef(Reg: Root.getReg());
7390 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7391 return std::nullopt;
7392
7393 // If this is used more than once, let's not bother folding.
7394 // TODO: Check if they are memory ops. If they are, then we can still fold
7395 // without having to recompute anything.
7396 if (!MRI.hasOneNonDBGUse(RegNo: Gep->getOperand(i: 0).getReg()))
7397 return std::nullopt;
7398
7399 // Base is the GEP's LHS, offset is its RHS.
7400 return {{[=](MachineInstrBuilder &MIB) {
7401 MIB.addUse(RegNo: Gep->getOperand(i: 1).getReg());
7402 },
7403 [=](MachineInstrBuilder &MIB) {
7404 MIB.addUse(RegNo: Gep->getOperand(i: 2).getReg());
7405 },
7406 [=](MachineInstrBuilder &MIB) {
7407 // Need to add both immediates here to make sure that they are both
7408 // added to the instruction.
7409 MIB.addImm(Val: 0);
7410 MIB.addImm(Val: 0);
7411 }}};
7412}
7413
7414 /// This is intended to be equivalent to selectAddrModeXRO in
7415 /// AArch64ISelDAGToDAG. It's used for selecting X register offset loads.
7416InstructionSelector::ComplexRendererFns
7417AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
7418 unsigned SizeInBytes) const {
7419 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7420 if (!Root.isReg())
7421 return std::nullopt;
7422 MachineInstr *PtrAdd =
7423 getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
7424 if (!PtrAdd)
7425 return std::nullopt;
7426
7427 // Check for an immediate which cannot be encoded in the [base + imm]
7428 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
7429 // end up with code like:
7430 //
7431 // mov x0, wide
7432 // add x1, base, x0
7433 // ldr x2, [x1, x0]
7434 //
7435 // In this situation, we can use the [base, xreg] addressing mode to save an
7436 // add/sub:
7437 //
7438 // mov x0, wide
7439 // ldr x2, [base, x0]
7440 auto ValAndVReg =
7441 getIConstantVRegValWithLookThrough(VReg: PtrAdd->getOperand(i: 2).getReg(), MRI);
7442 if (ValAndVReg) {
7443 unsigned Scale = Log2_32(Value: SizeInBytes);
7444 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
7445
7446 // Skip immediates that can be selected in the load/store addressing
7447 // mode.
7448 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
7449 ImmOff < (0x1000 << Scale))
7450 return std::nullopt;
7451
7452 // Helper lambda to decide whether or not it is preferable to emit an add.
7453 auto isPreferredADD = [](int64_t ImmOff) {
7454 // Constants in [0x0, 0xfff] can be encoded in an add.
7455 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
7456 return true;
7457
7458 // Can it be encoded in an add lsl #12?
7459 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
7460 return false;
7461
7462 // It can be encoded in an add lsl #12, but we may not want to. If it is
7463 // possible to select this as a single movz, then prefer that. A single
7464 // movz is faster than an add with a shift.
7465 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
7466 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
7467 };
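    // For example, ImmOff = 0x123000 needs an "add ..., lsl #12" and cannot be
    // materialized by a single movz, so isPreferredADD returns true and we bail
    // out below; ImmOff = 0x230000 can be a single "movz ..., #0x23, lsl #16",
    // so we keep the [base, xreg] form instead.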
7468
7469 // If the immediate can be encoded in a single add/sub, then bail out.
7470 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
7471 return std::nullopt;
7472 }
7473
7474 // Try to fold shifts into the addressing mode.
7475 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
7476 if (AddrModeFns)
7477 return AddrModeFns;
7478
7479 // If that doesn't work, see if it's possible to fold in registers from
7480 // a GEP.
7481 return selectAddrModeRegisterOffset(Root);
7482}
7483
7484/// This is used for computing addresses like this:
7485///
7486/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
7487///
7488/// Where we have a 64-bit base register, a 32-bit offset register, and an
7489/// extend (which may or may not be signed).
7490InstructionSelector::ComplexRendererFns
7491AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
7492 unsigned SizeInBytes) const {
7493 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7494
7495 MachineInstr *PtrAdd =
7496 getOpcodeDef(Opcode: TargetOpcode::G_PTR_ADD, Reg: Root.getReg(), MRI);
7497 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(MI&: *PtrAdd, MRI, IsAddrOperand: true))
7498 return std::nullopt;
7499
7500 MachineOperand &LHS = PtrAdd->getOperand(i: 1);
7501 MachineOperand &RHS = PtrAdd->getOperand(i: 2);
7502 MachineInstr *OffsetInst = getDefIgnoringCopies(Reg: RHS.getReg(), MRI);
7503
7504 // The first case is the same as selectAddrModeXRO, except we need an extend.
7505 // In this case, we try to find a shift and extend, and fold them into the
7506 // addressing mode.
7507 //
7508 // E.g.
7509 //
7510 // off_reg = G_Z/S/ANYEXT ext_reg
7511 // val = G_CONSTANT LegalShiftVal
7512 // shift = G_SHL off_reg val
7513 // ptr = G_PTR_ADD base_reg shift
7514 // x = G_LOAD ptr
7515 //
7516 // In this case we can get a load like this:
7517 //
7518 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
7519 auto ExtendedShl = selectExtendedSHL(Root, Base&: LHS, Offset&: OffsetInst->getOperand(i: 0),
7520 SizeInBytes, /*WantsExt=*/true);
7521 if (ExtendedShl)
7522 return ExtendedShl;
7523
7524 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
7525 //
7526 // e.g.
7527 // ldr something, [base_reg, ext_reg, sxtw]
7528 if (!isWorthFoldingIntoExtendedReg(MI&: *OffsetInst, MRI, IsAddrOperand: true))
7529 return std::nullopt;
7530
7531 // Check if this is an extend. We'll get an extend type if it is.
7532 AArch64_AM::ShiftExtendType Ext =
7533 getExtendTypeForInst(MI&: *OffsetInst, MRI, /*IsLoadStore=*/true);
7534 if (Ext == AArch64_AM::InvalidShiftExtend)
7535 return std::nullopt;
7536
7537 // Need a 32-bit wide register.
7538 MachineIRBuilder MIB(*PtrAdd);
7539 Register ExtReg = moveScalarRegClass(Reg: OffsetInst->getOperand(i: 1).getReg(),
7540 RC: AArch64::GPR32RegClass, MIB);
7541 unsigned SignExtend = Ext == AArch64_AM::SXTW;
7542
7543 // Base is LHS, offset is ExtReg.
7544 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: LHS.getReg()); },
7545 [=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); },
7546 [=](MachineInstrBuilder &MIB) {
7547 MIB.addImm(Val: SignExtend);
7548 MIB.addImm(Val: 0);
7549 }}};
7550}
7551
7552/// Select a "register plus unscaled signed 9-bit immediate" address. This
7553/// should only match when there is an offset that is not valid for a scaled
7554/// immediate addressing mode. The "Size" argument is the size in bytes of the
7555/// memory reference, which is needed here to know what is valid for a scaled
7556/// immediate.
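/// E.g. a 64-bit load at offset 17 cannot use the scaled form (17 is not a
/// multiple of 8), but it fits the unscaled signed 9-bit range, giving
/// "ldur x0, [x1, #17]".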
7557InstructionSelector::ComplexRendererFns
7558AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7559 unsigned Size) const {
7560 MachineRegisterInfo &MRI =
7561 Root.getParent()->getParent()->getParent()->getRegInfo();
7562
7563 if (!Root.isReg())
7564 return std::nullopt;
7565
7566 if (!isBaseWithConstantOffset(Root, MRI))
7567 return std::nullopt;
7568
7569 MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg());
7570
7571 MachineOperand &OffImm = RootDef->getOperand(i: 2);
7572 if (!OffImm.isReg())
7573 return std::nullopt;
7574 MachineInstr *RHS = MRI.getVRegDef(Reg: OffImm.getReg());
7575 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7576 return std::nullopt;
7577 int64_t RHSC;
7578 MachineOperand &RHSOp1 = RHS->getOperand(i: 1);
7579 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7580 return std::nullopt;
7581 RHSC = RHSOp1.getCImm()->getSExtValue();
7582
7583 if (RHSC >= -256 && RHSC < 256) {
7584 MachineOperand &Base = RootDef->getOperand(i: 1);
7585 return {{
7586 [=](MachineInstrBuilder &MIB) { MIB.add(MO: Base); },
7587 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC); },
7588 }};
7589 }
7590 return std::nullopt;
7591}
7592
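/// Try to fold a G_ADD_LOW fed by an ADRP into the addressing mode as a
/// :lo12: page-offset, e.g. (a sketch):
///   %page:gpr64 = ADRP @g
///   %addr:gpr(p0) = G_ADD_LOW %page(p0), @g
///   %val = G_LOAD %addr
/// becomes "ldr %val, [%page, :lo12:@g]".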
7593InstructionSelector::ComplexRendererFns
7594AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
7595 unsigned Size,
7596 MachineRegisterInfo &MRI) const {
7597 if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
7598 return std::nullopt;
7599 MachineInstr &Adrp = *MRI.getVRegDef(Reg: RootDef.getOperand(i: 1).getReg());
7600 if (Adrp.getOpcode() != AArch64::ADRP)
7601 return std::nullopt;
7602
7603 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
7604 auto Offset = Adrp.getOperand(i: 1).getOffset();
7605 if (Offset % Size != 0)
7606 return std::nullopt;
7607
7608 auto GV = Adrp.getOperand(i: 1).getGlobal();
7609 if (GV->isThreadLocal())
7610 return std::nullopt;
7611
7612 auto &MF = *RootDef.getParent()->getParent();
7613 if (GV->getPointerAlignment(DL: MF.getDataLayout()) < Size)
7614 return std::nullopt;
7615
7616 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM: MF.getTarget());
7617 MachineIRBuilder MIRBuilder(RootDef);
7618 Register AdrpReg = Adrp.getOperand(i: 0).getReg();
7619 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: AdrpReg); },
7620 [=](MachineInstrBuilder &MIB) {
7621 MIB.addGlobalAddress(GV, Offset,
7622 TargetFlags: OpFlags | AArch64II::MO_PAGEOFF |
7623 AArch64II::MO_NC);
7624 }}};
7625}
7626
7627/// Select a "register plus scaled unsigned 12-bit immediate" address. The
7628/// "Size" argument is the size in bytes of the memory reference, which
7629/// determines the scale.
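/// E.g. for an 8-byte access with a constant G_PTR_ADD offset of 16, this
/// renders {base, #2}: the immediate is stored scaled by the access size,
/// i.e. "ldr x0, [base, #16]".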
7630InstructionSelector::ComplexRendererFns
7631AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
7632 unsigned Size) const {
7633 MachineFunction &MF = *Root.getParent()->getParent()->getParent();
7634 MachineRegisterInfo &MRI = MF.getRegInfo();
7635
7636 if (!Root.isReg())
7637 return std::nullopt;
7638
7639 MachineInstr *RootDef = MRI.getVRegDef(Reg: Root.getReg());
7640 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
7641 return {{
7642 [=](MachineInstrBuilder &MIB) { MIB.add(MO: RootDef->getOperand(i: 1)); },
7643 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); },
7644 }};
7645 }
7646
7647 CodeModel::Model CM = MF.getTarget().getCodeModel();
7648 // Check if we can fold in the ADD of a small code model ADRP + ADD address.
7649 // HACK: ld64 on Darwin doesn't support relocations on PRFM, so we can't fold
7650 // globals into the offset.
7651 MachineInstr *RootParent = Root.getParent();
7652 if (CM == CodeModel::Small &&
7653 !(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH &&
7654 STI.isTargetDarwin())) {
7655 auto OpFns = tryFoldAddLowIntoImm(RootDef&: *RootDef, Size, MRI);
7656 if (OpFns)
7657 return OpFns;
7658 }
7659
7660 if (isBaseWithConstantOffset(Root, MRI)) {
7661 MachineOperand &LHS = RootDef->getOperand(i: 1);
7662 MachineOperand &RHS = RootDef->getOperand(i: 2);
7663 MachineInstr *LHSDef = MRI.getVRegDef(Reg: LHS.getReg());
7664 MachineInstr *RHSDef = MRI.getVRegDef(Reg: RHS.getReg());
7665
7666 int64_t RHSC = (int64_t)RHSDef->getOperand(i: 1).getCImm()->getZExtValue();
7667 unsigned Scale = Log2_32(Value: Size);
7668 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
7669 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
7670 return {{
7671 [=](MachineInstrBuilder &MIB) { MIB.add(MO: LHSDef->getOperand(i: 1)); },
7672 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); },
7673 }};
7674
7675 return {{
7676 [=](MachineInstrBuilder &MIB) { MIB.add(MO: LHS); },
7677 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: RHSC >> Scale); },
7678 }};
7679 }
7680 }
7681
7682 // Before falling back to our general case, check if the unscaled
7683 // instructions can handle this. If so, that's preferable.
7684 if (selectAddrModeUnscaled(Root, Size))
7685 return std::nullopt;
7686
7687 return {{
7688 [=](MachineInstrBuilder &MIB) { MIB.add(MO: Root); },
7689 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: 0); },
7690 }};
7691}
7692
7693/// Given a shift instruction, return the correct shift type for that
7694/// instruction.
7695static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
7696 switch (MI.getOpcode()) {
7697 default:
7698 return AArch64_AM::InvalidShiftExtend;
7699 case TargetOpcode::G_SHL:
7700 return AArch64_AM::LSL;
7701 case TargetOpcode::G_LSHR:
7702 return AArch64_AM::LSR;
7703 case TargetOpcode::G_ASHR:
7704 return AArch64_AM::ASR;
7705 case TargetOpcode::G_ROTR:
7706 return AArch64_AM::ROR;
7707 }
7708}
7709
7710/// Select a "shifted register" operand. If the value is not shifted, set the
7711/// shift operand to a default value of "lsl 0".
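/// E.g. a G_ASHR of %x:(s64) by a constant 4 renders as {%x, asr #4}.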
7712InstructionSelector::ComplexRendererFns
7713AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
7714 bool AllowROR) const {
7715 if (!Root.isReg())
7716 return std::nullopt;
7717 MachineRegisterInfo &MRI =
7718 Root.getParent()->getParent()->getParent()->getRegInfo();
7719
7720 // Check if the operand is defined by an instruction which corresponds to
7721 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
7722 MachineInstr *ShiftInst = MRI.getVRegDef(Reg: Root.getReg());
7723 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(MI&: *ShiftInst);
7724 if (ShType == AArch64_AM::InvalidShiftExtend)
7725 return std::nullopt;
7726 if (ShType == AArch64_AM::ROR && !AllowROR)
7727 return std::nullopt;
7728 if (!isWorthFoldingIntoExtendedReg(MI&: *ShiftInst, MRI, IsAddrOperand: false))
7729 return std::nullopt;
7730
7731 // Need an immediate on the RHS.
7732 MachineOperand &ShiftRHS = ShiftInst->getOperand(i: 2);
7733 auto Immed = getImmedFromMO(Root: ShiftRHS);
7734 if (!Immed)
7735 return std::nullopt;
7736
7737 // We have something that we can fold. Fold in the shift's LHS and RHS into
7738 // the instruction.
7739 MachineOperand &ShiftLHS = ShiftInst->getOperand(i: 1);
7740 Register ShiftReg = ShiftLHS.getReg();
7741
7742 unsigned NumBits = MRI.getType(Reg: ShiftReg).getSizeInBits();
7743 unsigned Val = *Immed & (NumBits - 1);
7744 unsigned ShiftVal = AArch64_AM::getShifterImm(ST: ShType, Imm: Val);
7745
7746 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ShiftReg); },
7747 [=](MachineInstrBuilder &MIB) { MIB.addImm(Val: ShiftVal); }}};
7748}
7749
7750AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
7751 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
7752 unsigned Opc = MI.getOpcode();
7753
7754 // Handle explicit extend instructions first.
7755 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
7756 unsigned Size;
7757 if (Opc == TargetOpcode::G_SEXT)
7758 Size = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
7759 else
7760 Size = MI.getOperand(i: 2).getImm();
7761 assert(Size != 64 && "Extend from 64 bits?");
7762 switch (Size) {
7763 case 8:
7764 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
7765 case 16:
7766 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
7767 case 32:
7768 return AArch64_AM::SXTW;
7769 default:
7770 return AArch64_AM::InvalidShiftExtend;
7771 }
7772 }
7773
7774 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
7775 unsigned Size = MRI.getType(Reg: MI.getOperand(i: 1).getReg()).getSizeInBits();
7776 assert(Size != 64 && "Extend from 64 bits?");
7777 switch (Size) {
7778 case 8:
7779 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
7780 case 16:
7781 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
7782 case 32:
7783 return AArch64_AM::UXTW;
7784 default:
7785 return AArch64_AM::InvalidShiftExtend;
7786 }
7787 }
7788
7789 // Don't have an explicit extend. Try to handle a G_AND with a constant mask
7790 // on the RHS.
7791 if (Opc != TargetOpcode::G_AND)
7792 return AArch64_AM::InvalidShiftExtend;
7793
7794 std::optional<uint64_t> MaybeAndMask = getImmedFromMO(Root: MI.getOperand(i: 2));
7795 if (!MaybeAndMask)
7796 return AArch64_AM::InvalidShiftExtend;
7797 uint64_t AndMask = *MaybeAndMask;
7798 switch (AndMask) {
7799 default:
7800 return AArch64_AM::InvalidShiftExtend;
7801 case 0xFF:
7802 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
7803 case 0xFFFF:
7804 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
7805 case 0xFFFFFFFF:
7806 return AArch64_AM::UXTW;
7807 }
7808}
7809
7810Register AArch64InstructionSelector::moveScalarRegClass(
7811 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7812 MachineRegisterInfo &MRI = *MIB.getMRI();
7813 auto Ty = MRI.getType(Reg);
7814 assert(!Ty.isVector() && "Expected scalars only!");
7815 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7816 return Reg;
7817
7818 // Create a copy and immediately select it.
7819 // FIXME: We should have an emitCopy function?
7820 auto Copy = MIB.buildCopy(Res: {&RC}, Op: {Reg});
7821 selectCopy(I&: *Copy, TII, MRI, TRI, RBI);
7822 return Copy.getReg(Idx: 0);
7823}
7824
7825/// Select an "extended register" operand. This operand folds in an extend
7826/// followed by an optional left shift.
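/// E.g. (G_SHL (G_SEXT %w:(s32)), 2) folds as {%w, sxtw #2}; a left-shift
/// amount greater than 4 is rejected.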
7827InstructionSelector::ComplexRendererFns
7828AArch64InstructionSelector::selectArithExtendedRegister(
7829 MachineOperand &Root) const {
7830 if (!Root.isReg())
7831 return std::nullopt;
7832 MachineRegisterInfo &MRI =
7833 Root.getParent()->getParent()->getParent()->getRegInfo();
7834
7835 uint64_t ShiftVal = 0;
7836 Register ExtReg;
7837 AArch64_AM::ShiftExtendType Ext;
7838 MachineInstr *RootDef = getDefIgnoringCopies(Reg: Root.getReg(), MRI);
7839 if (!RootDef)
7840 return std::nullopt;
7841
7842 if (!isWorthFoldingIntoExtendedReg(MI&: *RootDef, MRI, IsAddrOperand: false))
7843 return std::nullopt;
7844
7845 // Check if we can fold a shift and an extend.
7846 if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
7847 // Look for a constant on the RHS of the shift.
7848 MachineOperand &RHS = RootDef->getOperand(i: 2);
7849 std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(Root: RHS);
7850 if (!MaybeShiftVal)
7851 return std::nullopt;
7852 ShiftVal = *MaybeShiftVal;
7853 if (ShiftVal > 4)
7854 return std::nullopt;
7855 // Look for a valid extend instruction on the LHS of the shift.
7856 MachineOperand &LHS = RootDef->getOperand(i: 1);
7857 MachineInstr *ExtDef = getDefIgnoringCopies(Reg: LHS.getReg(), MRI);
7858 if (!ExtDef)
7859 return std::nullopt;
7860 Ext = getExtendTypeForInst(MI&: *ExtDef, MRI);
7861 if (Ext == AArch64_AM::InvalidShiftExtend)
7862 return std::nullopt;
7863 ExtReg = ExtDef->getOperand(i: 1).getReg();
7864 } else {
7865 // Didn't get a shift. Try just folding an extend.
7866 Ext = getExtendTypeForInst(MI&: *RootDef, MRI);
7867 if (Ext == AArch64_AM::InvalidShiftExtend)
7868 return std::nullopt;
7869 ExtReg = RootDef->getOperand(i: 1).getReg();
7870
7871 // If we have a 32 bit instruction which zeroes out the high half of a
7872 // register, we get an implicit zero extend for free. Check if we have one.
7873 // FIXME: We actually emit the extend right now even though we don't have
7874 // to.
7875 if (Ext == AArch64_AM::UXTW && MRI.getType(Reg: ExtReg).getSizeInBits() == 32) {
7876 MachineInstr *ExtInst = MRI.getVRegDef(Reg: ExtReg);
7877 if (isDef32(MI: *ExtInst))
7878 return std::nullopt;
7879 }
7880 }
7881
7882 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
7883 // copy.
7884 MachineIRBuilder MIB(*RootDef);
7885 ExtReg = moveScalarRegClass(Reg: ExtReg, RC: AArch64::GPR32RegClass, MIB);
7886
7887 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); },
7888 [=](MachineInstrBuilder &MIB) {
7889 MIB.addImm(Val: getArithExtendImm(ET: Ext, Imm: ShiftVal));
7890 }}};
7891}
7892
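/// Match a value that is the high 64-bit half of a 128-bit vector: either the
/// second result of a G_UNMERGE_VALUES or lane 1 of a <2 x s64>
/// G_EXTRACT_VECTOR_ELT (looking through little-endian bitcasts), and render
/// the full 128-bit source register.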
7893InstructionSelector::ComplexRendererFns
7894AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
7895 if (!Root.isReg())
7896 return std::nullopt;
7897 MachineRegisterInfo &MRI =
7898 Root.getParent()->getParent()->getParent()->getRegInfo();
7899
7900 auto Extract = getDefSrcRegIgnoringCopies(Reg: Root.getReg(), MRI);
7901 while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
7902 STI.isLittleEndian())
7903 Extract =
7904 getDefSrcRegIgnoringCopies(Reg: Extract->MI->getOperand(i: 1).getReg(), MRI);
7905 if (!Extract)
7906 return std::nullopt;
7907
7908 if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
7909 if (Extract->Reg == Extract->MI->getOperand(i: 1).getReg()) {
7910 Register ExtReg = Extract->MI->getOperand(i: 2).getReg();
7911 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}};
7912 }
7913 }
7914 if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
7915 LLT SrcTy = MRI.getType(Reg: Extract->MI->getOperand(i: 1).getReg());
7916 auto LaneIdx = getIConstantVRegValWithLookThrough(
7917 VReg: Extract->MI->getOperand(i: 2).getReg(), MRI);
7918 if (LaneIdx && SrcTy == LLT::fixed_vector(NumElements: 2, ScalarSizeInBits: 64) &&
7919 LaneIdx->Value.getSExtValue() == 1) {
7920 Register ExtReg = Extract->MI->getOperand(i: 1).getReg();
7921 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(RegNo: ExtReg); }}};
7922 }
7923 }
7924
7925 return std::nullopt;
7926}
7927
7928void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7929 const MachineInstr &MI,
7930 int OpIdx) const {
7931 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7932 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7933 "Expected G_CONSTANT");
7934 std::optional<int64_t> CstVal =
7935 getIConstantVRegSExtVal(VReg: MI.getOperand(i: 0).getReg(), MRI);
7936 assert(CstVal && "Expected constant value");
7937 MIB.addImm(Val: *CstVal);
7938}
7939
7940void AArch64InstructionSelector::renderLogicalImm32(
7941 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7942 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7943 "Expected G_CONSTANT");
7944 uint64_t CstVal = I.getOperand(i: 1).getCImm()->getZExtValue();
7945 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(imm: CstVal, regSize: 32);
7946 MIB.addImm(Val: Enc);
7947}
7948
7949void AArch64InstructionSelector::renderLogicalImm64(
7950 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7951 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7952 "Expected G_CONSTANT");
7953 uint64_t CstVal = I.getOperand(i: 1).getCImm()->getZExtValue();
7954 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(imm: CstVal, regSize: 64);
7955 MIB.addImm(Val: Enc);
7956}
7957
7958void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7959 const MachineInstr &MI,
7960 int OpIdx) const {
7961 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7962 "Expected G_UBSANTRAP");
7963 MIB.addImm(Val: MI.getOperand(i: 0).getImm() | ('U' << 8));
7964}
7965
7966void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7967 const MachineInstr &MI,
7968 int OpIdx) const {
7969 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7970 "Expected G_FCONSTANT");
7971 MIB.addImm(
7972 Val: AArch64_AM::getFP16Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF()));
7973}
7974
7975void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7976 const MachineInstr &MI,
7977 int OpIdx) const {
7978 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7979 "Expected G_FCONSTANT");
7980 MIB.addImm(
7981 Val: AArch64_AM::getFP32Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF()));
7982}
7983
7984void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7985 const MachineInstr &MI,
7986 int OpIdx) const {
7987 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7988 "Expected G_FCONSTANT");
7989 MIB.addImm(
7990 Val: AArch64_AM::getFP64Imm(FPImm: MI.getOperand(i: 1).getFPImm()->getValueAPF()));
7991}
7992
7993void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7994 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7995 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7996 "Expected G_FCONSTANT");
7997 MIB.addImm(Val: AArch64_AM::encodeAdvSIMDModImmType4(Imm: MI.getOperand(i: 1)
7998 .getFPImm()
7999 ->getValueAPF()
8000 .bitcastToAPInt()
8001 .getZExtValue()));
8002}
8003
8004bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
8005 const MachineInstr &MI, unsigned NumBytes) const {
8006 if (!MI.mayLoadOrStore())
8007 return false;
8008 assert(MI.hasOneMemOperand() &&
8009 "Expected load/store to have only one mem op!");
8010 return (*MI.memoperands_begin())->getSize() == NumBytes;
8011}
8012
8013bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
8014 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
8015 if (MRI.getType(Reg: MI.getOperand(i: 0).getReg()).getSizeInBits() != 32)
8016 return false;
8017
8018 // Only return true if we know the operation will zero-out the high half of
8019 // the 64-bit register. Truncates can be subregister copies, which don't
8020 // zero out the high bits. Copies and other copy-like instructions can be
8021 // fed by truncates, or could be lowered as subregister copies.
8022 switch (MI.getOpcode()) {
8023 default:
8024 return true;
8025 case TargetOpcode::COPY:
8026 case TargetOpcode::G_BITCAST:
8027 case TargetOpcode::G_TRUNC:
8028 case TargetOpcode::G_PHI:
8029 return false;
8030 }
8031}
8032
8034// Perform fixups on the given PHI instruction's operands to force them all
8035// to be the same as the destination regbank.
8036static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
8037 const AArch64RegisterBankInfo &RBI) {
8038 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
8039 Register DstReg = MI.getOperand(i: 0).getReg();
8040 const RegisterBank *DstRB = MRI.getRegBankOrNull(Reg: DstReg);
8041 assert(DstRB && "Expected PHI dst to have regbank assigned");
8042 MachineIRBuilder MIB(MI);
8043
8044 // Go through each operand and ensure it has the same regbank.
8045 for (MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI.operands())) {
8046 if (!MO.isReg())
8047 continue;
8048 Register OpReg = MO.getReg();
8049 const RegisterBank *RB = MRI.getRegBankOrNull(Reg: OpReg);
8050 if (RB != DstRB) {
8051 // Insert a cross-bank copy.
8052 auto *OpDef = MRI.getVRegDef(Reg: OpReg);
8053 const LLT &Ty = MRI.getType(Reg: OpReg);
8054 MachineBasicBlock &OpDefBB = *OpDef->getParent();
8055
8056 // Any instruction we insert must appear after all PHIs in the block
8057 // for the block to be valid MIR.
8058 MachineBasicBlock::iterator InsertPt = std::next(x: OpDef->getIterator());
8059 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
8060 InsertPt = OpDefBB.getFirstNonPHI();
8061 MIB.setInsertPt(MBB&: *OpDef->getParent(), II: InsertPt);
8062 auto Copy = MIB.buildCopy(Res: Ty, Op: OpReg);
8063 MRI.setRegBank(Reg: Copy.getReg(Idx: 0), RegBank: *DstRB);
8064 MO.setReg(Copy.getReg(Idx: 0));
8065 }
8066 }
8067}
8068
8069void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
8070 // We're looking for PHIs; build a list so we don't invalidate iterators.
8071 MachineRegisterInfo &MRI = MF.getRegInfo();
8072 SmallVector<MachineInstr *, 32> Phis;
8073 for (auto &BB : MF) {
8074 for (auto &MI : BB) {
8075 if (MI.getOpcode() == TargetOpcode::G_PHI)
8076 Phis.emplace_back(Args: &MI);
8077 }
8078 }
8079
8080 for (auto *MI : Phis) {
8081 // We need to do some work here if the operand types are < 16 bit and they
8082 // are split across fpr/gpr banks. Since all types <32b on gpr
8083 // end up being assigned gpr32 regclasses, we can end up with PHIs here
8084 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
8085 // be selecting heterogeneous regbanks for operands if possible, but we
8086 // still need to be able to deal with it here.
8087 //
8088 // To fix this, if we have a gpr-bank operand < 32b in size and at least
8089 // one other operand is on the fpr bank, then we add cross-bank copies
8090 // to homogenize the operand banks. For simplicity the bank that we choose
8091 // to settle on is whatever bank the def operand has. For example:
8092 //
8093 // %endbb:
8094 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
8095 // =>
8096 // %bb2:
8097 // ...
8098 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
8099 // ...
8100 // %endbb:
8101 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
8102 bool HasGPROp = false, HasFPROp = false;
8103 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands())) {
8104 if (!MO.isReg())
8105 continue;
8106 const LLT &Ty = MRI.getType(Reg: MO.getReg());
8107 if (!Ty.isValid() || !Ty.isScalar())
8108 break;
8109 if (Ty.getSizeInBits() >= 32)
8110 break;
8111 const RegisterBank *RB = MRI.getRegBankOrNull(Reg: MO.getReg());
8112 // If for some reason we don't have a regbank yet, don't try anything.
8113 if (!RB)
8114 break;
8115
8116 if (RB->getID() == AArch64::GPRRegBankID)
8117 HasGPROp = true;
8118 else
8119 HasFPROp = true;
8120 }
8121 // We have heterogeneous regbanks; fix them up.
8122 if (HasGPROp && HasFPROp)
8123 fixupPHIOpBanks(MI&: *MI, MRI, RBI);
8124 }
8125}
8126
8127namespace llvm {
8128InstructionSelector *
8129createAArch64InstructionSelector(const AArch64TargetMachine &TM,
8130 const AArch64Subtarget &Subtarget,
8131 const AArch64RegisterBankInfo &RBI) {
8132 return new AArch64InstructionSelector(TM, Subtarget, RBI);
8133}
8134}
8135